Please wait until the page is fully downloaded and then press the "Expand" button or the blue line numbers.

0030001 /*
0030002 icmp.c
0030003 
0030004 Copyright 1995 Philip Homburg
0030005 */
Unlike the ethernet, ip, udp, and tcp code, there are no file descriptors in the icmp code. Instead of user processes directly sending icmp packets, functions within the ip code send icmp packets. For example, icmp_snd_time_exceeded() is called by the ip code to send an ICMP_TYPE_TIME_EXCEEDED icmp message if some fragments of the original packet were not received in time to be reassembled by the destination system.

Furthermore, if an icmp packet is received, a user process does not receive the packet (unlike, for example, udp packets). For example, if an ICMP_TYPE_ROUTER_ADVER icmp packet is received, the packet is not passed on to a user process. Instead, the icmp router advertisement packet is used to modify the routing table.


0030006 
0030007 #include "inet.h"
0030008 #include "buf.h"
0030009 #include "event.h"
0030010 #include "type.h"
0030011 
0030012 #include "assert.h"
0030013 #include "icmp.h"
0030014 #include "icmp_lib.h"
0030015 #include "io.h"
0030016 #include "ip.h"
0030017 #include "ip_int.h"
0030018 #include "ipr.h"
0030019 
0030020 THIS_FILE
0030021 
0030022 typedef struct icmp_port	/* per-interface icmp state; one entry of icmp_port_table[] */
0030023 {
0030024          int icp_flags;	/* bitwise OR of the ICPF_* flags */
0030025          int icp_state;	/* one of the ICPS_* states; drives icmp_main() */
0030026          int icp_ipport;	/* index of the associated ip port (set in icmp_init()) */
0030027          int icp_ipfd;	/* ip file descriptor returned by ip_open() */
0030028          acc_t *icp_head_queue;	/* head of the queue of icmp packets waiting to be written */
0030029          acc_t *icp_tail_queue;	/* tail of that write queue */
0030030          acc_t *icp_write_pack;	/* packet handed to the ip layer via icmp_getdata() */
0030031 } icmp_port_t;
icmp_port

There is one icmp port for each interface. For the following inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there will be 2 icmp ports, one for the ethernet interface and one for the psip interface.

Note that the icmp layer does not have a concept of a file descriptor.

typedef struct icmp_port

{
int icp_flags;
int icp_state;
int icp_ipport;
int icp_ipfd;
acc_t *icp_head_queue;
acc_t *icp_tail_queue;
acc_t *icp_write_pack;
} icmp_port_t;
int icp_flags:

icp_flags will be a combination (bitwise OR) of the following:

#define ICPF_EMPTY 0x0
#define ICPF_SUSPEND 0x1
#define ICPF_READ_IP 0x2
#define ICPF_READ_SP 0x4
#define ICPF_WRITE_IP 0x8
#define ICPF_WRITE_SP 0x10

The flags above are self-explanatory.


int icp_state:

icp_state will be one of the following:

#define ICPS_BEGIN 0
#define ICPS_IPOPT 1
#define ICPS_MAIN 2
#define ICPS_ERROR 3

After the network service has been initialized and configured, the state of the icmp port is ICPS_MAIN and does not leave this state unless there is an error or the icmp port is reconfigured.


int icp_ipport:

The icmp port's associated ip port.
int icp_ipfd: The icmp port's associated ip file descriptor.


acc_t *icp_head_queue, *icp_tail_queue, *icp_write_pack:

icp_head_queue and icp_tail_queue are the head and tail of the write queue for the icmp port. Immediately before ip_write() is called, the head of the write queue is placed in the icp_write_pack field and then the ip code attempts to send out the icmp packet in this field.


0030032 
0030033 #define ICPF_EMPTY       0x0
0030034 #define ICPF_SUSPEND       0x1
0030035 #define ICPF_READ_IP       0x2
0030036 #define ICPF_READ_SP       0x4
0030037 #define ICPF_WRITE_IP       0x8
0030038 #define ICPF_WRITE_SP       0x10
0030039 
0030040 #define ICPS_BEGIN       0
0030041 #define ICPS_IPOPT       1
0030042 #define ICPS_MAIN       2
0030043 #define ICPS_ERROR       3
0030044 
0030045 PRIVATE icmp_port_t *icmp_port_table;
0030046 
0030047 FORWARD void icmp_main ARGS(( icmp_port_t *icmp_port ));
0030048 FORWARD acc_t *icmp_getdata ARGS(( int port, size_t offset,
0030049          size_t count, int for_ioctl ));
0030050 FORWARD int icmp_putdata ARGS(( int port, size_t offset,
0030051          acc_t *data, int for_ioctl ));
0030052 FORWARD void icmp_read ARGS(( icmp_port_t *icmp_port ));
0030053 FORWARD void process_data ARGS(( icmp_port_t *icmp_port,
0030054          acc_t *data ));
0030055 FORWARD u16_t icmp_pack_oneCsum ARGS(( acc_t *ip_pack ));
0030056 FORWARD void icmp_echo_request ARGS(( icmp_port_t *icmp_port,
0030057          acc_t *ip_pack, int ip_hdr_len, ip_hdr_t *ip_hdr,
0030058          acc_t *icmp_pack, int icmp_len, icmp_hdr_t *icmp_hdr ));
0030059 FORWARD void icmp_dst_unreach ARGS(( icmp_port_t *icmp_port,
0030060          acc_t *ip_pack, int ip_hdr_len, ip_hdr_t *ip_hdr,
0030061          acc_t *icmp_pack, int icmp_len, icmp_hdr_t *icmp_hdr ));
0030062 FORWARD void icmp_time_exceeded ARGS(( icmp_port_t *icmp_port,
0030063          acc_t *ip_pack, int ip_hdr_len, ip_hdr_t *ip_hdr,
0030064          acc_t *icmp_pack, int icmp_len, icmp_hdr_t *icmp_hdr ));
0030065 FORWARD void icmp_router_advertisement ARGS(( icmp_port_t *icmp_port,
0030066          acc_t *icmp_pack, int icmp_len, icmp_hdr_t *icmp_hdr ));
0030067 FORWARD void icmp_redirect ARGS(( icmp_port_t *icmp_port,
0030068          ip_hdr_t *ip_hdr, acc_t *icmp_pack, int icmp_len,
0030069          icmp_hdr_t *icmp_hdr ));
0030070 FORWARD acc_t *make_repl_ip ARGS(( ip_hdr_t *ip_hdr,
0030071          int ip_len ));
0030072 FORWARD void enqueue_pack ARGS(( icmp_port_t *icmp_port,
0030073          acc_t *reply_ip_hdr ));
0030074 FORWARD void icmp_write ARGS(( icmp_port_t *icmp_port ));
0030075 FORWARD void icmp_buffree ARGS(( int priority ));
0030076 FORWARD acc_t *icmp_err_pack ARGS(( acc_t *pack, icmp_hdr_t **icmp_hdr ));
0030077 #ifdef BUF_CONSISTENCY_CHECK
0030078 FORWARD void icmp_bufcheck ARGS(( void ));
0030079 #endif
0030080 
0030081 PUBLIC void icmp_prep()
icmp_prep()

icmp_prep() simply allocates space for icmp_port_table[].


0030082 {
0030083          icmp_port_table= alloc(ip_conf_nr * sizeof(icmp_port_table[0]));
0030084 }
0030085 
0030086 PUBLIC void icmp_init()
icmp_init()

icmp_init() initializes icmp_port_table[] by setting a few fields for each icmp port and then (again for each icmp port) calls icmp_main() to complete the initialization.


0030087 {
0030088          int i;
0030089          icmp_port_t *icmp_port;
icmp_port

There is one icmp port for each interface. For the following inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there will be 2 icmp ports, one for the ethernet interface and one for the psip interface.

Note that the icmp layer does not have a concept of a file descriptor.

typedef struct icmp_port

{
int icp_flags;
int icp_state;
int icp_ipport;
int icp_ipfd;
acc_t *icp_head_queue;
acc_t *icp_tail_queue;
acc_t *icp_write_pack;
} icmp_port_t;
int icp_flags:

icp_flags will be a combination (bitwise OR) of the following:

#define ICPF_EMPTY 0x0
#define ICPF_SUSPEND 0x1
#define ICPF_READ_IP 0x2
#define ICPF_READ_SP 0x4
#define ICPF_WRITE_IP 0x8
#define ICPF_WRITE_SP 0x10

The flags above are self-explanatory.


int icp_state:

icp_state will be one of the following:

#define ICPS_BEGIN 0
#define ICPS_IPOPT 1
#define ICPS_MAIN 2
#define ICPS_ERROR 3

After the network service has been initialized and configured, the state of the icmp port is ICPS_MAIN and does not leave this state unless there is an error or the icmp port is reconfigured.


int icp_ipport:

The icmp port's associated ip port.
int icp_ipfd: The icmp port's associated ip file descriptor.


acc_t *icp_head_queue, *icp_tail_queue, *icp_write_pack:

icp_head_queue and icp_tail_queue are the head and tail of the write queue for the icmp port. Immediately before ip_write() is called, the head of the write queue is placed in the icp_write_pack field and then the ip code attempts to send out the icmp packet in this field.


0030090 
0030091          assert (BUF_S >= sizeof (nwio_ipopt_t));
0030092 
0030093          for (i= 0, icmp_port= icmp_port_table; i<ip_conf_nr; i++, icmp_port++)
Initialize each of the icmp ports. For the following inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there will be 2 icmp ports, one for the ethernet interface and one for the psip interface.


0030094          {
0030095 #if ZERO
0030096                   icmp_port->icp_flags= ICPF_EMPTY;
0030097                   icmp_port->icp_state= ICPS_BEGIN;
0030098 #endif
0030099                   icmp_port->icp_ipport= i;
0030100          }
0030101 
0030102 #ifndef BUF_CONSISTENCY_CHECK
0030103          bf_logon(icmp_buffree);
bf_logon()

bf_logon() is used by eth_init(), psip_init(), ip_init(), icmp_init(), tcp_init(), and udp_init() to register their functions for freeing buffers. For example, eth_init() calls bf_logon() with an argument of eth_buffree().

After bf_logon() is finished, freereq[] is configured as follows:

freereq[0]=eth_buffree
freereq[1]=psip_buffree
freereq[2]=ip_buffree
freereq[3]=icmp_buffree
freereq[4]=tcp_buffree
freereq[5]=udp_buffree



0030104 #else
0030105          bf_logon(icmp_buffree, icmp_bufcheck);
0030106 #endif
0030107 
0030108          for (i= 0, icmp_port= icmp_port_table; i<ip_conf_nr; i++, icmp_port++)
0030109          {
0030110                   icmp_main (icmp_port);
icmp_main()

icmp_main() is called during the initialization of the network service to initialize each of the icmp ports on a system. During the initialization of an icmp port, ip_open() is called to acquire an ip file descriptor. ip_ioctl() is then called to initialize this newly-opened ip file descriptor. Finally, icmp_read() is called to read any packets that have arrived at the ip file descriptor.

icmp_main() is called later by icmp_getdata() if, for whatever reason, the initialization steps were not able to complete.


0030111          }
0030112 }
0030113 
0030114 PRIVATE void icmp_main(icmp_port)
0030115 icmp_port_t *icmp_port;
icmp_main()

icmp_main() is called during the initialization of the network service to initialize each of the icmp ports on a system. During the initialization of an icmp port, ip_open() is called to acquire an ip file descriptor. ip_ioctl() is then called to initialize this newly-opened ip file descriptor. Finally, icmp_read() is called to read any packets that have arrived at the ip file descriptor.

icmp_main() is called later by icmp_getdata() if, for whatever reason, the initialization steps were not able to complete.


0030116 {
0030117          int result;
0030118          switch (icmp_port->icp_state)
0030119          {
0030120          case ICPS_BEGIN:
0030121                   icmp_port->icp_head_queue= 0;
icp_head_queue is the head of the queue of icmp packets that are waiting to be delivered to their destination (i.e., that have been written).


0030122                   icmp_port->icp_ipfd= ip_open (icmp_port->icp_ipport,
0030123                            icmp_port->icp_ipport, icmp_getdata, icmp_putdata, 0);
ip_open()

ip_open() finds an available ip file descriptor in ip_fd_table[], sets a few of the ip file descriptor's fields, and then returns the index of the ip file descriptor within ip_fd_table[]. ip_open() is called by higher-level code (e.g., udp_main()) and the returned ip file descriptor is then associated with a higher-level port (e.g., udp port).



Note that there will only be a few ip file descriptors open at any given time. There will be an ip file descriptor opened for each interface for each client (udp, tcp, and icmp) and there will be one ip file descriptor opened each time the /dev/ip file is opened directly (as opposed to when, for example, the /dev/udp file is opened).


0030124                   if (icmp_port->icp_ipfd<0)
0030125                   {
0030126                            DBLOCK(1, printf("unable to open ip_port %d\n",
0030127                                     icmp_port->icp_ipport));
0030128                            break;
0030129                   }
0030130                   icmp_port->icp_state= ICPS_IPOPT;
0030131                   icmp_port->icp_flags &= ~ICPF_SUSPEND;
0030132                   result= ip_ioctl (icmp_port->icp_ipfd, NWIOSIPOPT);
ip_ioctl()

ip_ioctl(fd, req) performs one of several tasks on the ip file descriptor whose index within ip_fd_table[] is fd, the first parameter. The task performed depends on req, the second parameter.

NWIOSIPOPT: Set the options (the if_ipopt field of the ip file descriptor) on the ip file descriptor. For example, during the initialization of a physical udp port, ip_ioctl() is called with req equal to NWIOSIPOPT.

An example of an ip option (i.e., ip flag) is the NWIO_EN_BROAD flag. This flag is set if the ip file descriptor accepts broadcast packets. The options desired are obtained from the user process. For example, if a udp port opened up the ip file descriptor, udp_get_data() is (indirectly) called to obtain the configuration data.

NWIOGIPOPT: Send the ip file descriptor's options to the user process requesting the information. The information is sent in a struct of type nwio_ipopt_t.

NWIOSIPCONF: Configure the ip port (for example, the ip address can be configured) that corresponds to the ip file descriptor fd. The fields are obtained from the user process. For a detailed description of the different settings, click here.

NWIOGIPCONF: Send the ip address/subnet information (i.e., send a nwio_ipconf_t struct) to the next higher layer. For example, if the next higher layer is udp, ip_ioctl() calls (indirectly) udp_put_data(), which sets the ip address for the udp port (i.e., sets the up_ipaddr field of the corresponding element in udp_port_table[]).

NWIOGIPIROUTE, NWIOSIPIROUTE, NWIOGIPOROUTE, NWIODIPIROUTE, NWIOSIPOROUTE: It is possible to influence the route taken by a packet. These ioctl requests alter the input and output routing tables.


0030133                   if (result == NW_SUSPEND)
ip_ioctl() will never return NW_SUSPEND for this call.


0030134                   {
0030135                            icmp_port->icp_flags |= ICPF_SUSPEND;
0030136                            break;
0030137                   }
0030138                   assert(result == NW_OK);
0030139 
0030140                   /* falls through */
0030141          case ICPS_IPOPT:
0030142                   icmp_port->icp_state= ICPS_MAIN;
0030143                   icmp_port->icp_flags &= ~ICPF_SUSPEND;
0030144                   icmp_read(icmp_port);
icmp_read()

icmp_read() repeatedly calls ip_read() until all the valid packets in the read queue of the icmp port's associated ip file descriptor have been processed.

icmp_read() is called during initialization and is also called after an icmp packet has been processed.


0030145                   break;
0030146          default:
0030147                   DBLOCK(1, printf("unknown state %d\n",
0030148                            icmp_port->icp_state));
0030149                   break;
0030150          }
0030151 }
0030152 
0030153 PRIVATE acc_t *icmp_getdata(port, offset, count, for_ioctl)
0030154 int port;
0030155 size_t offset, count;
0030156 int for_ioctl;
icmp_getdata()

During the initialization of the network service, the icmp code calls ip_open() to acquire an ip file descriptor. The third and fourth arguments to ip_open() are pointers to icmp_getdata() and icmp_putdata(). These two values set the if_get_userdata and if_put_userdata fields of the ip file descriptor.

icmp_getdata() is (indirectly) called by ip_ioctl() to configure the ip file descriptor that the icmp code opened and is called by ip_write() to assist in sending out the next icmp packet (e.g., an icmp echo request packet) in the queue of icmp packets waiting to be sent out.


0030157 {
0030158          icmp_port_t *icmp_port;
0030159          nwio_ipopt_t *ipopt;
0030160          acc_t *data;
0030161          int result;
0030162 
0030163          icmp_port= &icmp_port_table[port];
0030164 
0030165          if (icmp_port->icp_flags & ICPF_WRITE_IP)
If the icmp code attempts to send out an icmp packet and everything goes well, icmp_getdata() will be called with a non-zero value of count. In this case, bf_cut() is called to trim the icmp packet to size and then return the newly resized packet. If something goes wrong with the write operation, error_reply() calls icmp_getdata() with a value of zero (0) for count.


0030166          {
0030167                   if (!count)
If called from error_reply(), ip_write() (which called error_reply()) had problems sending out the icmp packet. Therefore, clear out the icp_write_pack field (which contains the problem icmp packet) and, if other icmp packets were suspended, call icmp_write() in an attempt to send these packets out.


0030168                   {
0030169                            bf_afree(icmp_port->icp_write_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030170                            icmp_port->icp_write_pack= 0;
0030171 
0030172                            result= (int)offset;
0030173                            if (result<0)
0030174                            {
0030175                                     DBLOCK(1, printf("got write error %d\n",
0030176                                              result));
0030177                            }
0030178                            if (icmp_port->icp_flags & ICPF_WRITE_SP)
The ICPF_WRITE_SP flag is never set (see line 30615) so this block is never executed.


0030179                            {
0030180                                     icmp_port->icp_flags &=
0030181                                              ~(ICPF_WRITE_IP|ICPF_WRITE_SP);
0030182                                     icmp_write (icmp_port);
icmp_write()

icmp_write() loops through an icmp port's write queue, each time placing the head of the queue into the icp_write_pack field of the icmp port and then calling ip_write() to send this icmp packet out.

After functions build an icmp packet, they call enqueue_pack() to send the icmp packet out rather than call icmp_write() directly. enqueue_pack() enqueues the icmp packet if another icmp packet is currently being sent out.

It is doubtful that the write queue mentioned above will ever be more than a single icmp packet. ip_write() was rewritten and currently only returns NW_OK. If the packet is valid, ip_write() always moves the packet to the ip layer even if the packet isn't immediately sent out the ethernet or the psip interface.


0030183                            }
0030184                            return NW_OK;
0030185                   }
0030186                   return bf_cut(icmp_port->icp_write_pack, offset, count);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, simply duplicate the first accessor in the linked list but set acc_length=0 and acc_next=null. In other words, create a linked list of length one accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0030187          }
0030188          switch (icmp_port->icp_state)
If the icmp port is being configured, its state (icp_state) will be ICPS_IPOPT.


0030189          {
0030190          case ICPS_IPOPT:
0030191                   if (!count)
If the flags on lines 30209-30212 are unacceptable, icmp_getdata() will be called by reply_thr_get() (which was called by ip_ioctl()) and, therefore, count will be zero. However, since the network service would be unusable if the options were unacceptable, the options are obviously acceptable and so count will never be zero.


0030192                   {
0030193                            result= (int)offset;
0030194                            assert(result == NW_OK);
0030195                            if (result < 0)
0030196                            {
0030197                                     icmp_port->icp_state= ICPS_ERROR;
0030198                                     break;
0030199                            }
0030200                            if (icmp_port->icp_flags & ICPF_SUSPEND)
0030201                                     icmp_main(icmp_port);
icmp_main()

icmp_main() is called during the initialization of the network service to initialize each of the icmp ports on a system. During the initialization of an icmp port, ip_open() is called to acquire an ip file descriptor. ip_ioctl() is then called to initialize this newly-opened ip file descriptor. Finally, icmp_read() is called to read any packets that have arrived at the ip file descriptor.

icmp_main() is called later by icmp_getdata() if, for whatever reason, the initialization steps were not able to complete.


0030202                            return NW_OK;
0030203                   }
0030204 
If icmp_getdata() is (indirectly) called by ip_ioctl() to set the options for an icmp port's associated ip file descriptor, allocate an nwio_ipopt struct, set the struct's nwio_flags and nwio_proto fields to values appropriate for icmp, and return the struct to ip_ioctl() so that ip_ioctl() can use the fields to configure the ip file descriptor.


0030205 assert (count == sizeof (*ipopt));
0030206                   data= bf_memreq (sizeof (*ipopt));
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0030207 assert (data->acc_length == sizeof(*ipopt));
0030208                   ipopt= (nwio_ipopt_t *)ptr2acc_data(data);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030209                   ipopt->nwio_flags= NWIO_COPY | NWIO_EN_LOC |
0030210                            NWIO_EN_BROAD |
0030211                            NWIO_REMANY | NWIO_PROTOSPEC |
0030212                            NWIO_HDR_O_ANY | NWIO_RWDATALL;
0030213                   ipopt->nwio_proto= IPPROTO_ICMP;
Configure the ip file descriptor with options appropriate for icmp.


0030214                   return data;
0030215          default:
0030216                   DBLOCK(1, printf("unknown state %d\n",
0030217                            icmp_port->icp_state));
0030218                   return 0;
0030219          }
0030220 }
0030221 
0030222 PRIVATE int icmp_putdata(port, offset, data, for_ioctl)
0030223 int port;
0030224 size_t offset;
0030225 acc_t *data;
0030226 int for_ioctl;
icmp_putdata()

During the initialization of the network service, the icmp code calls ip_open() to acquire an ip file descriptor. The third and fourth arguments to the ip_open() call are pointers to icmp_getdata() and icmp_putdata(). These two values set the if_get_userdata and if_put_userdata fields of the ip file descriptor.

The two most significant places that icmp_putdata() is called are in packet2user(). The first time that packet2user() calls icmp_putdata(), icmp_putdata() simply calls process_data() to process the icmp packet that the system received and that packet2user() was processing. The second time that packet2user() calls icmp_putdata(), icmp_putdata() calls icmp_read() to retry any previously suspended icmp read operations.


0030227 {
0030228          icmp_port_t *icmp_port;
0030229          int result;
0030230 
0030231          icmp_port= &icmp_port_table[port];
0030232 
0030233          if (icmp_port->icp_flags & ICPF_READ_IP)
0030234          {
0030235 assert (!for_ioctl);
0030236                   if (!data)
0030237                   {
0030238                            result= (int)offset;
0030239                            if (result<0)
0030240                            {
0030241                                     DBLOCK(1, printf("got read error %d\n",
0030242                                              result));
0030243                            }
0030244                            if (icmp_port->icp_flags & ICPF_READ_SP)
Attempt to continue previously suspended icmp read operations.


0030245                            {
0030246                                     icmp_port->icp_flags &=
0030247                                              ~(ICPF_READ_IP|ICPF_READ_SP);
0030248                                     icmp_read (icmp_port);
icmp_read()

icmp_read() repeatedly calls ip_read() until all the valid packets in the read queue of the icmp port's associated ip file descriptor have been processed.

icmp_read() is called during initialization and is also called after an icmp packet has been processed.


0030249                            }
0030250                            return NW_OK;
0030251                   }
0030252                   process_data(icmp_port, data);
process_data() / icmp

process_data() is called by icmp_putdata() to handle received icmp packets. The icmp packet is handed off by process_data() to the function responsible for the icmp packet's type (e.g., icmp_dst_unreach() handles icmp packets of type ICMP_TYPE_DST_UNRCH).


0030253                   return NW_OK;
0030254          }
0030255          switch (icmp_port->icp_state)
0030256          {
0030257          default:
0030258                   DBLOCK(1, printf("unknown state %d\n",
0030259                            icmp_port->icp_state));
0030260                   return 0;
0030261          }
0030262 }
0030263 
0030264 PRIVATE void icmp_read(icmp_port)
0030265 icmp_port_t *icmp_port;
icmp_read()

icmp_read() repeatedly calls ip_read() until all the valid packets in the read queue of the icmp port's associated ip file descriptor have been processed.

icmp_read() is called during initialization and is also called after an icmp packet has been processed.


0030266 {
0030267          int result;
0030268 
0030269 assert (!(icmp_port->icp_flags & (ICPF_READ_IP|ICPF_READ_SP) ||
0030270          (icmp_port->icp_flags & (ICPF_READ_IP|ICPF_READ_SP)) ==
0030271          (ICPF_READ_IP|ICPF_READ_SP)));
0030272 
0030273          for (;;)
0030274          {
0030275                   icmp_port->icp_flags |= ICPF_READ_IP;
0030276                   icmp_port->icp_flags &= ~ICPF_READ_SP;
0030277 
0030278                   result= ip_read(icmp_port->icp_ipfd, ICMP_MAX_DATAGRAM);
ip_read()

If there are unexpired packets in the ip file descriptor fd's read queue, ip_read(fd, count) passes count (ip_read()'s second parameter) bytes off to the next-higher layer by calling packet2user(). If the packets in the file descriptor's read queue have expired, ip_read() discards the packets.

ip_read() is (indirectly) called by sr_rwio() when a process reads an ip device file (e.g., /dev/ip).

In the udp code, ip_read() is called by read_ip_packets() during the initialization of the udp code. Normally, ip_read() is not called by the udp code after the initialization.


0030279                   if (result == NW_SUSPEND)
0030280                   {
0030281                            icmp_port->icp_flags |= ICPF_READ_SP;
Note that outside of icmp_read(), the ICPF_READ_SP flag will always be set.


0030282                            return;
0030283                   }
0030284          }
0030285 }
0030286 
0030287 PUBLIC void icmp_snd_time_exceeded(port_nr, pack, code)
0030288 int port_nr;
0030289 acc_t *pack;
0030290 int code;
icmp_snd_time_exceeded()

icmp_snd_time_exceeded() sends an ICMP_TYPE_TIME_EXCEEDED icmp message. ICMP_TYPE_TIME_EXCEEDED messages are sent when either a packet's TTL timer has expired or if some fragments of the original packet were not received in time to be reassembled by the destination system.


0030291 {
0030292          acc_t *icmp_acc;
0030293          icmp_hdr_t *icmp_hdr;
0030294          icmp_port_t *icmp_port;
0030295 
0030296          assert(0 <= port_nr && port_nr < ip_conf_nr);
0030297          icmp_port= &icmp_port_table[port_nr];
Find the icmp port whose index within icmp_port_table[] is port_nr.


0030298          pack= icmp_err_pack(pack, &icmp_hdr);
icmp_err_pack()

icmp_err_pack(pack, icmp_hdr) creates an icmp message and encapsulates the message in an ip header.

In order to do this, icmp_err_pack() first cuts out everything except the ip header and the first 8 bytes of data from the ip packet pack, icmp_err_pack()'s first parameter. Next, icmp_err_pack() zeroizes most of the fields (all fields except ih_chksum; the other fields must be set by the calling function) of the icmp header icmp_hdr, icmp_err_pack()'s second parameter, and appends the remains of the ip packet to the icmp header to create an icmp message. Finally, icmp_err_pack() creates an ip header and appends the newly created icmp message to this ip header.


0030299          if (pack == NULL)
0030300                   return;
0030301          icmp_hdr->ih_type= ICMP_TYPE_TIME_EXCEEDED;
Set the icmp message type to ICMP_TYPE_TIME_EXCEEDED. ICMP_TYPE_TIME_EXCEEDED messages are sent when either a packet's TTL timer has expired or if some fragments of the original packet were not received in time to be reassembled by the destination system.


0030302          icmp_hdr->ih_code= code;
code, icmp_snd_time_exceeded()'s third parameter, will be one of the following:

#define ICMP_TTL_EXC 0
#define ICMP_FRAG_REASSEM 1


0030303          icmp_hdr->ih_chksum= ~oneC_sum(~icmp_hdr->ih_chksum,
0030304                   (u16_t *)&icmp_hdr->ih_type, 2);
Recalculate the icmp header's checksum to reflect the new values of the ih_type and ih_code fields. Note that both ih_type and ih_code are 1 byte values and that the recalculation involves 2 bytes.


0030305          enqueue_pack(icmp_port, pack);
enqueue_pack()

enqueue_pack enqueues an outgoing icmp packet in an icmp port's write queue and then calls icmp_write() to send the packet out.

It is doubtful that the queue mentioned above will ever be more than a single icmp packet. ip_write() was rewritten and currently only returns NW_OK. If the packet is valid, ip_write() always moves the packet to the ip layer even if the packet isn't immediately sent out the ethernet or the psip interface.


0030306 }
0030307 
0030308 PUBLIC void icmp_snd_redirect(port_nr, pack, code, gw)
0030309 int port_nr;
0030310 acc_t *pack;
0030311 int code;
0030312 ipaddr_t gw;
icmp_snd_redirect()

icmp_snd_redirect(port_nr, pack, code, gw) builds an icmp redirect packet (partially using the ip packet pack, icmp_snd_redirect()'s second parameter) and then places the icmp redirect packet in the outgoing queue.

The function first calls icmp_err_pack() to build a generic icmp packet, sets the ih_type field of the icmp header to ICMP_TYPE_REDIRECT (along with the code and gateway fields) and recalculates the checksum of the icmp header (since the type, code, and gateway fields of the icmp header have changed) before placing the icmp packet in the icmp port's write queue.


0030313 {
0030314          acc_t *icmp_acc;
0030315          icmp_hdr_t *icmp_hdr;
0030316          icmp_port_t *icmp_port;
0030317 
0030318          assert(0 <= port_nr && port_nr < ip_conf_nr);
0030319          icmp_port= &icmp_port_table[port_nr];
0030320          pack= icmp_err_pack(pack, &icmp_hdr);
icmp_err_pack()

icmp_err_pack(pack, icmp_hdr) creates an icmp message and encapsulates the message in an ip header.

In order to do this, icmp_err_pack() first cuts out everything except the ip header and the first 8 bytes of data from the ip packet pack, icmp_err_pack()'s first parameter. Next, icmp_err_pack() zeroizes most of the fields (all fields except ih_chksum; the other fields must be set by the calling function) of the icmp header icmp_hdr, icmp_err_pack()'s second parameter, and appends the remains of the ip packet to the icmp header to create an icmp message. Finally, icmp_err_pack() creates an ip header and appends the newly created icmp message to this ip header.


0030321          if (pack == NULL)
0030322                   return;
0030323          icmp_hdr->ih_type= ICMP_TYPE_REDIRECT;
0030324          icmp_hdr->ih_code= code;
0030325          icmp_hdr->ih_hun.ihh_gateway= gw;
The sender should have sent the packet to the gateway gw, icmp_snd_redirect()'s fourth parameter, rather than to this system.


0030326          icmp_hdr->ih_chksum= ~oneC_sum(~icmp_hdr->ih_chksum,
0030327                   (u16_t *)&icmp_hdr->ih_type, 2);
Recalculate the checksum for the icmp header using the new ih_type and ih_code values (which are 1 byte apiece).


oneC_sum()


A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement equals 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum of a udp header (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0030328          icmp_hdr->ih_chksum= ~oneC_sum(~icmp_hdr->ih_chksum,
0030329                   (u16_t *)&icmp_hdr->ih_hun.ihh_gateway, 4);
Recalculate the checksum for the icmp header using the new ihh_gateway value (which is 4 bytes).


oneC_sum()


A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement equals 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum of a udp header (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0030330          enqueue_pack(icmp_port, pack);
enqueue_pack()

enqueue_pack enqueues an outgoing icmp packet in an icmp port's write queue and then calls icmp_write() to send the packet out.

It is doubtful that the queue mentioned above will ever be more than a single icmp packet. ip_write() was rewritten and currently only returns NW_OK. If the packet is valid, ip_write() always moves the packet to the ip layer even if the packet isn't immediately sent out the ethernet or the psip interface.


0030331 }
0030332 
0030333 PUBLIC void icmp_snd_unreachable(port_nr, pack, code)
0030334 int port_nr;
0030335 acc_t *pack;
0030336 int code;
icmp_snd_unreachable()

icmp_snd_unreachable(port_nr, pack, code) builds an icmp unreachable packet (partially using the ip packet pack, icmp_snd_unreachable()'s second parameter) and then places the icmp unreachable packet in the outgoing queue. Icmp unreachable packets are sent if the network, host, or port number specified by the ip packet pack is unreachable.

The function first calls icmp_err_pack() to build a generic icmp packet, sets the ih_type field of the icmp header to ICMP_TYPE_DST_UNRCH and recalibrates the checksum (since the type and code fields of the icmp header have changed) of the icmp header before placing the packet in the icmp port's write queue.


0030337 {
0030338          acc_t *icmp_acc;
0030339          icmp_hdr_t *icmp_hdr;
0030340          icmp_port_t *icmp_port;
0030341 
0030342          assert(0 <= port_nr && port_nr < ip_conf_nr);
0030343          icmp_port= &icmp_port_table[port_nr];
Find the icmp port whose index within icmp_port_table[] is port_nr.


0030344          pack= icmp_err_pack(pack, &icmp_hdr);
icmp_err_pack()

icmp_err_pack(pack, icmp_hdr) creates an icmp message and encapsulates the message in an ip header.

In order to do this, icmp_err_pack() first cuts out everything except the ip header and the first 8 bytes of data from the ip packet pack, icmp_err_pack()'s first parameter. Next, icmp_err_pack() zeroizes most of the fields (all fields except ih_chksum; the other fields must be set by the calling function) of the icmp header icmp_hdr, icmp_err_pack()'s second parameter, and appends the remains of the ip packet to the icmp header to create an icmp message. Finally, icmp_err_pack() creates an ip header and appends the newly created icmp message to this ip header.


0030345          if (pack == NULL)
0030346                   return;
0030347          icmp_hdr->ih_type= ICMP_TYPE_DST_UNRCH;
0030348          icmp_hdr->ih_code= code;
0030349          icmp_hdr->ih_chksum= ~oneC_sum(~icmp_hdr->ih_chksum,
0030350                   (u16_t *)&icmp_hdr->ih_type, 2);
The checksum needs adjustment since both the ih_type and ih_code fields have changed. Note that both ih_type and ih_code are of type u8_t (i.e., they are 8 bits).


oneC_sum()


A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16-bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement equals 0xFFFF, the sum of the packet (computed without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the one's complement sum of a udp header (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0030351          enqueue_pack(icmp_port, pack);
enqueue_pack()

enqueue_pack enqueues an outgoing icmp packet in an icmp port's write queue and then calls icmp_write() to send the packet out.

It is doubtful that the queue mentioned above will ever be more than a single icmp packet. ip_write() was rewritten and currently only returns NW_OK. If the packet is valid, ip_write() always moves the packet to the ip layer even if the packet isn't immediately sent out the ethernet or the psip interface.


0030352 }
0030353 
0030354 PRIVATE void process_data(icmp_port, data)
0030355 icmp_port_t *icmp_port;
0030356 acc_t *data;
process_data() / icmp

process_data() is called by icmp_putdata() to handle received icmp packets. The icmp packet is handed off by process_data() to the function responsible for the icmp packet's type (e.g., icmp_dst_unreach() handles icmp packets of type ICMP_TYPE_DST_UNRCH).


0030357 {
0030358          ip_hdr_t *ip_hdr;
0030359          icmp_hdr_t *icmp_hdr;
0030360          acc_t *icmp_data;
0030361          int ip_hdr_len;
0030362          size_t pack_len;
0030363 
Lines 30365-30371 extract the size of the ip header and lines 30379-30388 eliminate this header from the packet so that the icmp header fields may be accessed.


0030364          /* Align entire packet */
0030365          data= bf_align(data, BUF_S, 4);
bf_align()

If data is not already packed and aligned, bf_align(acc, size, alignment) packs size (bf_align's second parameter) bytes from acc, bf_align()'s first parameter and a linked list of accessors (i.e., a packet), by calling bf_pack(). This packing is necessary to ensure that all of the fields from a header are easily accessed. For example, the ip code aligns a packet's header contained in the accessors before accessing the various ip header fields.

For a detailed description of the network service's buffer management, click here.


0030366 
0030367          data= bf_packIffLess(data, IP_MIN_HDR_SIZE);
bf_packIffLess()

If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0030368          ip_hdr= (ip_hdr_t *)ptr2acc_data(data);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030369          DIFBLOCK(0x10, (ip_hdr->ih_dst & HTONL(0xf0000000)) == HTONL(0xe0000000),
0030370                   printf("got multicast packet\n"));
0030371          ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
The lower 4 bits of the ih_vers_ihl field hold the length of the ip header (including any options) in 32-bit words. Shifting this value left by 2 bit positions (i.e., multiplying it by 4) gives the header length in bytes. An example of an option is a router list that a packet should follow to its destination.

The upper four bits are the version number (e.g., IPv4).


0030372 
0030373          if (ip_hdr_len>IP_MIN_HDR_SIZE)
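If the header's size according to the ih_vers_ihl field is greater than the minimum size of an ip header, the header contains options. In this case, make sure that the entire header (including the options) is in a single contiguous buffer and reload ip_hdr, since packing may have moved the data.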


0030374          {
0030375                   data= bf_packIffLess(data, ip_hdr_len);
bf_packIffLess()

If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0030376                   ip_hdr= (ip_hdr_t *)ptr2acc_data(data);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030377          }
0030378 
0030379          pack_len= bf_bufsize(data);
bf_bufsize()

bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).

For a detailed description of the network service's buffer management, click here.


0030380          pack_len -= ip_hdr_len;
0030381          if (pack_len < ICMP_MIN_HDR_LEN)
If the size of the packet without the ip header is smaller than the minimum allowable size of an icmp message, there's a problem.


0030382          {
0030383                   DBLOCK(1, printf("got an incomplete icmp packet\n"));
0030384                   bf_afree(data);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030385                   return;
0030386          }
0030387 
0030388          icmp_data= bf_cut(data, ip_hdr_len, pack_len);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0030389 
0030390          icmp_data= bf_packIffLess (icmp_data, ICMP_MIN_HDR_LEN);
bf_packIffLess()

If the data in a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0030391          icmp_hdr= (icmp_hdr_t *)ptr2acc_data(icmp_data);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030392 
0030393          if ((u16_t)~icmp_pack_oneCsum(icmp_data))
Verify that the checksum of the icmp message is correct.


icmp_pack_oneCsum()


icmp_pack_oneCsum() computes the checksum of an icmp message (icmp header plus the ip header of the problem ip packet plus the first 8 bytes of the problem packet). It accomplishes this by computing the checksum (by calling oneC_sum()) of each of the message's buffers.

Note that a checksum is used to determine if errors occurred during the transmission of data.

icmp_pack_oneCsum() is very similar to udp's pack_oneCsum(). The two functions should have been consolidated into one.


0030394          {
0030395                   DBLOCK(1, printf(
0030396                            "got packet with bad checksum (= 0x%x, 0x%x)\n",
0030397                            icmp_hdr->ih_chksum,
0030398                            (u16_t)~icmp_pack_oneCsum(icmp_data)));
0030399                   bf_afree(data);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030400                   bf_afree(icmp_data);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030401                   return;
0030402          }
0030403 
0030404          switch (icmp_hdr->ih_type)
0030405          {
0030406          case ICMP_TYPE_ECHO_REPL:
0030407                   break;
From freesoft.org:

"Ping is implemented using the required ICMP Echo function, documented in RFC 792 that all hosts should implement. Of course, administrators can disable ping messages (this is rarely a good idea, unless security considerations dictate that the host should be unreachable anyway), and some implementations have (gasp) even been known not to implement all required functions. However, ping is usually a better bet than almost any other network software."

It's somewhat surprising that the ping service is not implemented in the Minix network service.


0030408          case ICMP_TYPE_DST_UNRCH:
0030409                   icmp_dst_unreach (icmp_port, data, ip_hdr_len, ip_hdr,
0030410                            icmp_data, pack_len, icmp_hdr);
icmp_dst_unreach()

icmp_dst_unreach() is called by process_data() to handle either host-unreachable or network-unreachable icmp packets that are received from routers.

If the icmp message is valid, ipr_destunch() is called to mark the appropriate entry in the output routing table as unreachable.


0030411                   break;
0030412          case ICMP_TYPE_SRC_QUENCH:
Icmp source quench messages are ignored, which may be the best policy for handling them (and certainly the easiest).


0030413                   /* Ignore src quench ICMPs */
0030414                   DBLOCK(2, printf("ignoring SRC QUENCH ICMP.\n"));
0030415                   break;
0030416          case ICMP_TYPE_REDIRECT:
0030417                   icmp_redirect (icmp_port, ip_hdr, icmp_data, pack_len,
0030418                            icmp_hdr);
icmp_redirect()

icmp_redirect() is called by process_data() to handle icmp redirect messages that are received. If the icmp redirect is properly constructed, ipr_redirect() is called to update the routing table for the destination host or network.


0030419                   break;
0030420          case ICMP_TYPE_ECHO_REQ:
0030421                   icmp_echo_request(icmp_port, data, ip_hdr_len, ip_hdr,
0030422                            icmp_data, pack_len, icmp_hdr);
icmp_echo_request()

icmp_echo_request() is called by process_data() to handle echo requests (pings) that have been received by the system.

If the icmp message is valid, an icmp echo reply packet is assembled and enqueued in the outgoing queue.


0030423                   return;
0030424          case ICMP_TYPE_ROUTER_ADVER:
0030425                   icmp_router_advertisement(icmp_port, icmp_data, pack_len,
0030426                            icmp_hdr);
icmp_router_advertisement()

icmp_router_advertisement() is called by process_data() to handle router advertisement messages.

If the icmp message is valid, ipr_add_oroute() is called to insert each of the routes into the output routing table.

A router advertisement message has the following format:

       0                   1                   2                   3

0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Type=9 | Code=0 | Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Num Addrs |Addr Entry Size| Lifetime |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Router Address[1] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Preference Level[1] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Router Address[2] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Preference Level[2] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| . |
| . |
| . |



0030427                   break;
0030428          case ICMP_TYPE_ROUTE_SOL:
0030429                   break;       /* Should be handled by a routing deamon. */
A routing daemon (misspelling above) is a background process which is responsible for routing. Minix uses irdpd (Internet Router Discovery Protocol Daemon) to find routers on the network. For Linux and FreeBSD, the most common routing daemon is "routed", which uses the RIP routing protocol.


0030430          case ICMP_TYPE_TIME_EXCEEDED:
0030431                   icmp_time_exceeded (icmp_port, data, ip_hdr_len, ip_hdr,
0030432                            icmp_data, pack_len, icmp_hdr);
icmp_time_exceeded()

icmp_time_exceeded() is called by process_data() to handle time-exceeded icmp messages.

If the icmp message is valid, ipr_ttl_exc() is called to either increase the ttl of the appropriate entry in the output routing table or mark the entry as unreachable.


0030433                   break;
0030434          default:
0030435                   DBLOCK(1, printf("got an unknown icmp (%d) from ",
0030436                            icmp_hdr->ih_type);
0030437                            writeIpAddr(ip_hdr->ih_src); printf("\n"));
0030438                   break;
0030439          }
The icmp packet is no longer needed. Free the icmp packet.


0030440          bf_afree(data);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030441          bf_afree(icmp_data);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030442 }
0030443 
0030444 PRIVATE void icmp_echo_request(icmp_port, ip_data, ip_len, ip_hdr,
0030445          icmp_data, icmp_len, icmp_hdr)
0030446 icmp_port_t *icmp_port;
0030447 acc_t *ip_data, *icmp_data;
0030448 int ip_len, icmp_len;
0030449 ip_hdr_t *ip_hdr;
0030450 icmp_hdr_t *icmp_hdr;
icmp_echo_request()

icmp_echo_request() is called by process_data() to handle echo requests (pings) that have been received by the system.

If the icmp message is valid, an icmp echo reply packet is assembled and enqueued in the outgoing queue.


0030451 {
0030452          acc_t *repl_ip_hdr, *repl_icmp;
0030453          icmp_hdr_t *repl_icmp_hdr;
0030454          i32_t tmp_chksum;
0030455          u16_t u16;
0030456 
0030457          if (icmp_hdr->ih_code != 0)
The code for an icmp echo request message is always 0.


0030458          {
0030459                   DBLOCK(1,
0030460                   printf("got an icmp echo request with unknown code (%d)\n",
0030461                            icmp_hdr->ih_code));
0030462                   bf_afree(ip_data);
0030463                   bf_afree(icmp_data);
0030464                   return;
0030465          }
0030466          if (icmp_len < ICMP_MIN_HDR_LEN + sizeof(icmp_id_seq_t))
icmp_id_seq_t is a sequence number present in icmp echo request and reply messages (ping) to differentiate messages.


0030467          {
0030468                   DBLOCK(1, printf("got an incomplete icmp echo request\n"));
0030469                   bf_afree(ip_data);
0030470                   bf_afree(icmp_data);
0030471                   return;
0030472          }
Line 30473 creates the ip header for the icmp echo reply message and lines 30474-30492 create the icmp header. Line 30493 then extracts the payload of the icmp echo request message and appends this data to the icmp header just created.


0030473          repl_ip_hdr= make_repl_ip(ip_hdr, ip_len);
make_repl_ip()

make_repl_ip() is called by icmp_echo_request(), which is called by process_data() to handle echo requests (ping). make_repl_ip() simply creates the ip header for the icmp echo reply message.


0030474          repl_icmp= bf_memreq (ICMP_MIN_HDR_LEN);
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0030475 assert (repl_icmp->acc_length == ICMP_MIN_HDR_LEN);
0030476          repl_icmp_hdr= (icmp_hdr_t *)ptr2acc_data(repl_icmp);
0030477          repl_icmp_hdr->ih_type= ICMP_TYPE_ECHO_REPL;
0030478          repl_icmp_hdr->ih_code= 0;
0030479 
0030480          DBLOCK(2,
0030481          printf("ih_chksum= 0x%x, ih_type= 0x%x, repl->ih_type= 0x%x\n",
0030482                   icmp_hdr->ih_chksum, *(u16_t *)&icmp_hdr->ih_type,
0030483                   *(u16_t *)&repl_icmp_hdr->ih_type));
Lines 30484-30489 determine the checksum of the icmp echo reply message. Since only the ih_type field of the icmp header has changed (from ICMP_TYPE_ECHO_REQ to ICMP_TYPE_ECHO_REPL), only the change to this field must be considered.


0030484          tmp_chksum= (~icmp_hdr->ih_chksum & 0xffff) -
0030485                   (i32_t)*(u16_t *)&icmp_hdr->ih_type+
0030486                   *(u16_t *)&repl_icmp_hdr->ih_type;
0030487          tmp_chksum= (tmp_chksum >> 16) + (tmp_chksum & 0xffff);
0030488          tmp_chksum= (tmp_chksum >> 16) + (tmp_chksum & 0xffff);
0030489          repl_icmp_hdr->ih_chksum= ~tmp_chksum;
0030490          DBLOCK(2, printf("sending chksum 0x%x\n", repl_icmp_hdr->ih_chksum));
0030491 
0030492          repl_ip_hdr->acc_next= repl_icmp;
0030493          repl_icmp->acc_next= bf_cut (icmp_data, ICMP_MIN_HDR_LEN,
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0030494                   icmp_len - ICMP_MIN_HDR_LEN);
0030495 
0030496          bf_afree(ip_data);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), only part of the chain will be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030497          bf_afree(icmp_data);
0030498 
0030499          enqueue_pack(icmp_port, repl_ip_hdr);
enqueue_pack()

enqueue_pack enqueues an outgoing icmp packet in an icmp port's write queue and then calls icmp_write() to send the packet out.

It is doubtful that the queue mentioned above will ever be more than a single icmp packet. ip_write() was rewritten and currently only returns NW_OK. If the packet is valid, ip_write() always moves the packet to the ip layer even if the packet isn't immediately sent out the ethernet or the psip interface.


0030500 }
0030501 
0030502 PRIVATE u16_t icmp_pack_oneCsum(icmp_pack)
0030503 acc_t *icmp_pack;
icmp_pack_oneCsum()

icmp_pack_oneCsum() computes the checksum of an icmp message (icmp header plus the ip header of the problem ip packet plus the first 8 bytes of the problem packet). It accomplishes this by computing the checksum (by calling oneC_sum()) of each of the message's buffers.

Note that a checksum is used to determine if errors occurred during the transmission of data.

icmp_pack_oneCsum() is very similar to udp's pack_oneCsum(). The two functions should have been consolidated into one.


0030504 {
0030505          u16_t prev;
0030506          int odd_byte;
0030507          char *data_ptr;
0030508          int length;
0030509          char byte_buf[2];
0030510 
0030511          assert (icmp_pack);
0030512 
0030513          prev= 0;
0030514 
0030515          odd_byte= FALSE;
0030516          for (; icmp_pack; icmp_pack= icmp_pack->acc_next)
Add the checksums of the buffers.

The trickiest part of this loop is handling the (possible) odd byte at the end of a buffer. If there is an odd number of bytes in a buffer, the odd byte at the end is saved and checksummed together with the first byte of the next non-empty buffer; the remaining bytes of that next buffer are then checksummed as usual. If an odd byte is still left over after the last buffer, it is checksummed on its own (line 30542).


0030517          {
0030518                   data_ptr= ptr2acc_data(icmp_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030519                   length= icmp_pack->acc_length;
0030520 
0030521                   if (!length)
0030522                            continue;
0030523                   if (odd_byte)
0030524                   {
0030525                            byte_buf[1]= *data_ptr;
0030526                            prev= oneC_sum(prev, (u16_t *)byte_buf, 2);
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16 bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the checksum of the packet (without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the checksum of a udp header (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0030527                            data_ptr++;
0030528                            length--;
0030529                            odd_byte= FALSE;
0030530                   }
0030531                   if (length & 1)
0030532                   {
0030533                            odd_byte= TRUE;
0030534                            length--;
0030535                            byte_buf[0]= data_ptr[length];
0030536                   }
0030537                   if (!length)
0030538                            continue;
0030539                   prev= oneC_sum (prev, (u16_t *)data_ptr, length);
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16 bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the checksum of the packet (without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the checksum of a udp header (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0030540          }
0030541          if (odd_byte)
0030542                   prev= oneC_sum (prev, (u16_t *)byte_buf, 1);
oneC_sum()

A checksum is used to determine if errors occurred during the transmission of data. The checksum algorithm used by oneC_sum() (which is also the Internet standard) is described by RFC 1071.

Essentially, the algorithm goes through the data and adds all the 16 bit words together (using one's complement addition). The high 16 bits of the resulting 32 bit value are then added to the low 16 bits (again, using one's complement addition). The checksum field is then set to the one's complement of this 16 bit sum. (Recall that the one's complement of 0xF0F0 is 0x0F0F.) Since adding any 16 bit number to its 16 bit one's complement yields 0xFFFF, the checksum of the packet (without the checksum field) added to the checksum field will equal 0xFFFF (provided the packet was not corrupted after the checksum field was calculated). For example, the checksum of a udp header (including the checksum field) will equal 0xFFFF if the packet was not corrupted in delivery.


From RFC 1071:

In outline, the Internet checksum algorithm is fairly simple:

(1) Adjacent octets to be checksummed are paired to form 16-bit
integers, and the 1's complement sum of these 16-bit integers is
formed.

(2) To generate a checksum, the checksum field itself is cleared,
the 16-bit 1's complement sum is computed over the octets
concerned, and the 1's complement of this sum is placed in the
checksum field.

(3) To check a checksum, the 1's complement sum is computed over the
same set of octets, including the checksum field. If the result
is all 1 bits (-0 in 1's complement arithmetic), the check
succeeds.

Below is a "C" code algorithm that describes the process above. This algorithm is also from RFC 1071. Note that count is the running count of all the bytes in the data and checksum is the return value.



{
/* Compute Internet Checksum for "count" bytes
* beginning at location "addr".
*/
register long sum = 0;

while( count > 1 ) {
/* This is the inner loop */
sum += * (unsigned short) addr++;
count -= 2;
}

/* Add left-over byte, if any */
if( count > 0 )
sum += * (unsigned char *) addr;

/* Fold 32-bit sum to 16 bits */
while (sum>>16)
sum = (sum & 0xffff) + (sum >> 16);

checksum = ~sum;
}



0030543          return prev;
0030544 }
0030545 
0030546 PRIVATE acc_t *make_repl_ip(ip_hdr, ip_len)
0030547 ip_hdr_t *ip_hdr;
0030548 int ip_len;
make_repl_ip()

make_repl_ip() is called by icmp_echo_request(), which is called by process_data() to handle echo requests (ping). make_repl_ip() simply creates the ip header for the icmp echo reply message.


0030549 {
0030550          ip_hdr_t *repl_ip_hdr;
0030551          acc_t *repl;
0030552          int repl_hdr_len;
0030553 
0030554          if (ip_len>IP_MIN_HDR_SIZE)
0030555          {
0030556                   DBLOCK(1, printf("ip_hdr options NOT supported (yet?)\n"));
0030557                   ip_len= IP_MIN_HDR_SIZE;
0030558          }
0030559 
0030560          repl_hdr_len= IP_MIN_HDR_SIZE;
0030561 
0030562          repl= bf_memreq(repl_hdr_len);
Allocate a buffer for the ip header which will encapsulate the icmp echo reply message.


bf_memreq()


After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0030563 assert (repl->acc_length == repl_hdr_len);
0030564 
Some of the fields of the echo reply message are copied from the icmp echo request message just received.


0030565          repl_ip_hdr= (ip_hdr_t *)ptr2acc_data(repl);
0030566 
0030567          repl_ip_hdr->ih_vers_ihl= repl_hdr_len >> 2;
0030568          repl_ip_hdr->ih_tos= ip_hdr->ih_tos;
0030569          repl_ip_hdr->ih_ttl= ICMP_DEF_TTL;
0030570          repl_ip_hdr->ih_proto= IPPROTO_ICMP;
0030571          repl_ip_hdr->ih_dst= ip_hdr->ih_src;
The source of the icmp echo request message becomes the destination of the icmp echo reply message.


0030572          repl_ip_hdr->ih_flags_fragoff= 0;
0030573 
0030574          return repl;
0030575 }
0030576 
0030577 PRIVATE void enqueue_pack(icmp_port, reply_ip_hdr)
0030578 icmp_port_t *icmp_port;
0030579 acc_t *reply_ip_hdr;
enqueue_pack()

enqueue_pack enqueues an outgoing icmp packet in an icmp port's write queue and then calls icmp_write() to send the packet out.

It is doubtful that the queue mentioned above will ever be more than a single icmp packet. ip_write() was rewritten and currently only returns NW_OK. If the packet is valid, ip_write() always moves the packet to the ip layer even if the packet isn't immediately sent out the ethernet or the psip interface.


0030580 {
0030581          reply_ip_hdr->acc_ext_link= 0;
0030582 
icp_head_queue and icp_tail_queue are the head and tail of the write queue for the icmp layer.


0030583          if (icmp_port->icp_head_queue)
0030584          {
0030585                   icmp_port->icp_tail_queue->acc_ext_link=
0030586                            reply_ip_hdr;
0030587          }
0030588          else
0030589          {
0030590                   icmp_port->icp_head_queue= reply_ip_hdr;
0030591          }
0030592          reply_ip_hdr->acc_ext_link= NULL;
0030593          icmp_port->icp_tail_queue= reply_ip_hdr;
0030594 
0030595          if (!(icmp_port->icp_flags & ICPF_WRITE_IP))
0030596                   icmp_write(icmp_port);
icmp_write()

icmp_write() loops through an icmp port's write queue, each time placing the head of the queue into the icp_write_pack field of the icmp port and then calling ip_write() to send this icmp packet out.

After functions build an icmp packet, they call enqueue_pack() to send the icmp packet out rather than call icmp_write() directly. enqueue_pack() enqueues the icmp packet if another icmp packet is currently being sent out.

It is doubtful that the write queue mentioned above will ever be more than a single icmp packet. ip_write() was rewritten and currently only returns NW_OK. If the packet is valid, ip_write() always moves the packet to the ip layer even if the packet isn't immediately sent out the ethernet or the psip interface.


0030597 }
0030598 
0030599 PRIVATE void icmp_write(icmp_port)
0030600 icmp_port_t *icmp_port;
icmp_write()

icmp_write() loops through an icmp port's write queue, each time placing the head of the queue into the icp_write_pack field of the icmp port and then calling ip_write() to send this icmp packet out.

After functions build an icmp packet, they call enqueue_pack() to send the icmp packet out rather than call icmp_write() directly. enqueue_pack() enqueues the icmp packet if another icmp packet is currently being sent out.

It is doubtful that the write queue mentioned above will ever be more than a single icmp packet. ip_write() was rewritten and currently only returns NW_OK. If the packet is valid, ip_write() always moves the packet to the ip layer even if the packet isn't immediately sent out the ethernet or the psip interface.


0030601 {
0030602          int result;
0030603 
0030604 assert (!(icmp_port->icp_flags & ICPF_WRITE_IP));
0030605 
0030606          while (icmp_port->icp_head_queue != NULL)
icp_head_queue and icp_tail_queue are the head and tail of the write queue for the icmp layer.


0030607          {
0030608                   assert(icmp_port->icp_write_pack == NULL);
0030609                   icmp_port->icp_write_pack= icmp_port->icp_head_queue;
0030610                   icmp_port->icp_head_queue= icmp_port->icp_head_queue->
0030611                            acc_ext_link;
0030612 
0030613                   icmp_port->icp_flags |= ICPF_WRITE_IP;
An icmp write is in progress. The flag is cleared on line 30622 if the write operation is successful.


0030614 
0030615                   result= ip_write(icmp_port->icp_ipfd,
0030616                            bf_bufsize(icmp_port->icp_write_pack));
Since ip_write() never returns NW_SUSPEND (see line 30617), icmp_write() never returns without clearing the icmp port's ICPF_WRITE_IP flag (and icmp_write() will never set the ICPF_WRITE_SP flag).


ip_write()


ip_write() simply gets an ip packet from a higher layer and then calls ip_send().

For example, after assembling an ip packet and placing the packet in the write queue of the appropriate udp port, udp's restart_write_fd() calls ip_write(), which then calls udp_get_data() to get the packet from the queue.

For a write to a udp file descriptor, ip_write()'s position in the big picture is as follows:

It is important to note that ip_write() ALWAYS RETURNS NW_OK! In previous versions of the network service, ip_write() returned other values (including NW_SUSPEND).


bf_bufsize()


bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).

For a detailed description of the network service's buffer management, click here.


0030617                   if (result == NW_SUSPEND)
0030618                   {
0030619                            icmp_port->icp_flags |= ICPF_WRITE_SP;
0030620                            return;
0030621                   }
0030622                   icmp_port->icp_flags &= ~ICPF_WRITE_IP;
Prepare for the next icmp packet in the icp_head_queue queue.


0030623          }
0030624 }
0030625 
0030626 PRIVATE void icmp_buffree(priority)
0030627 int priority;
icmp_buffree()

icmp_buffree(priority) is called by bf_memreq() if bf_memreq() does not have enough accessors to satisfy a buffer request.

If priority, icmp_buffree()'s only parameter, is ICMP_PRI_QUEUE (#define'd as 1), all icmp packets in the write queue of an icmp port are freed. If priority is any other value, icmp_buffree() will do nothing.

Note that there is not likely to be more than a single icmp packet in the write queue of any icmp port.


0030628 {
0030629          acc_t *tmp_acc;
0030630          int i;
0030631          icmp_port_t *icmp_port;
0030632 
0030633          if (priority == ICMP_PRI_QUEUE)
0030634          {
0030635                   for (i=0, icmp_port= icmp_port_table; i<ip_conf_nr;
0030636                            i++, icmp_port++)
Free the icmp packets in each of the icmp port's write queues.

Note that there is not likely to be more than a single icmp packet in the write queue of any icmp port.


0030637                   {
0030638                            while(icmp_port->icp_head_queue)
0030639                            {
0030640                                     tmp_acc= icmp_port->icp_head_queue;
0030641                                     icmp_port->icp_head_queue=
0030642                                              tmp_acc->acc_ext_link;
0030643                                     bf_afree(tmp_acc);
0030644                            }
0030645                   }
0030646          }
0030647 }
0030648 
0030649 #ifdef BUF_CONSISTENCY_CHECK
0030650 PRIVATE void icmp_bufcheck()
0030651 {
0030652          int i;
0030653          icmp_port_t *icmp_port;
0030654          acc_t *pack;
0030655 
0030656          for (i= 0, icmp_port= icmp_port_table; i<ip_conf_nr; i++, icmp_port++)
0030657          {
0030658                   for (pack= icmp_port->icp_head_queue; pack;
0030659                            pack= pack->acc_ext_link)
0030660                   {
0030661                            bf_check_acc(pack);
0030662                   }
0030663                   bf_check_acc(icmp_port->icp_write_pack);
0030664          }
0030665 }
0030666 #endif
0030667 
0030668 PRIVATE void icmp_dst_unreach(icmp_port, ip_pack, ip_hdr_len, ip_hdr, icmp_pack,
0030669          icmp_len, icmp_hdr)
0030670 icmp_port_t *icmp_port;
0030671 acc_t *ip_pack;
0030672 int ip_hdr_len;
0030673 ip_hdr_t *ip_hdr;
0030674 acc_t *icmp_pack;
0030675 int icmp_len;
0030676 icmp_hdr_t *icmp_hdr;
icmp_dst_unreach()

icmp_dst_unreach() is called by process_data() to handle either host-unreachable or network-unreachable icmp packets that are received from routers.

If the icmp message is valid, ipr_destunrch() is called to mark the appropriate entry in the output routing table as unreachable.


0030677 {
0030678          acc_t *old_ip_pack;
0030679          ip_hdr_t *old_ip_hdr;
0030680          int ip_port_nr;
0030681          ipaddr_t dst, mask;
0030682 
0030683          if (icmp_len < 8 + IP_MIN_HDR_SIZE)
0030684          {
0030685                   DBLOCK(1, printf("dest unrch with wrong size\n"));
0030686                   return;
0030687          }
Lines 30688-30690 extract the ip header of the ip packet whose destination was unreachable.


0030688          old_ip_pack= bf_cut (icmp_pack, 8, icmp_len-8);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0030689          old_ip_pack= bf_packIffLess(old_ip_pack, IP_MIN_HDR_SIZE);
bf_packIffLess()

If the data in the first accessor of a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0030690          old_ip_hdr= (ip_hdr_t *)ptr2acc_data(old_ip_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030691 
0030692          if (old_ip_hdr->ih_src != ip_hdr->ih_dst)
Verify that this system did indeed send out the ip packet whose destination was unreachable.


0030693          {
0030694                   DBLOCK(1, printf("dest unrch based on wrong packet\n"));
0030695                   bf_afree(old_ip_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), only part of the chain will be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030696                   return;
0030697          }
0030698 
0030699          ip_port_nr= icmp_port->icp_ipport;
The icp_ipport field of an icmp port is the associated ip port number of the icmp port.


0030700 
0030701          switch(icmp_hdr->ih_code)
0030702          {
0030703          case ICMP_NET_UNRCH:
A message with this code is likely sent by a core router that cannot find the destination network in its routing table.


0030704                   dst= old_ip_hdr->ih_dst;
0030705                   mask= ip_get_netmask(dst);
ip_get_netmask()

ip_get_netmask(hostaddr) simply returns the natural subnet mask of hostaddr, the only parameter to ip_get_netmask().

For a class A network, the subnet mask is 255.000.000.000 (0xff000000);
for a class B network, the subnet mask is 255.255.000.000 (0xffff0000);
for a class C network, the subnet mask is 255.255.255.000 (0xffffff00);
for a zero network type (0.xx.xx.xx), the subnet mask is (0x00000000).


0030706                   ipr_destunrch (ip_port_nr, dst & mask, mask,
0030707                            IPR_UNRCH_TIMEOUT);
ipr_destunrch()

ipr_destunrch(port_nr, dest, netmask, timeout) searches for a route in the output routing table and, if one is found, changes the distance of the route to ORTD_UNREACHABLE (#define'd as 512 - a very large number).

ipr_destunrch() is called from icmp_dst_unreach().


0030708                   break;
0030709          case ICMP_HOST_UNRCH:
A message with this code is sent by a destination router that cannot find the destination ip address in its arp table.


0030710                   ipr_destunrch (ip_port_nr, old_ip_hdr->ih_dst, (ipaddr_t)-1,
0030711                            IPR_UNRCH_TIMEOUT);
ipr_destunrch()

ipr_destunrch(port_nr, dest, netmask, timeout) searches for a route in the output routing table and, if one is found, changes the distance of the route to ORTD_UNREACHABLE (#define'd as 512 - a very large number).

ipr_destunrch() is called from icmp_dst_unreach().


0030712                   break;
0030713          case ICMP_PORT_UNRCH:
The network is up and the destination host is up but the port is unreachable. This message is sent by the destination host.


0030714                   /* At the moment we don't do anything with this information.
0030715                    * It should be handed to the appropriate transport layer.
0030716                    */
0030717                   break;
0030718          default:
0030719                   DBLOCK(1, printf("icmp_dst_unreach: got strange code %d from ",
0030720                            icmp_hdr->ih_code);
0030721                            writeIpAddr(ip_hdr->ih_src);
0030722                            printf("; original destination: ");
0030723                            writeIpAddr(old_ip_hdr->ih_dst);
0030724                            printf("; protocol: %d\n",
0030725                            old_ip_hdr->ih_proto));
0030726                   break;
0030727          }
0030728          bf_afree(old_ip_pack);
No need to keep the copy of the original ip packet that was cut out of the icmp message (old_ip_pack). The information has been processed by ipr_destunrch().


bf_afree()


After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), only part of the chain will be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030729 }
0030730 
0030731 PRIVATE void icmp_time_exceeded(icmp_port, ip_pack, ip_hdr_len, ip_hdr,
0030732          icmp_pack, icmp_len, icmp_hdr)
0030733 icmp_port_t *icmp_port;
0030734 acc_t *ip_pack;
0030735 int ip_hdr_len;
0030736 ip_hdr_t *ip_hdr;
0030737 acc_t *icmp_pack;
0030738 int icmp_len;
0030739 icmp_hdr_t *icmp_hdr;
icmp_time_exceeded()

icmp_time_exceeded() is called by process_data() to handle time-exceeded icmp messages.

If the icmp message is valid, ipr_ttl_exc() is called to either increase the distance of the appropriate entry in the output routing table or mark the entry as unreachable.


0030740 {
0030741          acc_t *old_ip_pack;
0030742          ip_hdr_t *old_ip_hdr;
0030743          int ip_port_nr;
0030744 
0030745          if (icmp_len < 8 + IP_MIN_HDR_SIZE)
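When an ip packet is discarded because its ttl expired, the ip header of the expired ip packet is sent back to the packet's source inside an icmp message. The icmp header fields make up the first 8 bytes of that message, which is why the message must be at least 8 + IP_MIN_HDR_SIZE bytes long.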


0030746          {
0030747                   DBLOCK(1, printf("time exceeded with wrong size\n"));
0030748                   return;
0030749          }
Lines 30750-30752 extract the ip header of the expired ip packet.


0030750          old_ip_pack= bf_cut (icmp_pack, 8, icmp_len-8);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0030751          old_ip_pack= bf_packIffLess(old_ip_pack, IP_MIN_HDR_SIZE);
bf_packIffLess()

If the data in the first accessor of a linked list of accessors is less than min_len (the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0030752          old_ip_hdr= (ip_hdr_t *)ptr2acc_data(old_ip_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030753 
0030754          if (old_ip_hdr->ih_src != ip_hdr->ih_dst)
Verify that this system did indeed send out the ip packet that expired.


0030755          {
0030756                   DBLOCK(1, printf("time exceeded based on wrong packet\n"));
0030757                   bf_afree(old_ip_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030758                   return;
0030759          }
0030760 
0030761          ip_port_nr= icmp_port->icp_ipport;
The icp_ipport field of an icmp port is the associated ip port number of the icmp port.


0030762 
0030763          switch(icmp_hdr->ih_code)
0030764          {
0030765          case ICMP_TTL_EXC:
0030766                   ipr_ttl_exc (ip_port_nr, old_ip_hdr->ih_dst, (ipaddr_t)-1,
0030767                            IPR_TTL_TIMEOUT);
ipr_ttl_exc()

ipr_ttl_exc(port_nr, dest, netmask, timeout) finds a route in the output routing table whose destination is dest, ipr_ttl_exc()'s second parameter. If a route is found, ipr_ttl_exc() doubles the distance of the route if the doubled value is less than IP_MAX_TTL (#define'd as 255), and increases the distance by one if the doubled value is greater than IP_MAX_TTL.

ipr_ttl_exc() is called by icmp_time_exceeded() upon receipt of a time-exceeded icmp message.


0030768                   break;
0030769          case ICMP_FRAG_REASSEM:
Time-exceeded icmp messages with this code are sent by the destination if the maximum allowable time to reassemble a fragmented packet has been exceeded. Since this doesn't indicate a total breakdown of network reachability (it is likely a less significant problem), the time-exceeded icmp message is ignored.


0030770                   /* Ignore reassembly time-outs. */
0030771                   break;
0030772          default:
0030773                   DBLOCK(1, printf("got strange code: %d\n",
0030774                            icmp_hdr->ih_code));
0030775                   break;
0030776          }
0030777          bf_afree(old_ip_pack);
No need to keep old_ip_pack, the copy of the expired ip packet that was cut out of the icmp message on line 30750. The information has been processed by ipr_ttl_exc() (if appropriate).


bf_afree()


After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030778 }
0030779 
0030780 PRIVATE void icmp_router_advertisement(icmp_port, icmp_pack, icmp_len, icmp_hdr)
0030781 icmp_port_t *icmp_port;
0030782 acc_t *icmp_pack;
0030783 int icmp_len;
0030784 icmp_hdr_t *icmp_hdr;
icmp_router_advertisement()

icmp_router_advertisement() is called by process_data() to handle router advertisement messages.

If the icmp message is valid, ipr_add_oroute() is called to insert each of the routes into the output routing table.

A router advertisement message has the following format:

 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|     Type=9    |     Code=0    |           Checksum            |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|   Num Addrs   |Addr Entry Size|           Lifetime            |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                       Router Address[1]                       |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                      Preference Level[1]                      |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                       Router Address[2]                       |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                      Preference Level[2]                      |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                               .                               |
|                               .                               |
|                               .                               |



0030785 {
0030786          int entries;
0030787          int entry_size;
0030788          u16_t lifetime;
0030789          int i;
0030790          char *bufp;
0030791 
0030792          if (icmp_len < 8)
A router advertisement icmp message will consist of at least the ih_type, ih_code, ih_chksum, and ihh_ram fields. These fields total 8 bytes. The size of the router address entries themselves must also be accounted for (see the test on line 30822).


0030793          {
0030794                   DBLOCK(1,
0030795                   printf("router advertisement with wrong size (%d)\n",
0030796                            icmp_len));
0030797                   return;
0030798          }
0030799          if (icmp_hdr->ih_code != 0)
As specified in RFC 1256, the code for router advertisements is always 0.


0030800          {
0030801                   DBLOCK(1,
0030802                   printf("router advertisement with wrong code (%d)\n",
0030803                            icmp_hdr->ih_code));
0030804                   return;
0030805          }
The iram_na (Icmp Router Advertisement Message _ Number of Addresses) field holds the number of router addresses in the router advertisement message. The iram_aes (Address Entry Size) field holds the size of each router address entry in 32-bit words, which is why line 30807 multiplies it by 4 to obtain the entry size in bytes (the first 4 bytes of an entry are the router address and the second 4 bytes are the preference).


0030806          entries= icmp_hdr->ih_hun.ihh_ram.iram_na;
0030807          entry_size= icmp_hdr->ih_hun.ihh_ram.iram_aes * 4;
0030808          if (entries < 1)
There is no point to having a router advertisement message with 0 entries.


0030809          {
0030810                   DBLOCK(1, printf(
0030811                   "router advertisement with wrong number of entries (%d)\n",
0030812                            entries));
0030813                   return;
0030814          }
0030815          if (entry_size < 8)
An entry must have at least 8 bytes; 4 bytes for the router address and 4 bytes for the preference.


0030816          {
0030817                   DBLOCK(1, printf(
0030818                   "router advertisement with wrong entry size (%d)\n",
0030819                            entry_size));
0030820                   return;
0030821          }
0030822          if (icmp_len < 8 + entries * entry_size)
A router advertisement icmp message will consist of the ih_type, ih_code, ih_chksum, and ihh_ram fields (which total 8 bytes) plus the router address entries (entries * entry_size bytes). Verify that the icmp message is at least this size.

It would appear that this test and the test on line 30792 could have been combined.


0030823          {
0030824                   DBLOCK(1,
0030825                            printf("router advertisement with wrong size\n");
0030826                            printf(
0030827                            "\t(entries= %d, entry_size= %d, icmp_len= %d)\n",
0030828                            entries, entry_size, icmp_len));
0030829                   return;
0030830          }
0030831          lifetime= ntohs(icmp_hdr->ih_hun.ihh_ram.iram_lt);
The iram_lt (LifeTime) field of the router advertisement message specifies the lifetime of the router advertisements.


htons() / ntohs() / htonl() / ntohl()


From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures order the bytes of a multi-byte value differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the 16-bit value 0x1234 is stored in memory as the byte sequence 0x34 0x12. However, on a "big-endian" machine, the value 0x1234 is stored in memory as the byte sequence 0x12 0x34.

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0030832          if (lifetime > 9000)
As specified in RFC 1256, the maximum allowable lifetime for a router advertisement is 9000 seconds.


0030833          {
0030834                   DBLOCK(1, printf(
0030835                            "router advertisement with wrong lifetime (%d)\n",
0030836                            lifetime));
0030837                   return;
0030838          }
0030839          for (i= 0, bufp= (char *)&icmp_hdr->ih_dun.uhd_data[0]; i< entries; i++,
0030840                   bufp += entry_size)
Add an output routing table entry for each entry in the router advertisement message.


0030841          {
0030842                   ipr_add_oroute(icmp_port->icp_ipport, HTONL(0L), HTONL(0L),
0030843                            *(ipaddr_t *)bufp, lifetime * HZ, 1, 0,
0030844                            ntohl(*(i32_t *)(bufp+4)), NULL);
ipr_add_oroute()

ipr_add_oroute() adds an output route to the main output routing table and, if successful, returns a reference to the new entry in the last parameter (if not NULL). ipr_add_oroute() finds either an empty entry, an expired entry, an entry with the same port, network, subnet mask, and smaller distance, or the oldest entry to use for a new dynamic route. Only an empty entry, an expired entry, or the oldest entry can be used for a new static route.

For a detailed description of the layout of the main output routing table (and specifically, the nextnw, nextgw, and nextdist fields), click here.


0030845          }
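The icmp packet icmp_pack is not freed here. Note that icmp_time_exceeded() and icmp_redirect() do not free icmp_pack either (they free only the copies they made with bf_cut()), so the caller, process_data(), is presumably responsible for freeing it; if it is not, icmp_pack should be freed here.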


0030846 }
0030847                   
0030848 PRIVATE void icmp_redirect(icmp_port, ip_hdr, icmp_pack, icmp_len, icmp_hdr)
0030849 icmp_port_t *icmp_port;
0030850 ip_hdr_t *ip_hdr;
0030851 acc_t *icmp_pack;
0030852 int icmp_len;
0030853 icmp_hdr_t *icmp_hdr;
icmp_redirect()

icmp_redirect() is called by process_data() to handle icmp redirect messages that are received. If the icmp redirect is properly constructed, ipr_redirect() is called to update the routing table for the destination host or network.


0030854 {
0030855          acc_t *old_ip_pack;
0030856          ip_hdr_t *old_ip_hdr;
0030857          int ip_port_nr;
0030858          ipaddr_t dst, mask;
0030859 
0030860          if (icmp_len < 8 + IP_MIN_HDR_SIZE)
0030861          {
0030862                   DBLOCK(1, printf("redirect with wrong size\n"));
0030863                   return;
0030864          }
0030865          old_ip_pack= bf_cut (icmp_pack, 8, icmp_len-8);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0030866          old_ip_pack= bf_packIffLess(old_ip_pack, IP_MIN_HDR_SIZE);
bf_packIffLess()

If the first accessor in a linked list of accessors holds fewer than min_len (the second parameter) bytes of data, bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0030867          old_ip_hdr= (ip_hdr_t *)ptr2acc_data(old_ip_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030868 
0030869          ip_port_nr= icmp_port->icp_ipport;
0030870 
0030871          switch(icmp_hdr->ih_code)
An icmp redirect message can instruct the system that there is a more direct path to either a network or a host.


0030872          {
0030873          case ICMP_REDIRECT_NET:
0030874                   dst= old_ip_hdr->ih_dst;
0030875                   mask= ip_get_netmask(dst);
ip_get_netmask()

ip_get_netmask(hostaddr) simply returns the natural subnet mask of hostaddr, the only parameter to ip_get_netmask().

For a class A network, the subnet mask is 255.000.000.000 (0xff000000);
for a class B network, the subnet mask is 255.255.000.000 (0xffff0000);
for a class C network, the subnet mask is 255.255.255.000 (0xffffff00);
for a zero network type (0.xx.xx.xx), the subnet mask is (0x00000000).


0030876                   ipr_redirect (ip_port_nr, dst & mask, mask,
0030877                            ip_hdr->ih_src, icmp_hdr->ih_hun.ihh_gateway,
0030878                            IPR_REDIRECT_TIMEOUT);
ipr_redirect()

ipr_redirect(port_nr, dest, netmask, old_gateway, new_gateway, timeout) attempts to find a route for the destination dest, ipr_redirect()'s second parameter, in the output routing table. If a dynamic route whose gateway is old_gateway (ipr_redirect()'s fourth parameter) is found, this route is marked as unreachable and a new route with values of port_nr, dest, netmask, new_gateway, and timeout is added to the output routing table.


0030879                   break;
0030880          case ICMP_REDIRECT_HOST:
0030881                   ipr_redirect (ip_port_nr, old_ip_hdr->ih_dst, (ipaddr_t)-1,
0030882                            ip_hdr->ih_src, icmp_hdr->ih_hun.ihh_gateway,
0030883                            IPR_REDIRECT_TIMEOUT);
ipr_redirect()

ipr_redirect(port_nr, dest, netmask, old_gateway, new_gateway, timeout) attempts to find a route for the destination dest, ipr_redirect()'s second parameter, in the output routing table. If a dynamic route whose gateway is old_gateway (ipr_redirect()'s fourth parameter) is found, this route is marked as unreachable and a new route with values of port_nr, dest, netmask, new_gateway, and timeout is added to the output routing table.


0030884                   break;
0030885          default:
0030886                   DBLOCK(1, printf("got strange code: %d\n",
0030887                            icmp_hdr->ih_code));
0030888                   break;
0030889          }
0030890          bf_afree(old_ip_pack);
No need to keep old_ip_pack, the copy of the original ip packet that was cut out of the icmp message on line 30865. The information has been processed by ipr_redirect().


bf_afree()


After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030891 }
0030892 
0030893 PRIVATE acc_t *icmp_err_pack(pack, icmp_hdr)
0030894 acc_t *pack;
0030895 icmp_hdr_t **icmp_hdr;
icmp_err_pack()

icmp_err_pack(pack, icmp_hdr) creates an icmp message and encapsulates the message in an ip header.

In order to do this, icmp_err_pack() first cuts out everything except the ip header and the first 8 bytes of data from the ip packet pack, icmp_err_pack()'s first parameter. Next, icmp_err_pack() zeroizes most of the fields (all fields except ih_chksum; the other fields must be set by the calling function) of the icmp header icmp_hdr, icmp_err_pack()'s second parameter, and appends the remains of the ip packet to the icmp header to create an icmp message. Finally, icmp_err_pack() creates an ip header and appends the newly created icmp message to this ip header.


0030896 {
0030897          ip_hdr_t *ip_hdr;
An understanding of the ip header fields is critical to understanding this function.


ip_hdr_t


struct ip_hdr_t is the structure of an ip header. "ih" (e.g., ih_src, ih_dst) stands for "Ip Header".

ip_hdr_t is declared in /include/net/gen/ip_hdr.h:

typedef struct ip_hdr

{
u8_t ih_vers_ihl, ih_tos;
u16_t ih_length, ih_id, ih_flags_fragoff;
u8_t ih_ttl, ih_proto;
u16_t ih_hdr_chk;
ipaddr_t ih_src, ih_dst;
} ip_hdr_t;

ih_vers_ihl: The lower 4 bits hold the length of the header, including options (if there are any), in units of 4 bytes (i.e., the actual length in bytes is 4 times as great as the value stored in the lower 4 bits of ih_vers_ihl). An example of an option is a router list that a packet should follow to its destination.

The upper four bits is the version number (e.g., IPv4).


ih_tos: tos stands for "Type Of Service" and is the priority of the ip packet. A value of zero is the lowest priority. Both UDP and TCP have a default TOS of zero.

#define TCP_DEF_TOS 0
#define UDP_TOS 0


ih_length: The length of the entire ip packet, including the ip header.


ih_id: The value of ih_id for the first packet sent out is determined by ip_init() and is equal to the number of clock ticks since reboot (i.e., the value returned by get_time) and is incremented for each packet sent out. This value is used to combine fragments at the receiving end if fragmentation has occurred.


ih_flags_fragoff: ih_flags_fragoff is a combination of flags and a (possible) fragmentation offset ("fragoff").

If the packet should not be fragmented, ih_flags_fragoff is set to IH_DONT_FRAG. If there are additional fragments (e.g., the 3rd fragment of 4 fragments), ih_flags_fragoff is set to IH_MORE_FRAGS.

If the packet is indeed just a fragment of a packet, this value indicates the starting byte position (in 8 byte increments) of the fragment's data within the original ip packet's data. So for example, if an ip packet with a data size of 2000 bytes (not including the ip header) is broken up into two fragments of 1496 and 504 bytes, the first fragment would have a fragmentation offset of 0 bytes and the second fragment would have a fragmentation offset of 1496 bytes; the offset stored in the second fragment's ih_flags_fragoff is therefore 187 (1496 / 8 = 187).


ih_ttl: "Time to live" for the packet. As a packet is routed to the destination, each router decrements the packet's ttl. When the ttl reaches 0, the router discards the packet and sends an icmp time-exceeded message to the source. The ttl is designed to prevent packets that can't reach their destination from indefinitely bouncing around between routers. UDP's default TTL is 30:

#define UDP_TTL 30

Note that the Minix code also uses this value as a timeout value (in seconds). This code was written before the ttl field was redefined to be strictly a hop count. The original IP RFC defines the ttl field as the time to live in seconds.


ih_proto: The protocol of the ip packet. For example, if the packet is a udp packet, ih_proto will be 17. If the packet is a tcp packet, ih_proto will be 6.


ih_hdr_chk: Checksum for the header.


ih_src, ih_dst: Source and destination ip address of the ip packet.


IP HEADER (as given by RFC 791)


 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version|  IHL  |Type of Service|          Total Length         |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|         Identification        |Flags|      Fragment Offset    |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|  Time to Live |    Protocol   |         Header Checksum       |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                       Source Address                          |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                    Destination Address                        |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                    Options                    |    Padding    |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



0030898          acc_t *ip_pack, *icmp_pack, *tmp_pack;
0030899          int ip_hdr_len, icmp_hdr_len;
0030900          size_t size;
0030901          ipaddr_t dest, netmask;
0030902          nettype_t nettype;
0030903 
0030904          pack= bf_packIffLess(pack, IP_MIN_HDR_SIZE);
bf_packIffLess()

If the first accessor in a linked list of accessors holds fewer than min_len (the second parameter) bytes of data, bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0030905          ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030906 
0030907          /* If the IP protocol is ICMP or the fragment offset is non-zero,
0030908           * drop the packet. Also check if the source address is valid.
0030909           */
As stated in RFC 1122, if the problem packet was one of the following:

1) an icmp packet
2) a fragment but was not the first fragment
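3) a packet whose source address does not identify a single host (e.g., a zero, broadcast or multicast address, or an address whose host part is all zeros or all ones); note that the test on lines 30919-30921 does accept loopback (IPNT_LOCAL) source addresses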

do not send an icmp packet back to the problem packet's source. Lines 30910-30930 check for these cases.


0030910          if (ip_hdr->ih_proto == IPPROTO_ICMP ||
0030911                   (ntohs(ip_hdr->ih_flags_fragoff) & IH_FRAGOFF_MASK) != 0)
0030912          {
0030913                   bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030914                   return NULL;
0030915          }
0030916          dest= ip_hdr->ih_src;
0030917          nettype= ip_nettype(dest);
ip_nettype()

ip_nettype(ipaddr) returns the network type (which will be of type nettype_t) of ipaddr, the only parameter to ip_nettype().

The nettype_t enum typedef is declared in inet/generic/ip_int.h. Each type's associated ip address range is included in the comments.

typedef enum nettype
{
IPNT_ZERO, /* 0.xx.xx.xx */
IPNT_CLASS_A, /* 1.xx.xx.xx .. 126.xx.xx.xx */
IPNT_LOCAL, /* 127.xx.xx.xx */
IPNT_CLASS_B, /* 128.xx.xx.xx .. 191.xx.xx.xx */
IPNT_CLASS_C, /* 192.xx.xx.xx .. 223.xx.xx.xx */
IPNT_CLASS_D, /* 224.xx.xx.xx .. 239.xx.xx.xx */
IPNT_CLASS_E, /* 240.xx.xx.xx .. 247.xx.xx.xx */
IPNT_MARTIAN, /* 248.xx.xx.xx .. 254.xx.xx.xx + */
IPNT_BROADCAST /* 255.255.255.255 */
} nettype_t;



0030918          netmask= ip_netmask(nettype);
ip_netmask()

ip_netmask() returns a nettype's associated subnet mask.

For a class A network, the subnet mask is 255.000.000.000 (0xff000000);
for a class B network, the subnet mask is 255.255.000.000 (0xffff0000);
for a class C network, the subnet mask is 255.255.255.000 (0xffffff00);
for a zero network type (0.xx.xx.xx), the subnet mask is (0x00000000).

Note that the returned subnet mask is in network format.


0030919          if ((nettype != IPNT_CLASS_A && nettype != IPNT_LOCAL &&
0030920                   nettype != IPNT_CLASS_B && nettype != IPNT_CLASS_C) ||
0030921                   (dest & ~netmask) == 0 || (dest & ~netmask) == ~netmask)
0030922          {
0030923 #if !CRAMPED
0030924                   printf("icmp_err_pack: invalid source address: ");
0030925                   writeIpAddr(dest);
0030926                   printf("\n");
0030927 #endif
0030928                   bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030929                   return NULL;
0030930          }
0030931 
0030932          /* Take the IP header and the first 64 bits of user data. */
An icmp message has the following format:



The message is encapsulated within an ip packet.


0030933          size= ntohs(ip_hdr->ih_length);
The ih_length field of an ip header is the length of the header plus the data.


htons() / ntohs() / htonl() / ntohl()


From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures order the bytes of a multi-byte value differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the 16-bit value 0x1234 is stored in memory as the byte sequence 0x34 0x12. However, on a "big-endian" machine, the value 0x1234 is stored in memory as the byte sequence 0x12 0x34.

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0030934          ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
The lower 4 bits of ih_vers_ihl hold the length of the ip header, including options (if there are any), in units of 4 bytes; shifting the value left by 2 bit positions therefore gives the header length in bytes. An example of an option is a router list that a packet should follow to its destination.

IH_IHL_MASK is #define'd in /include/net/gen/ip_hdr.h:

#define IH_IHL_MASK 0xf


0030935          if (size < ip_hdr_len || bf_bufsize(pack) < size)
The total length of the packet as specified in the header (ih_length) must be at least as large as the length of the header, and the buffer must actually hold at least ih_length bytes. If either check fails, the packet is malformed: it is freed and NULL is returned.


0030936          {
0030937 #if !CRAMPED
0030938                   printf("icmp_err_pack: wrong packet size:\n");
0030939                   printf("\thdrlen= %d, ih_length= %d, bufsize= %d\n",
0030940                            ip_hdr_len, size, bf_bufsize(pack));
0030941 #endif
0030942                   bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030943                   return NULL;
0030944          }
0030945          if (ip_hdr_len + 8 < size)
0030946                   size= ip_hdr_len+8;
Extract the ip header plus the first 8 bytes of data. This becomes the new packet. The original packet is no longer needed.


0030947          tmp_pack= bf_cut(pack, 0, size);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, bf_cut() simply duplicates the first accessor in the linked list but sets acc_length=0 and acc_next=null. In other words, it creates a linked list consisting of a single accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0030948          bf_afree(pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the chain will not be freed in its entirety. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0030949          pack= tmp_pack;
0030950          tmp_pack= NULL;
0030951 
An understanding of the icmp header is helpful to understanding the rest of this function.


icmp_hdr_t


An icmp header has the following structure:

typedef struct icmp_hdr

{
u8_t ih_type, ih_code;
u16_t ih_chksum;
union
{
u32_t ihh_unused;
icmp_id_seq_t ihh_idseq;
ipaddr_t ihh_gateway;
icmp_ram_t ihh_ram;
icmp_pp_t ihh_pp;
} ih_hun;
union
{
icmp_ip_id_t ihd_ipid;
u8_t uhd_data[1];
} ih_dun;
} icmp_hdr_t;


Note that the "union" keyword specifies that only one field within the block will be used. For example, for router advertisements, only the ihh_ram field from the first union and the uhd_data[1] field from the second union are used.

ih_type, ih_code: ih_type, as the name suggests, is the type of icmp message. ih_code is a code that is specific to the message's type. Below are the possible types and their associated codes (note that the codes are indented):

#define ICMP_TYPE_ECHO_REPL       0

#define ICMP_TYPE_DST_UNRCH 3
# define ICMP_NET_UNRCH 0
# define ICMP_HOST_UNRCH 1
# define ICMP_PROTOCOL_UNRCH 2
# define ICMP_PORT_UNRCH 3
# define ICMP_FRAGM_AND_DF 4
# define ICMP_SOURCE_ROUTE_FAILED 5
#define ICMP_TYPE_SRC_QUENCH 4
#define ICMP_TYPE_REDIRECT 5
# define ICMP_REDIRECT_NET 0
# define ICMP_REDIRECT_HOST 1
# define ICMP_REDIRECT_TOS_AND_NET 2
# define ICMP_REDIRECT_TOS_AND_HOST 3
#define ICMP_TYPE_ECHO_REQ 8
#define ICMP_TYPE_ROUTER_ADVER 9
#define ICMP_TYPE_ROUTE_SOL 10
#define ICMP_TYPE_TIME_EXCEEDED 11
# define ICMP_TTL_EXC 0
# define ICMP_FRAG_REASSEM 1
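Most of these are self-explanatory. ICMP_TYPE_ROUTE_SOL is the router solicitation message of RFC 1256: a host sends it to ask the routers on its network to send their router advertisements (ICMP_TYPE_ROUTER_ADVER) immediately rather than waiting for the next periodic advertisement.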


u16_t ih_chksum: The checksum of the icmp message.


icmp_id_seq_t ihh_idseq:

typedef struct icmp_id_seq

{
u16_t iis_id, iis_seq;
} icmp_id_seq_t;
u16_t iis_id, iis_seq:

The iis_id and the iis_seq fields are used to match echo replies to echo requests.


ipaddr_t ihh_gateway: The redirect gateway.


icmp_ram_t ihh_ram:

typedef struct icmp_ram		/* RFC 1256 */

{
u8_t iram_na;
u8_t iram_aes;
u16_t iram_lt;
} icmp_ram_t;
Note that "ram" stands for "Router Advertisement Message". Only router advertisement icmp messages use this struct.

u8_t iram_na: (Number of Addresses) The number of router addresses advertised in the message.

u8_t iram_aes: (Address Entry Size) The number of 32-bit words of information per router address (2 in the diagram below; the first 32-bits is for the router address and the second 32-bits is the preference of the router).

u16_t iram_lt: (LifeTime) Lifetime of the router address in seconds.


icmp_pp_t ihh_pp: Not used by the network service.
icmp_ip_id_t ihd_ipid: Not used by the network service.


u8_t uhd_data[1]: Although this field is declared with a length of a single byte, as much data as necessary may be placed here. This field holds the router advertisements (see figure below) as well as the ip header of returned ip packets.

ICMP Router Advertisement Message


0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Type | Code | Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Num Addrs |Addr Entry Size| Lifetime |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Router Address[1] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Preference Level[1] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Router Address[2] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Preference Level[2] |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| . |
| . |
| . |



0030952          /* Create a minimal size ICMP hdr. */
0030953          icmp_hdr_len= offsetof(icmp_hdr_t, ih_dun);
0030954          icmp_pack= bf_memreq(icmp_hdr_len);
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0030955          pack= bf_append(icmp_pack, pack);
On line 30947, everything except the ip header and the first 8 bytes of data was cut off the packet pack. Append this truncated packet to the icmp header.


bf_append()


bf_append() appends one accessor linked list to another accessor linked list. For example, if the payload of an ethernet packet (1500 bytes) is appended to an ethernet header (14 bytes):



the resulting linked list is as follows:






0030956          size += icmp_hdr_len;
The size is now the length of the icmp header plus the length of the ip header plus the 8 bytes.


0030957          pack= bf_packIffLess(pack, icmp_hdr_len);
bf_packIffLess()

If the first accessor of a linked list of accessors holds fewer than min_len bytes (min_len is the second parameter), bf_packIffLess(pack, min_len) packs the data by calling bf_pack().

bf_packIffLess() is often called to ensure that a packet's header is in a single contiguous buffer so that the individual fields of the header can be easily accessed.

For a detailed description of the network service's buffer management, click here.


0030958          *icmp_hdr= (icmp_hdr_t *)ptr2acc_data(pack);
All fields of the icmp header except ih_chksum will be filled in by the calling function (e.g., icmp_snd_unreachable()).


ptr2acc_data()


The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030959          (*icmp_hdr)->ih_type= 0;
0030960          (*icmp_hdr)->ih_code= 0;
0030961          (*icmp_hdr)->ih_chksum= 0;
0030962          (*icmp_hdr)->ih_hun.ihh_unused= 0;
0030963          (*icmp_hdr)->ih_chksum= ~icmp_pack_oneCsum(pack);
0030964 
0030965          /* Create an IP header */
An understanding of the ip header is necessary to understand the remainder of this function.


ip_hdr_t


struct ip_hdr_t is the structure of an ip header. "ih" (e.g., ih_src, ih_dst) stands for "Ip Header".

ip_hdr_t is declared in /include/net/gen/ip_hdr.h:

typedef struct ip_hdr

{
u8_t ih_vers_ihl, ih_tos;
u16_t ih_length, ih_id, ih_flags_fragoff;
u8_t ih_ttl, ih_proto;
u16_t ih_hdr_chk;
ipaddr_t ih_src, ih_dst;
} ip_hdr_t;

ih_vers_ihl: The lower 4 bits hold the length of the header, including options (if there are any), expressed in 32-bit words. Shifting this value left by 2 bit positions (i.e., multiplying it by 4) gives the header length in bytes. An example of an option is a router list that a packet should follow to its destination.

The upper four bits is the version number (e.g., IPv4).


ih_tos: tos stands for "Type Of Service" and is the priority of the ip packet. A value of zero is the lowest priority. Both UDP and TCP have a default TOS of zero.

#define TCP_DEF_TOS 0
#define UDP_TOS 0


ih_length: The length of the entire ip packet, including the ip header.


ih_id: The value of ih_id for the first packet sent out is determined by ip_init() and is equal to the number of clock ticks since reboot (i.e., the value returned by get_time) and is incremented for each packet sent out. This value is used to combine fragments at the receiving end if fragmentation has occurred.


ih_flags_fragoff: ih_flags_fragoff is a combination of flags and a (possible) fragmentation offset ("fragoff").

If the packet should not be fragmented, ih_flags_fragoff is set to IH_DONT_FRAG. If there are additional fragments (e.g., the 3rd fragment of 4 fragments), ih_flags_fragoff is set to IH_MORE_FRAGS.

If the packet is indeed just a fragment of a packet, this value indicates the starting byte position (in 8 byte increments) of the fragment's data within the original ip packet's data. So for example, if an ip packet with 2000 bytes of data (not including the ip header) is broken up into two fragments of 1496 and 504 bytes, the first fragment would have a fragmentation offset of 0 bytes and the second fragment would have a fragmentation offset of 1496 bytes; the offset stored in the second fragment's ih_flags_fragoff is therefore 187 (1496 / 8 = 187).


ih_ttl: "Time to live" for the packet. As a packet is routed to the destination, each router decrements the packet's ttl. When the ttl reaches 0, the router discards the packet and sends an "icmp time exceeded" packet (ICMP_TYPE_TIME_EXCEEDED with code ICMP_TTL_EXC) to the source. The ttl is designed to prevent packets that can't reach their destination from indefinitely bouncing around between routers. UDP's default TTL is 30:

#define UDP_TTL 30

Note that the Minix code also uses this value as a timeout value (in seconds). This code was written before the ttl field was redefined to be strictly a hop count. The original IP RFC defines the ttl field as the time to live in seconds.


ih_proto: The protocol of the ip packet. For example, if the packet is a udp packet, ih_proto will be 17. If the packet is a tcp packet, ih_proto will be 6.


ih_hdr_chk: Checksum for the header.


ih_src, ih_dst: Source and destination ip address of the ip packet.


IP HEADER (as given by RFC 791)


0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version| IHL |Type of Service| Total Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Identification |Flags| Fragment Offset |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Time to Live | Protocol | Header Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Destination Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Options | Padding |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+



0030966          ip_hdr_len= IP_MIN_HDR_SIZE;
The ip header will be the minimum length since the header will include no ip options.


0030967 
0030968          ip_pack= bf_memreq(ip_hdr_len);
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0030969          ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0030970 
0030971          ip_hdr->ih_vers_ihl= ip_hdr_len >> 2;
0030972          ip_hdr->ih_tos= 0;
As described above, the "tos" stands for "Type Of Service" and is the priority of a packet. Icmp packets have low priorities.


0030973          ip_hdr->ih_length= htons(ip_hdr_len + size);
htons() / ntohs() / htonl() / ntohl()

From htons(3):

"htons() converts a 16-bit quantity from host byte order to network byte order."

Different CPU architectures order the bytes of a multi-byte value differently. For example, on a "little-endian" machine (an example of which is the Intel CPU), the 16-bit value 0x1234 is stored in memory as the byte sequence 0x34, 0x12 (least significant byte first). However, on a "big-endian" machine, the value 0x1234 is stored in memory as the byte sequence 0x12, 0x34 (most significant byte first).

It is important that values in a header are sent across a network in a consistent manner independent of the architecture of the sending or receiving system. For this reason, a standard was chosen. The standard chosen was big-endian although it could have just as well been little-endian.

htons() is defined in /include/net/hton.h, as:
#define htons(x) (_tmp=(x), ((_tmp>>8) & 0xff) | ((_tmp<<8) & 0xff00))

ntohs() converts a 16-bit quantity from network byte order to host byte order, the reverse of htons().

htonl() and ntohl() are identical to htons() and ntohs() except that they convert 32-bit quantities instead of 16-bit quantities.

Processes generally supply header information when sending packets. The data in these fields is converted to the network format (i.e., big-endian) by the process before the process copies the data to the network service.


0030974          ip_hdr->ih_flags_fragoff= 0;
0030975          ip_hdr->ih_ttl= ICMP_DEF_TTL;
0030976          ip_hdr->ih_proto= IPPROTO_ICMP;
0030977          ip_hdr->ih_dst= dest;
dest was set to the ip address of the problem ip packet on line 30916.


0030978 
Append the icmp message to the ip header. In other words, the icmp message becomes the payload (data) of the ip packet. The newly created ip packet is then returned.


0030979          assert(ip_pack->acc_next == NULL);
0030980          ip_pack->acc_next= pack;
0030981          return ip_pack;
0030982 }
0030983 
0030984 /*
0030985  * $PchId: icmp.c,v 1.8 1996/12/17 07:53:34 philip Exp $
0030986  */