Please wait until the page is fully downloaded and then press the "Expand" button or the blue line numbers.

0037001 /*
0037002 generic/ip_eth.c
0037003 
0037004 Ethernet specific part of the IP implementation
0037005 
0037006 Created:       Apr 22, 1993 by Philip Homburg
0037007 
0037008 Copyright 1995 Philip Homburg
0037009 */
ip_eth.c contains the code that handles the passing of packets and configuration data between the ip layer and the ethernet layer. The first two functions in ip_eth.c, ipeth_init() and ipeth_main(), are initialization functions that are called during the initialization of the network service. The remainder of the functions in ip_eth.c move packets between the ip layer and the ethernet layer.


0037010 
0037011 #include "inet.h"
0037012 #include "type.h"
0037013 #include "arp.h"
0037014 #include "assert.h"
0037015 #include "buf.h"
0037016 #include "clock.h"
0037017 #include "eth.h"
0037018 #include "event.h"
0037019 #include "ip.h"
0037020 #include "ip_int.h"
0037021 
0037022 THIS_FILE
0037023 
0037024 typedef struct xmit_hdr
0037025 {
0037026          time_t xh_time;
0037027          ipaddr_t xh_ipaddr;
0037028 } xmit_hdr_t;
xmit_hdr_t

If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).

xmit_hdr is declared in generic/ip_eth.c:

typedef struct xmit_hdr

{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.



0037029 
0037030 PRIVATE ether_addr_t broadcast_ethaddr= { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
0037031 PRIVATE ipaddr_t broadcast_ipaddr= 0xFFFFFFFFL;
0037032 
0037033 FORWARD void do_eth_read ARGS(( ip_port_t *port ));
0037034 FORWARD acc_t *get_eth_data ARGS(( int fd, size_t offset,
0037035          size_t count, int for_ioctl ));
0037036 FORWARD int put_eth_data ARGS(( int fd, size_t offset,
0037037          acc_t *data, int for_ioctl ));
0037038 FORWARD void ipeth_main ARGS(( ip_port_t *port ));
0037039 FORWARD void ipeth_set_ipaddr ARGS(( ip_port_t *port ));
0037040 FORWARD void ipeth_restart_send ARGS(( ip_port_t *ip_port ));
0037041 FORWARD int ipeth_send ARGS(( struct ip_port *ip_port, ipaddr_t dest,
0037042          acc_t *pack, int broadcast ));
0037043 FORWARD void ipeth_arp_reply ARGS(( int ip_port_nr, ipaddr_t ipaddr,
0037044          ether_addr_t *dst_ether_ptr ));
0037045 FORWARD int ipeth_update_ttl ARGS(( time_t enq_time, time_t now,
0037046          acc_t *eth_pack ));
0037047 FORWARD void ip_eth_arrived ARGS(( int port, acc_t *pack,
0037048          size_t pack_size ));
0037049 
0037050 
0037051 PUBLIC int ipeth_init(ip_port)
ipeth_init()

If an ip port's underlying data-link layer is ethernet, ipeth_init() is called by ip_init() during the ip port's initialization. ipeth_init() calls eth_open() to acquire an ethernet file descriptor and then initializes several ethernet-dependent fields of the ip port (e.g., ip_dl.dl_eth.de_flags).


0037052 ip_port_t *ip_port;
0037053 {
0037054          assert(BUF_S >= sizeof(xmit_hdr_t));
0037055          assert(BUF_S >= sizeof(eth_hdr_t));
0037056 
0037057          ip_port->ip_dl.dl_eth.de_fd= eth_open(ip_port->
0037058                   ip_dl.dl_eth.de_port, ip_port->ip_port,
0037059                   get_eth_data, put_eth_data, ip_eth_arrived);
eth_open()

eth_open(port, srfd, get_userdata, put_userdata, put_pkt) finds an ethernet file descriptor that is free and associates the file descriptor with an ethernet port whose index within eth_port_table[] is port, eth_open()'s first parameter.

eth_open() is called by the ip code and by the arp code, and it is also called when an ethernet device file (e.g., /dev/eth) is opened directly.

Here are the relationships between various file descriptors and ports:






0037060          if (ip_port->ip_dl.dl_eth.de_fd < 0)
0037061          {
0037062                   DBLOCK(1, printf("ip.c: unable to open eth port\n"));
0037063                   return -1;
0037064          }
To understand ipeth_init(), it is necessary to understand ip ports.


ip_port / ip_port_table[]


For every interface listed in inet.conf, there is a single ip port. For example, for the following inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there will be an ip port associated with the ethernet interface and an ip port associated with the psip interface. Each of these ip ports is a struct ip_port (see below) and each ip_port struct is in ip_port_table[]. So, for the example inet.conf file above, ip_port_table[] will have 2 elements; ip_port_table[0] will be for the ethernet interface and ip_port_table[1] will be for the psip interface.

Each element in ip_port_table[] is associated with several ip file descriptors. For example, the udp code (during initialization) will open up an ip file descriptor and this ip file descriptor will be associated with one of the elements in ip_port_table[].



typedef struct ip_port

{
int ip_flags, ip_dl_type;
int ip_port;
union
{
struct
{
int de_state;
int de_flags;
int de_port;
int de_fd;
acc_t *de_frame;
acc_t *de_q_head;
acc_t *de_q_tail;
acc_t *de_arp_head;
acc_t *de_arp_tail;
} dl_eth;
struct
{
int ps_port;
acc_t *ps_send_head;
acc_t *ps_send_tail;
} dl_ps;
} ip_dl;
ipaddr_t ip_ipaddr;
ipaddr_t ip_netmask;
ipaddr_t ip_subnetmask;
u16_t ip_frame_id;
u16_t ip_mss;
ip_dev_t ip_dev_main;
ip_dev_t ip_dev_set_ipaddr;
ip_dev_send_t ip_dev_send;
acc_t *ip_loopb_head;
acc_t *ip_loopb_tail;
event_t ip_loopb_event;
struct ip_fd *ip_proto_any;
struct ip_fd *ip_proto[IP_PROTO_HASH_NR];
} ip_port_t;



int ip_flags:

The possible ip_flags are #define'd in ip_int.h:

#define IPF_EMPTY 0x0
#define IPF_CONFIGURED 0x1
#define IPF_IPADDRSET 0x2
#define IPF_NETMASKSET 0x4

After the initialization of the ip port, ip_flags is set to IPF_CONFIGURED. If the "ifconfig -h host-IP-address" command is issued, ip_ioctl() sets the IPF_IPADDRSET flag before setting the ip address and (optionally) the subnet mask.


int ip_dl_type:

"dl" stands for "data link" (layer). ip_dl_type is set to the corresponding data link layer type of the port. These types include NETTYPE_ETH (ethernet) and NETTYPE_PSIP (psip).


int ip_port:

The port number of the ip device. For example, for a system with the following /etc/inet.conf file:

eth0 DP8390 0 { default; };
psip1;

there will be 2 ports: port 0 for the ethernet device and port 1 for the psip device.

Note that this port will not necessarily be the same as dl_eth.de_port (see below).


struct dl_eth: The dl_eth struct is used (instead of dl_ps) if the underlying data link layer device of this port is an ethernet device.


int de_state:

The possible de_state values are #define'd in ip_int.h:

#define IES_EMPTY 0x0
#define IES_SETPROTO 0x1
#define IES_GETIPADDR 0x2
#define IES_MAIN 0x3
#define IES_ERROR 0x4

When the ip port is being initialized, de_state changes in quick succession from IES_EMPTY to IES_SETPROTO to IES_GETIPADDR before entering IES_MAIN, which is its normal operational state.


int de_flags:

de_flags is initialized to IEF_EMPTY. Note that "SP" stands for "SusPend".

#define IEF_EMPTY 0x1
#define IEF_SUSPEND 0x8
#define IEF_READ_IP 0x10
#define IEF_READ_SP 0x20
#define IEF_WRITE_SP 0x80


int de_port:

The ethernet port number. For example, if there were two ethernet devices, one ethernet device would have port 0 and the other would have port 1, regardless of how many psip devices were on the system.

This value is initialized in ip_init(). Also see the initial comments in ip_config.c for a description of ip_conf[].

Note that this port will not necessarily be the same as ip_port (see above).


int de_fd:

Initialized by calling eth_open(), de_fd is the ip port's associated ethernet file descriptor.


acc_t *de_frame:
acc_t *de_q_head:
acc_t *de_q_tail:


The queueing for ethernet packets being sent out by the ethernet task is somewhat convoluted. If no ethernet packets are waiting to be sent out by the ethernet task (driver),
eth_write_port() stores an ethernet packet in an ethernet port's etp_wr_pack field until the packet is sent off by the ethernet task. After the ethernet task successfully sends the packet off, this field is set to NULL (either by eth_write_port() or write_int()). If the ethernet task cannot immediately send the ethernet packet off, the packet remains in etp_wr_pack. If another packet arrives for the ip port to send off to the ethernet port, the ip port encapsulates the ip packet with an ethernet header and the resulting ethernet packet is placed in the dl_eth.de_frame field of the ip port. If the ip port has additional packets that it wishes to send out, the packets are placed in the dl_eth.de_q_head/dl_eth.de_q_tail queue until the ethernet packets in etp_wr_pack and dl_eth.de_frame are sent out.

It's important to note that neither etp_wr_pack nor dl_eth.de_frame are linked lists (i.e., queues). They each hold only a single ethernet packet.


acc_t *de_arp_head:
acc_t *de_arp_tail:


If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).

xmit_hdr is declared in generic/ip_eth.c:

typedef struct xmit_hdr

{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.


struct dl_ps:
int ps_port:
acc_t *ps_send_head:
acc_t *ps_send_tail:


The dl_ps struct is used (instead of dl_eth) if the underlying data link layer device is a psip device. Coverage of psip is not included in this documentation.


ipaddr_t ip_ipaddr:
ipaddr_t ip_netmask:
ipaddr_t ip_subnetmask:


The ip address of a port can be set in two ways, either with RARP or through the "ifconfig -h host-IP-address" command. If set by the ifconfig command, a message requesting an NWIOSIPCONF (Set IP CONFiguration) is sent to the appropriate ip device (e.g., /dev/ip00), causing ip_ioctl() to be called. The user then passes in a nwio_ipconf struct which contains either the ip address or the subnet mask or both.

The netmask is simply a reflection of the class to which the ip address belongs. For example, if the ip address is 194.77.33.5, then it is a class C address and its netmask is therefore 255.255.255.0. See ip_nettype() for more information.


u16_t ip_frame_id:

ip_frame_id is initialized to the time at which the ip port was configured and incremented each time a packet is sent out. The ih_id field of each packet's ip header is set to ip_frame_id. If a packet is fragmented, every fragment carries the same id, so the receiver can tell which fragments belong to the same packet when reassembling it.


u16_t ip_mss:

If the ip_port_table[] element has an underlying ethernet layer, ip_mss is initialized to ETH_MAX_PACK_SIZE-ETH_HDR_SIZE (1514-14=1500), the size of the payload of an ethernet packet in bytes. If the size of the resulting ip packet is too large, the code fragments the packet.


ip_dev_t ip_dev_main:

ip_dev_main is initialized to ipeth_main() for ethernet devices. This function is called in ip_init().


ip_dev_t ip_dev_set_ipaddr:

ip_dev_set_ipaddr is initialized to ipeth_set_ipaddr() for ethernet devices.


ip_dev_send_t ip_dev_send:

ip_dev_send is initialized to ipeth_send() for ethernet devices or ipps_send() for psip devices.


acc_t *ip_loopb_head:
acc_t *ip_loopb_tail:
event_t ip_loopb_event:


Ip packets destined for the loopback address (127.0.0.1) or destined for the ip address of the ip port itself are placed in the ip_loopb_head/ip_loopb_tail queue before being delivered back to the ip port.

ip_loopb_event is an event that has been placed in the system-wide event queue.


struct ip_fd *ip_proto_any:
struct ip_fd *ip_proto[IP_PROTO_HASH_NR]:


For a description of ip_proto_any and ip_proto[], click here.


0037065          ip_port->ip_dl.dl_eth.de_state= IES_EMPTY;
0037066          ip_port->ip_dl.dl_eth.de_flags= IEF_EMPTY;
0037067          ip_port->ip_dl.dl_eth.de_q_head= NULL;
0037068          ip_port->ip_dl.dl_eth.de_q_tail= NULL;
0037069          ip_port->ip_dl.dl_eth.de_arp_head= NULL;
0037070          ip_port->ip_dl.dl_eth.de_arp_tail= NULL;
0037071          ip_port->ip_dev_main= ipeth_main;
0037072          ip_port->ip_dev_set_ipaddr= ipeth_set_ipaddr;
0037073          ip_port->ip_dev_send= ipeth_send;
0037074          ip_port->ip_mss= ETH_MAX_PACK_SIZE-ETH_HDR_SIZE;
0037075          return 0;
0037076 }
0037077 
0037078 PRIVATE void ipeth_main(ip_port)
0037079 ip_port_t *ip_port;
ipeth_main()

ipeth_main() helps initialize an ip port whose underlying link layer device is an ethernet device. ipeth_main() calls eth_ioctl(), which configures the ethernet file descriptor that corresponds to the ip port and then calls arp_set_cb() to initialize the arp port that is associated with this ip port. After the initialization, ipeth_main() calls do_eth_read() to process any ethernet packets that have arrived at the ethernet file descriptor that was just opened.


0037080 {
0037081          int result, i;
0037082          ip_fd_t *ip_fd;
0037083 
0037084          switch (ip_port->ip_dl.dl_eth.de_state)
0037085          {
0037086          case IES_EMPTY:
0037087                   ip_port->ip_dl.dl_eth.de_state= IES_SETPROTO;
0037088 
0037089                   result= eth_ioctl(ip_port->ip_dl.dl_eth.de_fd, NWIOSETHOPT);
If the ethernet file descriptor was opened by the ip code, the following flags and type will be set:

nweo_flags= NWEO_COPY|NWEO_EN_BROAD|NWEO_EN_MULTI|NWEO_TYPESPEC;
nweo_type= HTONS(ETH_IP_PROTO);

Note that IES_SETPROTO is a de_state value, not a de_flags value: ipeth_main() sets de_state to IES_SETPROTO immediately before calling eth_ioctl().


eth_ioctl()


The actions of eth_ioctl(fd, req) depend on req, eth_ioctl()'s second parameter:

NWIOSETHOPT (NetWork IO Set ETHernet OPTions):

If req is NWIOSETHOPT, eth_ioctl() configures the ethernet file descriptor fd (eth_ioctl()'s first parameter), which can then be used by a higher layer (e.g., ip, arp).

NWIOGETHSTAT (NetWork IO Get ETHernet STATs):

Only the arp code calls eth_ioctl() with the second parameter set to NWIOGETHSTAT. In this case, the ap_ethaddr field of the arp port is set to the ethernet address of the ethernet file descriptor's underlying ethernet port.


0037090                   if (result == NW_SUSPEND)
0037091                            ip_port->ip_dl.dl_eth.de_flags |= IEF_SUSPEND;
0037092                   if (result<0)
0037093                   {
0037094                            DBLOCK(1, printf("eth_ioctl(..,%lx)=%d\n",
0037095                                     NWIOSETHOPT, result));
0037096                            return;
0037097                   }
0037098                   if (ip_port->ip_dl.dl_eth.de_state != IES_SETPROTO)
0037099                            return;
0037100                   /* drops through */
0037101          case IES_SETPROTO:
0037102                   result= arp_set_cb(ip_port->ip_dl.dl_eth.de_port,
0037103                            ip_port->ip_port,
0037104                            ipeth_arp_reply);
arp_set_cb()

During the initialization of the network service (and, more specifically, the initialization of the ip layer), arp_set_cb(eth_port, ip_port, arp_func) is called to initialize the arp port associated with the ethernet port eth_port, arp_set_cb()'s first parameter. After initializing the arp port, arp_set_cb() calls arp_main().

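It is unclear what the "cb" in the function name stands for; since the third argument is a callback function (here ipeth_arp_reply()), it presumably stands for "callback".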


0037105                   if (result != NW_OK)
0037106                   {
0037107 #if !CRAMPED
0037108                            printf("ipeth_main: arp_set_cb failed: %d\n",
0037109                                     result);
0037110 #endif
0037111                            return;
0037112                   }
0037113 
0037114                   /* Wait until the interface is configured up. */
0037115                   ip_port->ip_dl.dl_eth.de_state= IES_GETIPADDR;
0037116                   if (!(ip_port->ip_flags & IPF_IPADDRSET))
The ip address for the port must have been previously configured. This can be done with the ifconfig utility, which (through ip_ioctl()) sets the IPF_IPADDRSET flag.


0037117                   {
0037118                            ip_port->ip_dl.dl_eth.de_flags |= IEF_SUSPEND;
0037119                            return;
0037120                   }
0037121 
0037122                   /* fall through */
0037123          case IES_GETIPADDR:
0037124                   ip_port->ip_dl.dl_eth.de_state= IES_MAIN;
0037125                   for (i=0, ip_fd= ip_fd_table; i<IP_FD_NR; i++, ip_fd++)
0037126                   {
0037127                            if (!(ip_fd->if_flags & IFF_INUSE))
0037128                            {
0037129                                     continue;
0037130                            }
0037131                            if (ip_fd->if_port != ip_port)
0037132                            {
0037133                                     continue;
0037134                            }
0037135                            if (ip_fd->if_flags & IFF_GIPCONF_IP)
0037136                            {
0037137                                     ip_ioctl (i, NWIOGIPCONF);
0037138                            }
If one of the upper layers (e.g., udp) was not able to finish its initialization because the underlying ip port did not have an ip address configured, try again. For example, the udp code calls ip_ioctl() for each udp port with an argument of NWIOGIPCONF in order to set the up_ipaddr field of the udp port. If the ip address of the underlying ip port wasn't configured, the call to ip_ioctl() needs to be made again.


0037139                   }
0037140                   do_eth_read(ip_port);
Process any ethernet packets that have arrived at the ethernet file descriptor that is associated with this ip port.


do_eth_read()


do_eth_read(ip_port) repeatedly calls eth_read() until all of the ethernet packets in the read queue of the ethernet file descriptor associated with the ip port ip_port, do_eth_read()'s only parameter, have been passed up to the ip layer.


0037141                   return;
0037142 #if !CRAMPED
0037143          default:
0037144                   ip_panic(( "unknown state: %d",
0037145                            ip_port->ip_dl.dl_eth.de_state));
0037146 #endif
0037147          }
0037148 }
0037149 
0037150 PRIVATE acc_t *get_eth_data (fd, offset, count, for_ioctl)
0037151 int fd;
0037152 size_t offset;
0037153 size_t count;
0037154 int for_ioctl;
get_eth_data()

get_eth_data(fd, offset, count, for_ioctl) is (indirectly) called by a number of functions within the ethernet code, including eth_write(). get_eth_data() performs one of several tasks, depending on the state of the ip port and the value of count, get_eth_data()'s third parameter.

If the state of the ip port is IES_MAIN (its state during normal operations) and count is nonzero, get_eth_data() returns the packet from the de_frame field of the ip port. In this way, eth_write() gets the packet from the ip code to send off to eth_send().

If count is zero, get_eth_data() does something different. After eth_write() calls eth_send() (and the ethernet frame is therefore delivered), eth_write() calls the ethernet's reply_thr_get() with count equal to zero. If the ethernet file descriptor was opened up by the ip code, reply_thr_get() is simply a wrapper for get_eth_data(). In this scenario, get_eth_data() sets the ip port's de_frame field to null (since eth_send() just passed this packet to the ethernet driver) and calls ipeth_restart_send() if there are any ethernet packets that the ip code is waiting to send.

If the ip port's state is IES_SETPROTO (its configuration state), get_eth_data() handles an initialization-related task. If count, get_eth_data()'s third parameter, is not zero (0), get_eth_data() sets various fields of an nwio_ethopt struct appropriate for the ip protocol and then returns a pointer to the struct.

When ipeth_main() calls eth_ioctl() the first time, eth_ioctl() in turn (indirectly) calls get_eth_data() to get the nwio_ethopt struct constructed by get_eth_data().

If count is zero and the ip port's state is IES_SETPROTO, get_eth_data() calls ipeth_main() to continue the initialization, provided that the port was suspended (i.e., the IEF_SUSPEND flag is set in de_flags).


0037155 {
0037156          ip_port_t *ip_port;
0037157          acc_t *data;
0037158          int result;
0037159 
0037160          ip_port= &ip_port_table[fd];
The parameter name "fd" is somewhat unfortunate since fd is not an ip file descriptor but an ip port. More generally, the ef_srfd field of an ethernet file descriptor corresponds to an upper-layer port, not an upper-layer file descriptor.


0037161 
0037162          switch (ip_port->ip_dl.dl_eth.de_state)
0037163          {
0037164          case IES_SETPROTO:
0037165                   if (!count)
If the ip port's state is IES_SETPROTO (its configuration state), get_eth_data() handles an initialization-related task. If count, get_eth_data()'s third parameter, is not zero (0), get_eth_data() sets various fields of an nwio_ethopt struct appropriate for the ip protocol and then returns a pointer to the struct.

If count is zero, get_eth_data() calls ipeth_main() if additional initialization is necessary.


0037166                   {
0037167                            result= (int)offset;
0037168                            if (result<0)
0037169                            {
0037170                                     ip_port->ip_dl.dl_eth.de_state= IES_ERROR;
0037171                                     break;
0037172                            }
0037173                            if (ip_port->ip_dl.dl_eth.de_flags & IEF_SUSPEND)
0037174                                     ipeth_main(ip_port);
ipeth_main()

ipeth_main() helps initialize an ip port whose underlying link layer device is an ethernet device. ipeth_main() calls eth_ioctl(), which configures the ethernet file descriptor that corresponds to the ip port and then calls arp_set_cb() to initialize the arp port that is associated with this ip port. After the initialization, ipeth_main() calls do_eth_read() to process any ethernet packets that have arrived at the ethernet file descriptor that was just opened.


0037175                            return NW_OK;
0037176                   }
0037177                   assert ((!offset) && (count == sizeof(struct nwio_ethopt)));
If the ip port's state is IES_SETPROTO (its configuration state), get_eth_data() handles an initialization-related task. If count, get_eth_data()'s third parameter, is not zero (0), get_eth_data() sets various fields of an nwio_ethopt struct appropriate for the ip protocol and then returns a pointer to the struct.

When ipeth_main() calls eth_ioctl() the first time, eth_ioctl() in turn (indirectly) calls get_eth_data() to get the nwio_ethopt struct constructed below.


0037178                   {
0037179                            struct nwio_ethopt *ethopt;
nwio_ethopt_t

The nwio_ethopt struct is a field within the eth_fd_t struct (ethernet file descriptor) and is also used to configure ethernet file descriptors.

nwio_ethopt_t ef_ethopt:

typedef struct nwio_ethopt

{
u32_t nweo_flags;
ether_addr_t nweo_multi, nweo_rem;
ether_type_t nweo_type;
} nwio_ethopt_t;

nweo_flags:

If the ethernet file descriptor was opened by the ip code, the following flags and type will be set:

nweo_flags= NWEO_COPY|NWEO_EN_BROAD|NWEO_EN_MULTI|NWEO_TYPESPEC;
nweo_type= HTONS(ETH_IP_PROTO);


#define NWEO_NOFLAGS 0x0000L

#define NWEO_ACC_MASK 0x0003L
#define NWEO_EXCL 0x00000001L
#define NWEO_SHARED 0x00000002L
#define NWEO_COPY 0x00000003L

From ip(4):

"If NWEO_SHARED is selected, then multiple channels (which all must select NWEO_SHARED) can use the same Ethernet type and they can all send packets. However, incoming packets will be delivered to at most one of them."

Note that, for whatever reason, NWEO_EXCL behaves exactly as NWEO_COPY. Every ethernet file descriptor so configured receives a copy of an incoming packet.

The access flags are important when an ethernet packet is being read.


#define NWEO_LOC_MASK 0x0010L

#define NWEO_BROAD_MASK 0x0020L
#define NWEO_EN_BROAD 0x00000020L
#define NWEO_DI_BROAD 0x00200000L

NWEO_EN_BROAD enables the receipt of broadcast packets.

#define NWEO_MULTI_MASK 0x0040L
#define NWEO_EN_MULTI 0x00000040L
#define NWEO_DI_MULTI 0x00400000L

NWEO_EN_MULTI enables the receipt of multicast packets. The nweo_multi field does not appear to be used in any meaningful way.

#define NWEO_PROMISC_MASK 0x0080L
#define NWEO_EN_PROMISC 0x00000080L
#define NWEO_DI_PROMISC 0x00800000L

If an ethernet file descriptor is in promiscuous mode, the file descriptor not only accepts any packet regardless of destination ethernet address but can also send out packets with any source ethernet address (not just the ethernet card's address).

If the file descriptor is not in promiscuous mode, outgoing packets use the ethernet port's own ethernet address as the source address.

#define NWEO_REM_MASK 0x0100L
#define NWEO_REMSPEC 0x00000100L
#define NWEO_REMANY 0x01000000L

From ip(4):

"NWEO_REMSPEC restricts sending and receiving of packets to the single remote computer specified in the nweo_rem field."

If the NWEO_REMANY flag is set, an ethernet packet may have any destination.

#define NWEO_TYPE_MASK 0x0200L
#define NWEO_TYPESPEC 0x00000200L
#define NWEO_TYPEANY 0x02000000L

From ip(4):

"NWEO_TYPESPEC restricts sending and receiving of packets to the type specified in nweo_type."

If the NWEO_TYPESPEC flag is set, the nweo_type field (see below) may be one of the following:

#define ETH_RARP_PROTO 0x8035
#define ETH_ARP_PROTO 0x806
#define ETH_IP_PROTO 0x800

#define NWEO_RW_MASK 0x1000L
#define NWEO_RWDATONLY 0x00001000L
#define NWEO_RWDATALL 0x10000000L

From ip(4):

"If the Ethernet header is completely specified by the nweo_flags (i.e.,
all of NWEO_EN_LOC, NWEO_DI_BROAD, NWEO_DI_MULTI, NWEO_DI_PROMISC,
NWEO_REMSPEC and NWEO_TYPESPEC are specified), then NWEO_RWDATONLY can be
used to send and receive only the data part of an Ethernet packet."

The default for the ethernet file descriptors opened by the ip and arp layers is NWEO_RWDATALL.

ether_addr_t nweo_multi:

This field is not used in any meaningful way.


nweo_rem:

Used with the NWEO_REMSPEC flag (see above).


ether_type_t nweo_type:

Used with the NWEO_TYPESPEC flag (see above). The nweo_type field may be one of the following:

#define ETH_RARP_PROTO 0x8035
#define ETH_ARP_PROTO 0x806
#define ETH_IP_PROTO 0x800


0037180                            acc_t *acc;
0037181 
0037182                            acc= bf_memreq(sizeof(*ethopt));
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0037183                            ethopt= (struct nwio_ethopt *)ptr2acc_data(acc);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0037184                            ethopt->nweo_flags= NWEO_COPY|NWEO_EN_BROAD|
0037185                                     NWEO_EN_MULTI|NWEO_TYPESPEC;
0037186                            ethopt->nweo_type= HTONS(ETH_IP_PROTO);
0037187                            return acc;
0037188                   }
0037189 
0037190          case IES_MAIN:
0037191                   if (!count)
After eth_write() calls eth_send() (and the ethernet frame is therefore delivered), eth_write() calls the ethernet's reply_thr_get() with count equal to zero. If the ethernet file descriptor was opened up by the ip code, reply_thr_get() is simply a wrapper for get_eth_data(). In this scenario, get_eth_data() sets the ip port's de_frame field to null (since eth_send() just passed this packet to the ethernet driver) and calls ipeth_restart_send() if there are any ethernet packets that the ip code is waiting to send.



0037192                   {
0037193                            result= (int)offset;
0037194                            if (result<0)
0037195                                     ip_warning(( "error on write: %d\n", result ));
The ethernet packet in the ip port's de_frame field was either successfully sent to the ethernet task or there was a problem and it was not possible to send it to the task. In either case, the packet is no longer needed.


0037196                            bf_afree (ip_port->ip_dl.dl_eth.de_frame);
The ethernet packet in this field has been delivered to the ethernet task by eth_write_port().


bf_afree()


After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0037197                            ip_port->ip_dl.dl_eth.de_frame= 0;
0037198 
0037199                            if (ip_port->ip_dl.dl_eth.de_flags & IEF_WRITE_SP)
If a packet could not be delivered to the ethernet task (because a previous packet was already occupying the de_frame field) and was instead queued, the IEF_WRITE_SP flag will be set. Call ipeth_restart_send() to try to send these out.


0037200                            {
0037201                                     ip_port->ip_dl.dl_eth.de_flags &=
0037202                                              ~IEF_WRITE_SP;
0037203                                     ipeth_restart_send(ip_port);
ipeth_restart_send()

ipeth_restart_send() attempts to send out the packets in an ip port's linked list of ethernet packets waiting to be sent out. If the ethernet packets are too large, ipeth_restart_send() calls ip_split_pack() to split the ethernet packet's encapsulated ip packet into two fragments.

ipeth_restart_send() is called in a number of places within ip_eth.c. For example, ipeth_restart_send() is called if an arp-reply is received for a previous arp-request sent out by the system. Since the destination ethernet address for an ethernet packet is now known, an attempt to send out the packet can be made.


0037204                            }
0037205                            return NW_OK;
0037206                   }
If the state of the ip port is IES_MAIN (its state during normal operations) and count is nonzero, get_eth_data() returns the packet from the de_frame field of the ip port. In this way, eth_write() gets the packet from the ip code to hand off to eth_send(), which calls eth_write_port() (if the packet isn't destined for the loopback address), which hands the ethernet packet off to the ethernet driver.


0037207                   data= bf_cut (ip_port->ip_dl.dl_eth.de_frame, offset, count);
bf_cut()

If a section of a linked list needs to be duplicated, bf_cut(data, offset, length) is called. For example, if a section of length 50 starting at an offset of 75 of the linked list below needs to be duplicated, bf_cut(data, 75, 50) is called:



Note that the original linked list remains unchanged and that acc_linkC for all the accessors in the new linked list is one.

If length (the third parameter) is zero, simply duplicate the first accessor in the linked list but set acc_length=0 and acc_next=null. In other words, create a linked list of length one accessor whose acc_length is 0.

bf_cut() is used in a number of scenarios, including cutting a received ethernet packet to size.

For a full description of the network service's buffer management, click here.



0037208                   assert (data);
0037209                   return data;
0037210          default:
0037211 #if !CRAMPED
0037212                   printf(
0037213                   "get_eth_data(%d, 0x%d, 0x%d) called but ip_state=0x%x\n",
0037214                            fd, offset, count, ip_port->ip_dl.dl_eth.de_state);
0037215 #endif
0037216                   break;
0037217          }
0037218          return 0;
0037219 }
0037220 
0037221 PRIVATE int put_eth_data (port, offset, data, for_ioctl)
0037222 int port;
0037223 size_t offset;
0037224 acc_t *data;
0037225 int for_ioctl;
put_eth_data()

put_eth_data(port, offset, data, for_ioctl) is called only by reply_thr_put() with data, put_eth_data()'s third parameter, set to null. If there are no ethernet packets waiting to be delivered to the ip port, put_eth_data() simply clears the IEF_READ_IP flag. If data is null and there are ethernet packets waiting to be delivered to the ip port, put_eth_data() calls do_eth_read() to process the packets.


0037226 {
0037227          ip_port_t *ip_port;
0037228          acc_t *pack;
0037229          int result;
0037230 
0037231          ip_port= &ip_port_table[port];
0037232 
0037233          assert(0);
0037234 
0037235          if (ip_port->ip_dl.dl_eth.de_flags & IEF_READ_IP)
0037236          {
0037237                   if (!data)
data, put_eth_data()'s third parameter, will always be null since reply_thr_put() is the only function that calls put_eth_data().


0037238                   {
0037239                            result= (int)offset;
0037240                            if (result<0)
If the operation was not successful, return NW_OK anyway.


0037241                            {
0037242                                     DBLOCK(1, printf(
0037243                                     "ip.c: put_eth_data(..,%d,..)\n", result));
0037244                                     return NW_OK;
0037245                            }
0037246                            if (ip_port->ip_dl.dl_eth.de_flags & IEF_READ_SP)
The IEF_READ_SP flag is set if further ethernet packets are waiting to be handed off to the ip code.


0037247                            {
0037248                                     ip_port->ip_dl.dl_eth.de_flags &=
0037249                                                   ~(IEF_READ_IP|IEF_READ_SP);
0037250                                     do_eth_read(ip_port);
do_eth_read()

do_eth_read(ip_port) repeatedly calls eth_read() until all of the ethernet packets in the read queue of the ethernet file descriptor associated with the ip port ip_port, do_eth_read()'s only parameter, have been passed up to the ip layer.


0037251                            }
0037252                            else
0037253                                     ip_port->ip_dl.dl_eth.de_flags &= ~IEF_READ_IP;
No additional ethernet packets are waiting to be handed off to the ip layer so clear the IEF_READ_IP flag.


0037254                            return NW_OK;
0037255                   }
The code will never reach this point. reply_thr_put() is the only function that calls put_eth_data() and it calls put_eth_data() with its third argument (data) set to null.


0037256                   assert (!offset);
0037257                   /* Warning: the above assertion is illegal; puts and
0037258                    gets of data can be brokenup in any piece the server
0037259                    likes. However we assume that the server is eth.c
0037260                    and it transfers only whole packets. */
0037261                   ip_eth_arrived(port, data, bf_bufsize(data));
ip_eth_arrived()

ip_eth_arrived() is called by the ethernet code (e.g., packet2user()) to hand off a packet to the ip code. ip_eth_arrived() strips off the ethernet header before handing the packet off to ip_arrived() (if the packet is not an ethernet broadcast packet) or ip_arrived_broadcast() (if it is).


0037262                   return NW_OK;
0037263          }
0037264 #if !CRAMPED
0037265          printf("ip_port->ip_dl.dl_eth.de_state= 0x%x",
0037266                   ip_port->ip_dl.dl_eth.de_state);
0037267          ip_panic (( "strange status" ));
0037268 #endif
0037269 }
0037270 
0037271 PRIVATE void ipeth_set_ipaddr(ip_port)
0037272 ip_port_t *ip_port;
ipeth_set_ipaddr()

ipeth_set_ipaddr() calls arp_set_ipaddr() to set the ap_ipaddr field of the ip port's associated arp port. If the ip port has not finished initializing, ipeth_main() is called to finish this initialization.


0037273 {
0037274          arp_set_ipaddr (ip_port->ip_dl.dl_eth.de_port, ip_port->ip_ipaddr);
arp_set_ipaddr()

arp_set_ipaddr(eth_port, ipaddr) is called only from ipeth_set_ipaddr(), which is (indirectly) called by ip_ioctl().

arp_set_ipaddr() simply sets the ap_ipaddr field of an arp port whose index within the arp_port_table[] is eth_port, arp_set_ipaddr()'s first parameter, to the ip address ipaddr, arp_set_ipaddr()'s second parameter.


0037275          if (ip_port->ip_dl.dl_eth.de_state == IES_GETIPADDR)
0037276                   ipeth_main(ip_port);
ipeth_main()

ipeth_main() helps initialize an ip port whose underlying link layer device is an ethernet device. ipeth_main() calls eth_ioctl(), which configures the ethernet file descriptor that corresponds to the ip port and then calls arp_set_cb() to initialize the arp port that is associated with this ip port. After the initialization, ipeth_main() calls do_eth_read() to process any ethernet packets that have arrived at the ethernet file descriptor that was just opened.


0037277 }
0037278 
0037279 PRIVATE int ipeth_send(ip_port, dest, pack, broadcast)
0037280 struct ip_port *ip_port;
0037281 ipaddr_t dest;
0037282 acc_t *pack;
0037283 int broadcast;
ipeth_send()

ipeth_send() is called (indirectly) by ip_send() to send out a packet to a destination address on the same subnet as the ip port from which it is sent or to send out a packet to a broadcast address. ipeth_send() first creates an ethernet header to prepend to the ip packet and then, if there are no packets waiting to be sent out, calls eth_send() in an attempt to send the packet to the ethernet task immediately. If eth_send() is not able to send the ethernet packet immediately, eth_write() is called to queue the packet. If there are already ethernet packets waiting to be sent out, eth_send() and eth_write() are not called and the packet is queued (i.e., placed in the de_q_head queue of the ip port).

udp write path

For a write to a udp device (e.g., /dev/udp), the code takes the following path:


sr_rwio()
udp_write()
restart_write_fd()
ip_write()
ip_send()
if (packet is destined to a system on the local ethernet network) {
ipeth_send()
if (no previous packet being processed by ethernet task)
eth_send()
if (eth_send() can't immediately send packet)
eth_write()
}
else if (packet must be routed)
oroute_frag()
else if (packet is destined for a local destination)
ev_enqueue()



0037284 {
0037285          int r;
0037286          acc_t *eth_pack, *tail;
0037287          size_t pack_size;
0037288          eth_hdr_t *eth_hdr;
0037289          xmit_hdr_t *xmit_hdr;
0037290          ipaddr_t hostpart;
0037291          time_t t;
0037292 
0037293          /* Start optimistic: the arp will succeed without blocking and the
0037294           * ethernet packet can be sent without blocking also. Start with
0037295           * the allocation of the ethernet header.
0037296           */
Address Resolution Protocol (ARP)

ARP Protocol Overview

The Address Resolution Protocol (ARP), documented in RFC 826, translates a system's 32-bit IP address to its corresponding 48-bit Ethernet address.

ARP works by broadcasting a packet to all hosts attached to an Ethernet segment. The packet contains the ip address of the system with which the sender wishes to communicate. Only the host with this ip address answers the packet, sending its ethernet address in the response.

Hosts typically keep a cache of ARP responses (called the ARP table), based on the assumption that ip-to-hardware address mappings rarely change.



0037297          eth_pack= bf_memreq(sizeof(*eth_hdr));
The allocated accessor will hold the ethernet header.


bf_memreq()


After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0037298          assert(eth_pack->acc_next == NULL);
0037299          eth_pack->acc_next= pack;
The ethernet header is prepended to the data (which generally consists of an ip header, a udp or tcp header, and the payload).


0037300          pack_size= bf_bufsize(eth_pack);
0037301          if (pack_size<ETH_MIN_PACK_SIZE)
Pad the ethernet packet up to the minimum size (ETH_MIN_PACK_SIZE) if it is smaller than that.



0037302          {
0037303                   tail= bf_memreq(ETH_MIN_PACK_SIZE-pack_size);
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0037304                   eth_pack= bf_append(eth_pack, tail);
bf_append()

bf_append() appends one accessor linked list to another accessor linked list. For example, if the payload of an ethernet packet (1500 bytes) is appended to an ethernet header (14 bytes):



the resulting linked list is as follows:






0037305          }
0037306          eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.

eth_hdr_t

An ethernet header is fairly simple. The eth_hdr_t typedef is declared in server/ip/gen/eth_hdr.h:

typedef struct eth_hdr

{
ether_addr_t eh_dst;
ether_addr_t eh_src;
ether_type_t eh_proto;
} eth_hdr_t;
ether_addr_t eh_dst: The destination ethernet address.

ether_addr_t eh_src: The source ethernet address.

ether_type_t eh_proto: The protocol of the layer above. The three possibilities are:

#define ETH_RARP_PROTO 0x8035
#define ETH_ARP_PROTO 0x806
#define ETH_IP_PROTO 0x800

An ethernet frame also has a CRC (Cyclic Redundancy Check) at its end to enable the receiving system to determine if corruption occurred during transit.

An ethernet MAC (physical) address is a 48 bit number. This number is broken down into two halves: 22 of the first 24-bits identify the vendor of the Ethernet board (called the "Organizationally Unique Identifier") and the second 24-bits form a serial number assigned by the vendor. This guarantees that no two Ethernet cards have the same MAC address. One of the remaining bits indicates if the packet is a multicast or broadcast packet and the other is used for vendor-specific applications (e.g., NetBEUI).


+--+--+--+--+--+--+
| destination MAC |
+--+--+--+--+--+--+
| source MAC |
+--+--+--+--+--+--+
|08 00|
+--+--+-----------+
| |
. IP .
. packet .
. .
| |
+--+--+--+--+-----+
| CRC |
+--+--+--+--+




0037307 
0037308          /* Lookup the ethernet address */
0037309          if (broadcast)
0037310                   eth_hdr->eh_dst= broadcast_ethaddr;
The variable broadcast_ethaddr is set to { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } on line 37030. This is the ethernet broadcast address (not to be confused with an ip broadcast address).


0037311          else
0037312          {
0037313                   if ((dest & ip_port->ip_subnetmask) !=
0037314                            (ip_port->ip_ipaddr & ip_port->ip_subnetmask))
Verify that the destination ip address is in the same subnet as the ip address of the ip port. For example, if 192.168.1.1/255.255.255.0 is the ip address of the ip port and 192.168.1.2/255.255.255.0 is the destination ip address:

(192.168.1.1 & 255.255.255.0) != (192.168.1.2 & 255.255.255.0)
192.168.1.0 != 192.168.1.0

This is not a true statement. Therefore, ip_panic() would not be called.


0037315                   {
0037316 #if !CRAMPED
0037317                            ip_panic(( "invalid destination" ));
0037318 #endif
0037319                   }
0037320 
0037321                   hostpart= (dest & ~ip_port->ip_subnetmask);
An ip address is made up of a network part and a host part. For example, for the ip address/subnet pair 192.168.1.60/255.255.255.0, the network part is 192.168.1.0 and the host part is 60.

192.168.1.60 & ~(255.255.255.0) = 192.168.1.60 & 0.0.0.255 = 60


0037322 
0037323                   assert(hostpart != 0);
0037324                   assert(dest != ip_port->ip_ipaddr);
0037325 
0037326                   r= arp_ip_eth(ip_port->ip_dl.dl_eth.de_port,
0037327                            dest, &eth_hdr->eh_dst);
arp_ip_eth()

arp_ip_eth(eth_port, ipaddr, ethaddr) looks for an entry in the arp table that matches ipaddr, the second parameter, and if it finds it, returns the corresponding ethernet address in ethaddr, the third parameter. If arp_ip_eth() does not find a valid entry in the arp table for the ip address, it sends out an arp broadcast in an attempt to find the ethernet address for the ip address and returns NW_SUSPEND.


0037328                   if (r == NW_SUSPEND)
xmit_hdr_t

If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).

xmit_hdr is declared in generic/ip_eth.c:

typedef struct xmit_hdr

{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.



0037329                   {
0037330                            /* Unfortunately, the arp takes some time, use
0037331                             * the ethernet header to store the next hop
0037332                             * ip address and the current time.
0037333                             */
0037334                            xmit_hdr= (xmit_hdr_t *)eth_hdr;
Since the ethernet header that we allocated on line 37297 will not immediately be used to hold an ethernet header, it can be used initially as a xmit_hdr.


xmit_hdr_t


If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).

xmit_hdr is declared in generic/ip_eth.c:

typedef struct xmit_hdr

{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.



0037335                            xmit_hdr->xh_time= get_time();
get_time()

get_time() returns the number of clock ticks since reboot.

Several of the clients (eth, arp, ip, tcp, and udp) use get_time() to determine an appropriate timeout value for a given operation. For example, the arp code calls get_time() to determine an appropriate amount of time to wait for a response from an arp request before giving up.


0037336                            xmit_hdr->xh_ipaddr= dest;
0037337                            eth_pack->acc_ext_link= NULL;
Set acc_ext_link to NULL since the packet will be the last in the de_arp_head/de_arp_tail queue.

On lines 37338 - 37345, the packet is placed in the de_arp_head/de_arp_tail queue.


0037338                            if (ip_port->ip_dl.dl_eth.de_arp_head == NULL)
0037339                                     ip_port->ip_dl.dl_eth.de_arp_head= eth_pack;
0037340                            else
0037341                            {
0037342                                     ip_port->ip_dl.dl_eth.de_arp_tail->
0037343                                              acc_ext_link= eth_pack;
0037344                            }
0037345                            ip_port->ip_dl.dl_eth.de_arp_tail= eth_pack;
0037346                            return NW_OK;
0037347                   }
0037348                   if (r == EDSTNOTRCH)
EDSTNOTRCH is #define'd in /include/errno.h:

#define EDSTNOTRCH (_SIGN 56) /* destination not reachable */



0037349                   {
0037350                            bf_afree(eth_pack);
0037351                            return EDSTNOTRCH;
0037352                   }
0037353                   assert(r == NW_OK);
0037354          }
Regardless of whether the packet is a broadcast packet, the code continues from here. At this point the destination address of the ethernet header has been filled in, either with the ethernet broadcast address or with the address returned by a successful arp lookup.


0037355 
0037356          /* If we have no write in progress, we can try to send the ethernet
0037357           * packet using eth_send. If the IP packet is larger than mss,
0037358           * unqueue the packet and let ipeth_restart_send deal with it.
0037359           */
0037360          pack_size= bf_bufsize(eth_pack);
bf_bufsize()

bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).

For a detailed description of the network service's buffer management, click here.


0037361          if (ip_port->ip_dl.dl_eth.de_frame == NULL && pack_size <=
0037362                   ip_port->ip_mss + sizeof(*eth_hdr))
If there are no ethernet packets in the de_frame queue waiting to be sent out (i.e., de_frame == NULL) and the size of the packet is smaller or equal to the maximum size, send the packet out.

If the size of the ethernet packet is too large, ipeth_restart_send() is called (see lines 37400-37401) to split up the ethernet packet into two smaller packets and then attempt to send out the smaller packets.


0037363          {
0037364                   r= eth_send(ip_port->ip_dl.dl_eth.de_fd,
0037365                            eth_pack, pack_size);
eth_send()

eth_send() does a couple of checks and sets some of the fields of the ethernet header before passing the packet off to ev_enqueue() (if the packet is destined for the local loopback) or eth_write_port() (if it is not).


0037366                   if (r == NW_OK)
0037367                            return NW_OK;
0037368 
Blocking in eth_send() occurs if the write queue for the ethernet port is not empty.

The queueing of packets waiting to be sent out an ip port is complicated. Click here for a detailed explanation.


0037369                   /* A non-blocking send is not possible, start a regular
0037370                    * send.
0037371                    */
0037372                   assert(r == NW_WOULDBLOCK);
0037373                   ip_port->ip_dl.dl_eth.de_frame= eth_pack;
0037374                   r= eth_write(ip_port->ip_dl.dl_eth.de_fd, pack_size);
This call to eth_write() will return NW_SUSPEND since there is still a packet in the ethernet file descriptor's etp_wr_pack field. The ethernet file descriptor's EPF_MORE2WRITE flag will be set and etp_write_count will be set to pack_size.


0037375                   if (r == NW_SUSPEND)
0037376                   {
0037377                            assert(!(ip_port->ip_dl.dl_eth.de_flags &
0037378                                     IEF_WRITE_SP));
0037379                            ip_port->ip_dl.dl_eth.de_flags |= IEF_WRITE_SP;
0037380                   }
0037381                   assert(r == NW_OK || r == NW_SUSPEND);
0037382                   return NW_OK;
0037383          }
0037384 
0037385          /* Enqueue the packet, and store the current time, in the
0037386           * room for the ethernet source address.
0037387           */
0037388          t= get_time();
get_time()

get_time() returns the number of clock ticks since reboot.

Several of the clients (eth, arp, ip, tcp, and udp) use get_time() to determine an appropriate timeout value for a given operation. For example, the arp code calls get_time() to determine an appropriate amount of time to wait for a response from an arp request before giving up.


0037389          assert(sizeof(t) <= sizeof(eth_hdr->eh_src));
0037390          memcpy(&eth_hdr->eh_src, &t, sizeof(t));
As the note above (37385-37386) says, the time computed by get_time() is copied to the ethernet header. This value is extracted and analyzed in ipeth_restart_send() on line 37455.

This value represents the time that the packet was enqueued (which happens in the next lines). The packet will be dropped if the ttl (e.g., for udp, the ttl is half a second) expires before the packet is sent off.


0037391 
Lines 37392-37399 place the ethernet packet at the appropriate place in the linked list of ethernet packets waiting to be sent out the ip port.

The queueing of packets waiting to be sent out an ip port is complicated. Click here for a detailed explanation.


0037392          eth_pack->acc_ext_link= NULL;
0037393          if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
0037394                   ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
0037395          else
0037396          {
0037397                   ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link= eth_pack;
0037398          }
0037399          ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
0037400          if (ip_port->ip_dl.dl_eth.de_frame == NULL)
0037401                   ipeth_restart_send(ip_port);
There will be no packet in the de_frame field if and only if the ethernet packet eth_pack was too large to be immediately sent out (see lines 37361-37362).


ipeth_restart_send()


ipeth_restart_send() attempts to send out the packets in an ip port's linked list of ethernet packets waiting to be sent out. If the ethernet packets are too large, ipeth_restart_send() calls ip_split_pack() to split the ethernet packet's encapsulated ip packet into two fragments.

ipeth_restart_send() is called in a number of places within ip_eth.c. For example, ipeth_restart_send() is called if an arp-reply is received for a previous arp-request sent out by the system. Since the destination ethernet address for an ethernet packet is now known, an attempt to send out the packet can be made.


0037402          return NW_OK;
0037403 }
0037404 
0037405 PRIVATE void ipeth_restart_send(ip_port)
0037406 ip_port_t *ip_port;
ipeth_restart_send()

ipeth_restart_send() attempts to send out the packets in an ip port's linked list of ethernet packets waiting to be sent out. If the ethernet packets are too large, ipeth_restart_send() calls ip_split_pack() to split the ethernet packet's encapsulated ip packet into two fragments.

ipeth_restart_send() is called in a number of places within ip_eth.c. For example, ipeth_restart_send() is called if an arp-reply is received for a previous arp-request sent out by the system. Since the destination ethernet address for an ethernet packet is now known, an attempt to send out the packet can be made.


0037407 {
0037408          time_t now, enq_time;
0037409          int r;
0037410          acc_t *eth_pack, *ip_pack, *next_eth_pack, *next_part, *tail;
0037411          size_t pack_size;
0037412          eth_hdr_t *eth_hdr, *next_eth_hdr;
0037413 
0037414          now= get_time();
get_time()

get_time() returns the number of clock ticks since reboot.

Several of the clients (eth, arp, ip, tcp, and udp) use get_time() to determine an appropriate timeout value for a given operation. For example, the arp code calls get_time() to determine an appropriate amount of time to wait for a response from an arp request before giving up.


0037415 
0037416          while (ip_port->ip_dl.dl_eth.de_q_head != NULL)
Go through the ip port's linked list of ethernet packets that are waiting to be sent out.

The queueing of packets that are waiting to be sent out an ip port is complicated. Click here for a detailed explanation.


0037417          {
0037418                   eth_pack= ip_port->ip_dl.dl_eth.de_q_head;
0037419                   ip_port->ip_dl.dl_eth.de_q_head= eth_pack->acc_ext_link;
0037420 
0037421                   eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.

eth_hdr_t

An ethernet header is fairly simple. The eth_hdr_t typedef is declared in server/ip/gen/eth_hdr.h:

typedef struct eth_hdr

{
ether_addr_t eh_dst;
ether_addr_t eh_src;
ether_type_t eh_proto;
} eth_hdr_t;
ether_addr_t eh_dst: The destination ethernet address.

ether_addr_t eh_src: The source ethernet address.

ether_type_t eh_proto: The protocol of the layer above. The three possibilities are:

#define ETH_RARP_PROTO 0x8035
#define ETH_ARP_PROTO 0x806
#define ETH_IP_PROTO 0x800

An ethernet frame also has a CRC (Cyclic Redundancy Check) at its end to enable the receiving system to determine if corruption occurred during transit.

An ethernet MAC (physical) address is a 48 bit number. This number is broken down into two halves: 22 of the first 24 bits identify the vendor of the Ethernet board (called the "Organizationally Unique Identifier") and the second 24 bits form a serial number assigned by the vendor. This guarantees that no two Ethernet cards have the same MAC address. Of the two remaining bits, one indicates whether the packet is a multicast or broadcast packet and the other is used for vendor-specific applications (e.g., NetBEUI).


+--+--+--+--+--+--+
| destination MAC |
+--+--+--+--+--+--+
| source MAC |
+--+--+--+--+--+--+
|08 00|
+--+--+-----------+
| |
. IP .
. packet .
. .
| |
+--+--+--+--+-----+
| CRC |
+--+--+--+--+




0037422 
0037423                   pack_size= bf_bufsize(eth_pack);
bf_bufsize()

bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).

For a detailed description of the network service's buffer management, click here.


0037424 
0037425                   if (pack_size > ip_port->ip_mss+sizeof(*eth_hdr))
If the ethernet packet is larger than the allowable size (i.e., larger than the mss field plus an ethernet header), break the ethernet packet into two packets. This is accomplished by splitting the encapsulated ip packet into two fragments, creating another ethernet header, and then encapsulating the second ip fragment with the newly created ethernet header.


0037426                   {
0037427                            /* Split the IP packet */
0037428                            ip_pack= eth_pack->acc_next;
0037429                            next_part= ip_pack;
0037430                            ip_pack= ip_split_pack(ip_port, &next_part,
0037431                                                        ip_port->ip_mss);
ip_split_pack()

ip_split_pack(ip_port, ref_last, first_size) is called by ipeth_restart_send() to split up an ip packet into fragments if the packet is too large (i.e., the packet is greater than the maximum ethernet packet size without the header (1514-14 bytes)). ip_split_pack() returns a reference to the first fragment and returns a reference to the second fragment in ref_last, ip_split_pack()'s second parameter. ref_last is also used to pass in the packet that is to be split.


0037432                            if (ip_pack == NULL)
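If ip_split_pack() returns NULL, free the ethernet packet and continue with the next packet in the queue (the function does not return here).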


0037433                            {
0037434                                     bf_afree(eth_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0037435                                     continue;
0037436                            }
0037437 
Lines 37438-37452 create a new ethernet header, encapsulate the second fragment of the newly split ip packet with the new ethernet header, and place the new ethernet packet in the linked list of the ip port's ethernet packets waiting to be sent out.


0037438                            /* Allocate new ethernet header */
0037439                            next_eth_pack= bf_memreq(sizeof(*next_eth_hdr));
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0037440                            next_eth_hdr= (eth_hdr_t *)ptr2acc_data(next_eth_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0037441                            *next_eth_hdr= *eth_hdr;
0037442                            next_eth_pack->acc_next= next_part;
The ethernet header for the second ip fragment will be the same as the ethernet header for the first ip fragment.


0037443 
Place the newly created ethernet packet in the ip port's linked list of ethernet packets waiting to be sent out.


0037444                            next_eth_pack->acc_ext_link= NULL;
0037445                            if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
0037446                                     ip_port->ip_dl.dl_eth.de_q_head= next_eth_pack;
0037447                            else
0037448                                     ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link=
0037449                                                                 next_eth_pack;
0037450                            ip_port->ip_dl.dl_eth.de_q_tail= next_eth_pack;
0037451 
0037452                            eth_pack->acc_next= ip_pack;
0037453                            pack_size= bf_bufsize(eth_pack);
Calculate the new size of the first ethernet packet (not the ethernet packet just created). This value is used in eth_send() (see line 37477).


bf_bufsize()


bf_bufsize() returns the total buffer size of a linked list of accessors (i.e., the sum of acc_length for the accessors in a linked list).

For a detailed description of the network service's buffer management, click here.


0037454                   }
0037455 
enq_time is the time that the ethernet packet was queued; it was placed in the source field of the ethernet header on line 37390. If more than a second has elapsed since the ethernet packet was placed in the queue, update the ttl of the ethernet packet's encapsulated ip packet and the checksum of its ip header. If the ttl expired, discard the packet.

HZ is #define'd in include/minix/const.h:

#define HZ 60 /* clock freq (software settable on IBM-PC) */


0037456                   memcpy(&enq_time, &eth_hdr->eh_src, sizeof(enq_time));
0037457                   if (enq_time + HZ < now)
0037458                   {
0037459                            r= ipeth_update_ttl(enq_time, now, eth_pack);
ipeth_update_ttl()

ipeth_update_ttl(enq_time, now, eth_pack) adjusts the ttl of the encapsulated ip packet's ip header of ethernet packet eth_pack, the third parameter of ipeth_update_ttl(), and recalculates the checksum of the ip header to reflect this change.

If the ip packet's ttl has already expired, ipeth_update_ttl() returns ETIMEDOUT.


0037460                            if (r == ETIMEDOUT)
Discard the packet and free up the packet's buffers if the ttl of the ethernet packet's encapsulated ip packet has expired.

ETIMEDOUT is #define'd in /include/errno.h:

#define ETIMEDOUT (_SIGN 61) /* connection timed out */



0037461                            {       
0037462                                     ip_warning(( "should send ICMP ttl exceded" ));
0037463                                     bf_afree(eth_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0037464                                     continue;
0037465                            }
0037466                            assert(r == NW_OK);
0037467                   }
0037468 
An ethernet packet must be at least ETH_MIN_PACK_SIZE (#define'd as 60 in include/net/gen/ether.h) bytes. If not, add some zeroes onto the end of the packet.


0037469                   if (pack_size<ETH_MIN_PACK_SIZE)
0037470                   {
0037471                            tail= bf_memreq(ETH_MIN_PACK_SIZE-pack_size);
bf_memreq()

After the buffers have been initialized, accessors[] looks like the following:



bf_memreq() allocates accessors to the caller. For example, if 1514 bytes of buffer space are requested immediately after the network process starts and each buffer is 512 bytes (the default), then accessors[] will look like the following:



Note that three elements of accessors[] have been removed from buf512_freelist and that the head of the chain of the 3 accessors is returned by bf_memreq(). Also note that the acc_linkC and buf_linkC fields have been set to one and acc_length and acc_offset have been set to their appropriate values.

So what happens if there are not enough buffers on the buf512_freelist to satisfy a request? On lines 2280-2290 of buf.c, functions that free buffers for the specific clients (e.g., eth_buffree()) are called until there are enough buffers on buf512_freelist.

For a complete description of the network service's buffer management, click here.


0037472                            eth_pack= bf_append(eth_pack, tail);
bf_append()

bf_append() appends one accessor linked list to another accessor linked list. For example, if the payload of an ethernet packet (1500 bytes) is appended to an ethernet header (14 bytes):



the resulting linked list is as follows:






0037473                   }
0037474 
0037475                   assert(ip_port->ip_dl.dl_eth.de_frame == NULL);
0037476 
0037477                   r= eth_send(ip_port->ip_dl.dl_eth.de_fd, eth_pack, pack_size);
eth_send()

eth_send() does a couple of checks and sets some of the fields of the ethernet header before passing the packet off to ev_enqueue() (if the packet is destined for the local loopback) or eth_write_port() (if it is not).


0037478                   if (r == NW_OK)
0037479                            continue;
0037480 
0037481                   /* A non-blocking send is not possible, start a regular
0037482                    * send.
0037483                    */
The ethernet task is already trying to send out a packet. Place the packet in the de_frame field of the ip port.

The queueing of packets that are waiting to be sent out an ip port is complicated. Click here for a detailed explanation.


0037484                   assert(r == NW_WOULDBLOCK);
0037485                   ip_port->ip_dl.dl_eth.de_frame= eth_pack;
0037486                   r= eth_write(ip_port->ip_dl.dl_eth.de_fd, pack_size);
eth_write()

If a few tests (e.g., a test to determine if the ethernet packet is either too large or too small) have positive results and the ethernet task is not attempting to send an ethernet packet (i.e., etp_wr_pack is null) and the packet is coming from the ip code, eth_write(fd, count) passes the ethernet packet stored in the dl_eth.de_frame field of the ip port associated with the ethernet file descriptor fd, eth_write()'s first parameter, to eth_send().

If the packet is coming from the arp code (i.e., an arp-request or an arp-reply is being sent out), eth_write() calls arp_getdata() to create the ethernet packet before passing the newly created packet off to eth_send().

If the ethernet task is attempting to send an ethernet packet, eth_write() sets the ethernet port's EPF_MORE2WRITE flag and returns NW_SUSPEND.


0037487                   if (r == NW_SUSPEND)
As described above, eth_write() returns NW_SUSPEND if the ethernet task is still delivering a previous packet.

The IEF_WRITE_SP flag is eventually cleared by get_eth_data() on line 37200. ipeth_restart_send() is then called to attempt to send out the packets that are still waiting to be sent out.


0037488                   {
0037489                            assert(!(ip_port->ip_dl.dl_eth.de_flags &
0037490                                     IEF_WRITE_SP));
0037491                            ip_port->ip_dl.dl_eth.de_flags |= IEF_WRITE_SP;
0037492                            return;
0037493                   }
0037494                   assert(r == NW_OK);
0037495          }
0037496 }
0037497 
0037498 
0037499 PRIVATE void ipeth_arp_reply(ip_port_nr, ipaddr, eth_addr)
0037500 int ip_port_nr;
0037501 ipaddr_t ipaddr;
0037502 ether_addr_t *eth_addr;
ipeth_arp_reply()

ipeth_arp_reply() is called (indirectly) by client_reply() under one of the following circumstances:

1) An arp-reply packet has been received in response to a previous arp-request packet that this system sent out.

2) An arp-request packet has timed out. In this case, eth_addr, ipeth_arp_reply()'s third parameter, will be NULL.

3) An arp-request packet has been received that contains the information requested by a previous arp-request packet that this system sent out.

ipeth_arp_reply() searches the queue of ethernet packets waiting for arp resolution for the ip address of the arp-request/arp-reply. If the arp resolution timed out, the packet is discarded. If the arp resolution was successful, the ethernet packet is moved to the queue of packets waiting to be sent out and ipeth_restart_send() is called to send out the packets.


0037503 {
0037504          acc_t *prev, *eth_pack;
0037505          int r;
0037506          xmit_hdr_t *xmit_hdr;
0037507          ip_port_t *ip_port;
0037508          time_t t;
0037509          eth_hdr_t *eth_hdr;
0037510          ether_addr_t tmp_eth_addr;
0037511 
0037512          assert (ip_port_nr >= 0 && ip_port_nr < ip_conf_nr);
0037513          ip_port= &ip_port_table[ip_port_nr];
0037514 
0037515          for (;;)
0037516          {
0037517                   for (prev= 0, eth_pack= ip_port->ip_dl.dl_eth.de_arp_head;
0037518                            eth_pack;
0037519                            prev= eth_pack, eth_pack= eth_pack->acc_ext_link)
ipaddr, ipeth_arp_reply()'s second parameter, is either the ip address whose corresponding ethernet address has been resolved or the target ip address of an arp-request that has just timed out. Find every ethernet packet in the de_arp_head/de_arp_tail queue whose destination is this ip address. If the arp-request did not time out, move the packet to the queue of outgoing ethernet packets (i.e., the de_q_head/de_q_tail queue); if the arp-request did time out, discard the packet.


xmit_hdr_t


If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).

xmit_hdr is declared in generic/ip_eth.c:

typedef struct xmit_hdr

{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.



0037520                   {
0037521                            xmit_hdr= (xmit_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0037522                            if (xmit_hdr->xh_ipaddr == ipaddr)
0037523                                     break;
0037524                   }
0037525 
0037526                   if (eth_pack == NULL)
0037527                   {
0037528                            /* No packet found. */
0037529                            break;
0037530                   }
0037531 
0037532                   /* Delete packet from the queue. */
The arp-request has been either answered (perhaps through an arp-request received from another system) or has timed out. Remove the ethernet packet from the de_arp_head/de_arp_tail queue.


0037533                   if (prev == NULL)
0037534                   {
0037535                            ip_port->ip_dl.dl_eth.de_arp_head=
0037536                                     eth_pack->acc_ext_link;
0037537                   }
0037538                   else
0037539                   {
0037540                            prev->acc_ext_link= eth_pack->acc_ext_link;
0037541                            if (prev->acc_ext_link == NULL)
0037542                                     ip_port->ip_dl.dl_eth.de_arp_tail= prev;
0037543                   }
0037544 
0037545                   if (eth_addr == NULL)
eth_addr, ipeth_arp_reply()'s third parameter, will be NULL if an arp-request packet has timed out.


0037546                   {
0037547                            /* Destination is unreachable, delete packet. */
0037548                            bf_afree(eth_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0037549                            continue;
0037550                   }
0037551 
0037552                   /* Fill in the ethernet address and put the packet on the
0037553                    * transmit queue.
0037554                    */
The ethernet packet was removed from the de_arp_head/de_arp_tail queue. Now that the destination ethernet address is known, place the ethernet packet on the queue of packets waiting to be sent out (i.e., the de_q_head/de_q_tail queue).


0037555                   t= xmit_hdr->xh_time;
0037556                   eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0037557                   eth_hdr->eh_dst= *eth_addr;
The destination ethernet address was acquired by either an arp-reply (the usual case) or an arp-request.


0037558                   memcpy(&eth_hdr->eh_src, &t, sizeof(t));
Before the packet is actually sent out (and the source ethernet address is placed in eh_src), the eh_src field is used to ensure that the packet did not time out.


0037559 
0037560                   eth_pack->acc_ext_link= NULL;
0037561                   if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
0037562                            ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
0037563                   else
0037564                   {
0037565                            ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link=
0037566                                     eth_pack;
0037567                   }
0037568                   ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
0037569          }
0037570 
0037571          /* Try to get some more ARPs in progress. */
Although it is unlikely to succeed, this while loop attempts to find the corresponding ethernet addresses for the ip addresses of the packets still waiting in the de_arp_head/de_arp_tail queue by calling arp_ip_eth(). The loop is exited when arp_ip_eth() returns NW_SUSPEND, which will likely happen on the first call. arp_ip_eth() returns something other than NW_SUSPEND only when a matching entry is found in the arp table (NW_OK) or when the destination is reported as unreachable (EDSTNOTRCH).


0037572          while (ip_port->ip_dl.dl_eth.de_arp_head)
0037573          {
0037574                   eth_pack= ip_port->ip_dl.dl_eth.de_arp_head;
0037575                   xmit_hdr= (xmit_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.

xmit_hdr_t

If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).

xmit_hdr is declared in generic/ip_eth.c:

typedef struct xmit_hdr

{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.



0037576                   r= arp_ip_eth(ip_port->ip_dl.dl_eth.de_port,
0037577                            xmit_hdr->xh_ipaddr, &tmp_eth_addr);
arp_ip_eth()

arp_ip_eth(eth_port, ipaddr, ethaddr) looks for an entry in the arp table that matches ipaddr, the second parameter, and if it finds it, returns the corresponding ethernet address in ethaddr, the third parameter. If arp_ip_eth() does not find a valid entry in the arp table for the ip address, it sends out an arp broadcast in an attempt to find the ethernet address for the ip address and returns NW_SUSPEND.


0037578                   if (r == NW_SUSPEND)
0037579                            break;                            /* Normal case */
0037580 
0037581                   /* Dequeue the packet */
0037582                   ip_port->ip_dl.dl_eth.de_arp_head= eth_pack->acc_ext_link;
If the code reaches this point, either the destination was declared unreachable or an ethernet address for the ip address has been found. Adjust the de_arp_head queue accordingly.


0037583 
0037584                   if (r == EDSTNOTRCH)
0037585                   {
0037586                            bf_afree(eth_pack);
bf_afree()

After a chain of accessors is no longer needed, the chain (and not simply the single accessor passed as the parameter) can be freed by calling bf_afree(). However, if either acc_linkC or buf_linkC of one of the accessors in the linked list is not equal to one (1), the entire chain will not be freed. For example, if bf_afree(acc1) is called for the following chain:



Then the resulting chain will be:



bf_afree() returns acc1 (accessors[63]) to acc_freelist (recall that acc_freelist is the linked list of acc_t's without an associated buffer). However, buffers512[127] cannot be freed because acc2 (accessors[64]) still references it.

bf_afree() is called after an accessor's associated data is no longer needed (for example, after a packet has been sent off by the ethernet driver).


0037587                            continue;
0037588                   }
0037589                   assert(r == NW_OK);
0037590 
0037591                   /* Fill in the ethernet address and put the packet on the
0037592                    * transmit queue.
0037593                    */
As was done above (lines 37555-37568), place the ethernet packet on the queue of packets waiting to be sent out (i.e., the de_q_head/de_q_tail queue).


0037594                   t= xmit_hdr->xh_time;
xmit_hdr_t

If the arp table (i.e., arp cache) does not contain an entry for a given ip address, arp_ip_eth() returns NW_SUSPEND and the outgoing packet is placed in the ip port's de_arp_head/de_arp_tail queue. Before being placed in this queue, the packet is encapsulated in a "xmit" header (as opposed to an ethernet header).

xmit_hdr is declared in generic/ip_eth.c:

typedef struct xmit_hdr

{
time_t xh_time;
ipaddr_t xh_ipaddr;
} xmit_hdr_t;
where xh_time is the time at which the packet is placed in the de_arp_head/de_arp_tail queue and xh_ipaddr is the destination ip address of the packet.



0037595                   eth_hdr= (eth_hdr_t *)ptr2acc_data(eth_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0037596                   eth_hdr->eh_dst= tmp_eth_addr;
0037597                   memcpy(&eth_hdr->eh_src, &t, sizeof(t));
0037598 
0037599                   eth_pack->acc_ext_link= NULL;
0037600                   if (ip_port->ip_dl.dl_eth.de_q_head == NULL)
0037601                            ip_port->ip_dl.dl_eth.de_q_head= eth_pack;
0037602                   else
0037603                   {
0037604                            ip_port->ip_dl.dl_eth.de_q_tail->acc_ext_link=
0037605                                     eth_pack;
0037606                   }
0037607                   ip_port->ip_dl.dl_eth.de_q_tail= eth_pack;
0037608          }
0037609 
0037610          /* Restart sending ethernet packets. */
0037611          if (ip_port->ip_dl.dl_eth.de_frame == NULL)
0037612                   ipeth_restart_send(ip_port);
ipeth_restart_send()

ipeth_restart_send() attempts to send out the packets in an ip port's linked list of ethernet packets waiting to be sent out. If the ethernet packets are too large, ipeth_restart_send() calls ip_split_pack() to split the ethernet packet's encapsulated ip packet into two fragments.

ipeth_restart_send() is called in a number of places within ip_eth.c. For example, ipeth_restart_send() is called if an arp-reply is received for a previous arp-request sent out by the system. Since the destination ethernet address for an ethernet packet is now known, an attempt to send out the packet can be made.


0037613 }
0037614 
0037615 PRIVATE int ipeth_update_ttl(enq_time, now, eth_pack)
0037616 time_t enq_time;
0037617 time_t now;
0037618 acc_t *eth_pack;
ipeth_update_ttl()

ipeth_update_ttl(enq_time, now, eth_pack) adjusts the ttl of the encapsulated ip packet's ip header of ethernet packet eth_pack, the third parameter of ipeth_update_ttl(), and recalculates the checksum of the ip header to reflect this change.

If the ip packet's ttl has already expired, ipeth_update_ttl() returns ETIMEDOUT.


0037619 {
0037620          int ttl_diff;
0037621          ip_hdr_t *ip_hdr;
0037622          u32_t sum;
0037623          u16_t word;
0037624          acc_t *ip_pack;
0037625 
0037626          ttl_diff= (now-enq_time)/HZ;
Calculate the difference in time (in seconds).


0037627          enq_time += ttl_diff*HZ;
0037628          assert(enq_time <= now && enq_time + HZ > now);
0037629 
Lines 37630-37635 extract the encapsulated ip packet's ip header so that the header's fields can be analyzed and altered.


0037630          ip_pack= eth_pack->acc_next;
0037631          assert(ip_pack->acc_length >= sizeof(*ip_hdr));
0037632          assert(ip_pack->acc_linkC == 1 &&
0037633                   ip_pack->acc_buffer->buf_linkC == 1);
0037634 
0037635          ip_hdr= (ip_hdr_t *)ptr2acc_data(ip_pack);
ptr2acc_data()

The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0037636          if (ip_hdr->ih_ttl <= ttl_diff)
0037637                   return ETIMEDOUT;
Determine if the ip header's ttl has expired while in the queue.

The ttl is a somewhat tricky field. Let's say that a udp packet is being sent out of an ethernet interface and that this udp packet (which was, obviously, encapsulated by an ip header and an ethernet header) was enqueued for 5 seconds before it was ready to be sent out. The default ttl for udp packets is 30 (UDP_TTL); therefore, this packet will now have a ttl of 25. After the packet is sent out of the ethernet interface, each router that encounters the packet decrements the ttl field. If the packet has traversed 25 routers and still hasn't reached its destination, this 25th router will drop the packet.

As the example above shows, the ttl field can be both an upper-limit on time and an upper-limit on hop-count. The ttl field in this instance is an upper-limit on time.

Note that the meaning of the ttl field (according to various RFCs) has changed. Initially, the ttl field was an upper-limit on the time spent in transit. The IETF later changed the meaning of the ttl field to be an upper-limit on the number of hops needed to reach the destination. In the Minix network service code, the ttl can have both meanings, which adds to the confusion. For example, if the network service receives a packet on one interface and then has to route the packet out another interface, the ttl field is decremented (in other words, the ttl is regarded as a hop-count).


0037638          sum= (u16_t)~ip_hdr->ih_hdr_chk;
0037639          word= *(u16_t *)&ip_hdr->ih_ttl;
0037640          if (word > sum)
0037641                   sum += 0xffff - word;
0037642          else
0037643                   sum -= word;
0037644          ip_hdr->ih_ttl -= ttl_diff;
0037645          word= *(u16_t *)&ip_hdr->ih_ttl;
0037646          sum += word;
0037647          if (sum > 0xffff)
0037648                   sum -= 0xffff;
0037649          assert(!(sum & 0xffff0000));
0037650          ip_hdr->ih_hdr_chk= ~sum;
0037651          assert(ip_hdr->ih_ttl > 0);
0037652          return NW_OK;
0037653 }
0037654 
0037655 PRIVATE void do_eth_read(ip_port)
do_eth_read()

do_eth_read(ip_port) repeatedly calls eth_read() until all of the ethernet packets in the read queue of the ethernet file descriptor associated with the ip port ip_port, do_eth_read()'s only parameter, have been passed up to the ip layer.


0037656 ip_port_t *ip_port;
0037657 {
0037658          int result;
0037659 
0037660          assert(!(ip_port->ip_dl.dl_eth.de_flags & IEF_READ_IP));
0037661 
0037662          for (;;)
0037663          {
0037664                   ip_port->ip_dl.dl_eth.de_flags |= IEF_READ_IP;
0037665 
0037666                   result= eth_read (ip_port->ip_dl.dl_eth.de_fd,
0037667                            ETH_MAX_PACK_SIZE);
eth_read()

eth_read() attempts to deliver all of the ethernet packets in an ethernet file descriptor's read queue to its associated ip port or arp port or sr file descriptor and returns NW_SUSPEND when there are no more ethernet packets to deliver.


0037668                   if (result == NW_SUSPEND)
eth_read() returns NW_SUSPEND if there isn't a packet waiting or the packet has expired. The IEF_READ_SP flag is eventually cleared by put_eth_data() (see lines 37248-37249).


0037669                   {
0037670                            assert(!(ip_port->ip_dl.dl_eth.de_flags &
0037671                                                               IEF_READ_SP));
0037672                            ip_port->ip_dl.dl_eth.de_flags |= IEF_READ_SP;
0037673                            return;
0037674                   }
If this point in the code is reached, eth_read() successfully processed the packet. Clear the IEF_READ_IP flag.


0037675                   ip_port->ip_dl.dl_eth.de_flags &= ~IEF_READ_IP;
0037676                   if (result<0)
eth_read() either returns NW_OK (if a packet was successfully processed or there was a problem with the packet) or NW_SUSPEND (if there was no packet waiting). Since NW_SUSPEND was handled above, at this point in the code, the variable result will never be negative.


0037677                   {
0037678                            return;
0037679                   }
0037680          }
0037681 }
0037682 
0037683 PRIVATE void ip_eth_arrived(port, pack, pack_size)
0037684 int port;
0037685 acc_t *pack;
0037686 size_t pack_size;
ip_eth_arrived()

ip_eth_arrived() is called by the ethernet code (e.g., packet2user()) to hand off a packet to the ip code. ip_eth_arrived() strips off the ethernet header before handing the packet off to ip_arrived() (if the packet is not an ethernet broadcast packet) or ip_arrived_broadcast() (if it is).

udp read path

eth_arrive() 

ip_eth_arrived()

if (unicast packet)
ip_arrived()
else if (ethernet broadcast packet)
ip_arrived_broadcast()

if (packet must be input routed)
hand off packet to destination ip port
else
ip_port_arrive() {
packet2user()
udp_ip_arrived()
}



0037687 {
0037688          int broadcast;
0037689          ip_port_t *ip_port;
0037690 
0037691          ip_port= &ip_port_table[port];
Find the ip port whose index within ip_port_table[] is port, ip_eth_arrived()'s first parameter.


0037692          broadcast= (*(u8_t *)ptr2acc_data(pack) & 1);
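The ethernet broadcast address is ff:ff:ff:ff:ff:ff. The broadcast address is a special case of a multicast address; a multicast address has the low-order bit of its first byte set (e.g., 01:00:00:00:00:00). The test above examines only that bit of the first byte of the packet (the start of the destination ethernet address), so broadcast is nonzero for any multicast packet, not only for packets sent to ff:ff:ff:ff:ff:ff.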


ptr2acc_data()


The macro ptr2acc_data is #define'd in inet/generic/buf.h as:

#define ptr2acc_data(/* acc_t * */ a) (bf_temporary_acc=(a), \
(&bf_temporary_acc->acc_buffer->buf_data_p[bf_temporary_acc-> \
acc_offset]))

ptr2acc_data() simply returns a pointer to the actual data within an accessor.

ptr2acc_data() is usually called so that the fields of a header (e.g., ip header) can be analyzed.


0037693 
0037694          pack= bf_delhead(pack, ETH_HDR_SIZE);
Strip off the ethernet header before passing the packet off to ip_arrived() or ip_arrived_broadcast().


bf_delhead()


When only the beginning of a linked list of accessors is to be freed, bf_delhead() is called. If acc_linkC and buf_linkC are one for all of the relevant accessors and their associated buffers in the linked list, the operation is straightforward.



bf_delhead() is often called to remove the header (e.g., ip header) from a packet.

For a detailed description of the network service's buffer management, click here.


0037695 
0037696          if (broadcast)
0037697                   ip_arrived_broadcast(ip_port, pack);
ip_arrived_broadcast()

If a packet arrives on an ethernet interface, ip_arrived_broadcast() is called from ip_eth_arrived() (instead of ip_arrived()) if the destination ethernet address of the arriving packet has the multicast bit set, which is the case for the broadcast ethernet address (i.e., ff:ff:ff:ff:ff:ff). ip_arrived_broadcast() performs some checks that include verifying that the destination ip address (in addition to the destination ethernet address) is the broadcast address.


0037698          else
0037699                   ip_arrived(ip_port, pack);
ip_arrived()

Depending on the destination ip address of its second parameter,
ip_arrived(ip_port, pack) does one of several things:

1) If the destination ip address is the ip address of the ip port associated with the ethernet port, ip_arrived() calls ip_port_arrive() for the packet.

2) If the destination ip address is the ip address of another ip port, ip_arrived() also calls ip_port_arrive(). This time, however, the first argument passed to ip_port_arrive() is the other port. Note that for this to take place, an input route to the other ip port must exist.

3) If the destination ip address is not the address of another ip port but it is in the same network as another ip port, ip_arrived() sends the packet out the other interface. Again, an input route to the other ip port for this destination must exist.

4) If the destination ip address is not the address of another ip port and it is not in the same network as another ip port, ip_arrived() sends the packet out to the gateway for this network. Again, an input route (including the gateway) to the other ip port for this destination must exist.

5) If the destination ip address is not the ip address of the ip port but an input route for the destination exists and is associated with the same ip port on which the packet arrived, an icmp redirect message is sent to the source (provided the source is on the same network) and the packet is then sent. If the source of the ip packet is not on the same network as the ip port, the packet is dropped.

If an ip packet arrives on an ethernet interface, ip_eth_arrived() strips off a packet's ethernet header before handing the packet off to ip_arrived().



0037700 }
0037701 
0037702 /*
0037703  * $PchId: ip_eth.c,v 1.9 1996/12/17 07:55:21 philip Exp $
0037704  */