share/doc/papers/pulldown/2.t

   1 .\"     $Id: 2.t,v 1.1 2001/07/04 05:29:25 itojun Exp $
   2 .\"
   3 .\".ds RH KAME approach
   4 .NH 1
   5 KAME approach
   6 .PP
   7 This section describes the approaches we at the KAME project
   8 took to address the problems mentioned in the previous section.
   9 We introduce a new function called
  10 .I m_pulldown,
  11 in place of
  12 .I m_pullup,
  13 for adjusting payload data in the mbuf.
  14 We also change the calling sequence for the protocol input function.
  15 .NH 2
  16 What is the KAME project?
  17 .PP
  18 In the early days of IPv6/IPsec development,
  19 the Japanese research community felt it very important to make
  20 a reference code available in a freely-redistributable form
  21 for educational, research and deployment purposes.
  22 The KAME project is a consortium of 7 Japanese companies and
  23 an academic research group.
  24 The project aims to deliver an IPv6/IPsec reference implementation
  25 for 4.4BSD, under a BSD license.
  26 The KAME project intends to deliver the most
  27 spec-conformant IPv6/IPsec implementation possible.
  28 .NH 2
  29 m_pulldown function
  30 .PP
  31 Here we introduce a new function,
  32 .I m_pulldown,
  33 to address the 3 problems with
  34 .I m_pullup
  35 that we have described above.
  36 The actual source code is included at the end of this paper.
  37 The function prototype is as follows:
  38 .DS
  39 .SM
  40 \f[CR]struct mbuf *
  41 m_pulldown(m, off, len, offp)
  42         struct mbuf *m;
  43         int off, len;
  44         int *offp;\fP
  45 .NL
  46 .DE
  47 .I m_pulldown
  48 will ensure that the data region in the mbuf chain,
  49 starting at
  50 .I off
  51 and ending at
  52 .I "off + len",
  53 is put into a continuous memory region.
  54 .I len
  55 must be smaller than, or equal to, MCLBYTES (2048 bytes).
  56 The function returns a pointer to an intermediate mbuf in the chain
  57 (we refer to the pointer as \fIn\fP), and puts the new offset in
  58 .I n
  59 to
  60 .I *offp.
  61 If
  62 .I offp
  63 is NULL, the resulting region can be located by
  64 .I "mtod(n, caddr_t)";
  65 if
  66 .I offp
  67 is non-null, it will be located at
  68 .I "mtod(n, caddr_t) + *offp".
  69 The mbuf prior to
  70 .I off
  71 will remain untouched,
  72 so it is safe to keep the pointers to the mbuf chain.
  73 For example, consider the mbuf chain
  74 .nr figure +1
  75 in Figure \n[figure]
  76 .nr figure -1
  77 as the input.
  78 .KF
  79 .PS
  80 define pointer { box ht boxht*1/4 }
  81 define payload { box }
  82 IP: [
  83         IPp: pointer
  84         IPd: payload with .n at bottom of IPp "mbuf1" "50 bytes"
  85 ]
  86 move
  87 TCP: [
  88         TCPp: pointer
  89         TCPd: payload with .n at bottom of TCPp "mbuf2" "20 bytes"
  90 ]
  91 arrow from IP.IPp.center to TCP.TCPp.center
  92 .PE
  93 .ce
  94 .nr figure +1
  95 Figure \n[figure]: mbuf chain before the call to \fIm_pulldown\fP
  96 .KE
  97 If we call
  98 .I m_pulldown
  99 with
 100 .I "off = 40",
 101 .I "len = 10",
 102 and a non-null
 103 .I offp,
 104 the mbuf chain will remain unchanged.
 105 The return value will be a pointer to mbuf1, and
 106 .I *offp
 107 will be
 108 filled with 40.
 109 If we call
 110 .I m_pulldown
 111 with
 112 .I "off = 40",
 113 .I "len = 20",
 114 and null
 115 .I offp,
 116 then the mbuf chain will be modified as shown
 117 .nr figure +1
 118 in Figure \n[figure],
 119 .nr figure -1
 120 by inserting a newly allocated mbuf, mbuf3,
 121 into the middle and moving data into it from both mbuf1 and mbuf2.
 122 The function returns a pointer to mbuf3.
 123 .KF
 124 .PS
 125 define pointer { box ht boxht*1/4 }
 126 define payload { box }
 127 IP: [
 128         IPp: pointer
 129         IPd: payload with .n at bottom of IPp "mbuf1" "40 bytes"
 130 ]
 131 move 0.2;
 132 INT: [
 133         INTp: pointer
 134         INTd: payload with .n at bottom of INTp "mbuf3" "20 bytes"
 135 ]
 136 move 0.2;
 137 TCP: [
 138         TCPp: pointer
 139         TCPd: payload with .n at bottom of TCPp "mbuf2'" "10 bytes"
 140 ]
 141 arrow from IP.IPp.center to INT.INTp.center
 142 arrow from INT.INTp.center to TCP.TCPp.center
 143 .PE
 144 .ce
 145 .nr figure +1
 146 Figure \n[figure]: mbuf chain after the call to \fIm_pulldown\fP, with \fIoff = 40\fP and \fIlen = 20\fP
 147 .KE
 148 The
 149 .I m_pulldown
 150 function solves all 3 problems in
 151 .I m_pullup
 152 that were described in the previous section.
 153 .I m_pulldown
 154 does not copy mbufs when copying is not necessary.
 155 Since it does not modify the mbuf chain prior to the specified offset
 156 .I off,
 157 it is not necessary for the caller to re-initialize the pointers into the mbuf data
 158 region.
 159 With
 160 .I m_pullup,
 161 we always needed to specify the data payload length, starting from the very first byte
 162 in the packet.
 163 With
 164 .I m_pulldown,
 165 we pass
 166 .I off
 167 as the offset to the data payload we are interested in.
 168 This change avoids extra data manipulation when we are only interested in
 169 the intermediate data portion of the packet.
 170 It also relaxes the assumption regarding the total packet header length.
 171 While
 172 .I m_pullup
 173 assumes that the total packet header length is smaller than or equal to MHLEN
 174 (100 bytes),
 175 .I m_pulldown
 176 assumes that the length of a single packet header is smaller than or equal to MCLBYTES
 177 (2048 bytes).
 178 With the mbuf framework this is the best we
 179 can do, since there is no way to hold a continuous region longer than
 180 MCLBYTES in a standard mbuf chain.
 181 .NH 2
 182 New function prototype for inbound packet processing
 183 .PP
 184 For IPv6 processing, our code does not make a deep function call chain.
 185 Rather, we make a loop in the very last part of
 186 .I ip6_input,
 187 as shown in Figure 8.
 188 IPPROTO_DONE is a pseudo-protocol type value that identifies the end of the
 189 extension header chain.
 190 If more protocol headers exist,
 191 each header processing code will update the pointer variables
 192 and return the next extension header type.
 193 If the final header in the chain has been reached,
 194 IPPROTO_DONE is returned.
 195 .\" figure 8
 196 .nr figure +1
 197 With this code, we no longer have a deep call chain for IPv6/IPsec processing.
 198 Rather,
 199 .I ip6_input
 200 will make calls to each extension header processor
 201 directly.
 202 This avoids the possibility of overflowing the kernel stack when
 203 multiple extension headers are processed.
 204 .KF
 205 .PS
 206 A: ellipse "\fIip6_input\fP"
 207 right
 208 move
 209 move
 210 up
 211 move
 212 B: ellipse "\fIrthdr6_input\fP"
 213 move to last ellipse .s
 214 down
 215 C: ellipse "\fIah_input\fP"
 216 D: ellipse "\fIesp_input\fP"
 217 E: ellipse "\fItcp_input\fP"
 218
 219 arrow from 1/4 <A.e, A.ne> to 1/4 <B.w, B.nw>
 220 arrow from 1/4 <B.w, B.sw> to 1/4 <A.e, A.se>
 221
 222 arrow from 1/4 <A.e, A.ne> to 1/4 <C.w, C.nw>
 223 arrow from 1/4 <C.w, C.sw> to 1/4 <A.e, A.se>
 224
 225 arrow from 1/4 <A.e, A.ne> to 1/4 <D.w, D.nw>
 226 arrow from 1/4 <D.w, D.sw> to 1/4 <A.e, A.se>
 227
 228 arrow from 3/8 <A.e, A.ne> to 1/4 <E.w, E.nw>
 229 arrow from 3/8 <E.w, E.sw> to 1/4 <A.e, A.se>
 230 .PE
 231 .ce
 232 .nr figure +1
 233 Figure \n[figure]: KAME avoids a function call chain by making a loop in \fIip6_input\fP
 234 .KE
 235 .PP
 236 Regardless of the calling sequence imposed by the
 237 .I pr_input
 238 function prototype, it is important not to use up the kernel
 239 stack region in protocol handlers.
 240 Sometimes it is necessary to decrease the size of kernel stack usage
 241 by using pointer variables and dynamically allocated regions.
 242 .1C
 243 .KF
 244 .DS
 245 .ps 8
 246 .vs 9
 247 \f[CR]struct ip6protosw {
 248         int (*pr_input) __P((struct mbuf **, int *, int));
 249         /* and other members */
 250 };
 251
 252 ip6_input(m)
 253         struct mbuf *m;
 254 {
 255         /* in the very last part */
 256         extern struct ip6protosw inet6sw[];
 257         /* the first one in extension header chain */
 258         nxt = ip6.ip6_nxt;
 259         while (nxt != IPPROTO_DONE)
 260                 nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
 261 }
 262
 263 /* in each header processing code */
 264 int
 265 foohdr_input(mp, offp, proto)
 266         struct mbuf **mp;
 267         int *offp;
 268         int proto;
 269 {
 270         /* some processing, may modify mbuf chain */
 271
 272         if (we have more header to go) {
 273                 *mp = newm;
 274                 *offp = nxtoff;
 275                 return nxt;
 276         } else {
 277                 m_freem(newm);
 278                 return IPPROTO_DONE;
 279         }
 280 }\fP
 281 .DE
 282 .NL
 283 .ce
 284 Figure 8: KAME IPv6 header chain processing code.
 285 .KE
 286 .if t .2C