10 Transport Layer Security (TLS) is an Upper Layer Protocol (ULP) that runs over
11 TCP. TLS provides end-to-end data integrity and confidentiality.
16 Creating a TLS connection
17 -------------------------
19 First create a new TCP socket and set the TLS ULP.
23 sock = socket(AF_INET, SOCK_STREAM, 0);
24 setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls"));
26 Setting the TLS ULP allows us to set/get TLS socket options. Currently
27 only the symmetric encryption is handled in the kernel. After the TLS
28 handshake is complete, we have all the parameters required to move the
29 data-path to the kernel. There is a separate socket option for moving
30 the transmit and the receive into the kernel.
34 /* From linux/tls.h */
35 struct tls_crypto_info {
36 unsigned short version;
37 unsigned short cipher_type;
40 struct tls12_crypto_info_aes_gcm_128 {
41 struct tls_crypto_info info;
42 unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE];
43 unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
44 unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE];
45 unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE];
49 struct tls12_crypto_info_aes_gcm_128 crypto_info;
51 crypto_info.info.version = TLS_1_2_VERSION;
52 crypto_info.info.cipher_type = TLS_CIPHER_AES_GCM_128;
53 memcpy(crypto_info.iv, iv_write, TLS_CIPHER_AES_GCM_128_IV_SIZE);
54 memcpy(crypto_info.rec_seq, seq_number_write,
55 TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
56 memcpy(crypto_info.key, cipher_key_write, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
57 memcpy(crypto_info.salt, implicit_iv_write, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
59 setsockopt(sock, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info));
61 Transmit and receive are set separately, but the setup is the same, using either TLS_TX or TLS_RX.
64 Sending TLS application data
65 ----------------------------
67 After setting the TLS_TX socket option all application data sent over this
68 socket is encrypted using TLS and the parameters provided in the socket option.
69 For example, we can send an encrypted hello world record as follows:
73 const char *msg = "hello world\n";
74 send(sock, msg, strlen(msg));
76 send() data is directly encrypted from the userspace buffer provided
77 to the encrypted kernel send buffer if possible.
79 The sendfile system call will send the file's data over TLS records of maximum length (2^14 bytes).
84 file = open(filename, O_RDONLY);
86 sendfile(sock, file, &offset, stat.st_size);
88 TLS records are created and sent after each send() call, unless
89 MSG_MORE is passed. MSG_MORE will delay creation of a record until
90 MSG_MORE is not passed, or the maximum record size is reached.
92 The kernel will need to allocate a buffer for the encrypted data.
93 This buffer is allocated at the time send() is called, such that
94 either the entire send() call will return -ENOMEM (or block waiting
95 for memory), or the encryption will always succeed. If send() returns
96 -ENOMEM and some data was left on the socket buffer from a previous
97 call using MSG_MORE, the MSG_MORE data is left on the socket buffer.
99 Receiving TLS application data
100 ------------------------------
102 After setting the TLS_RX socket option, all recv family socket calls
103 are decrypted using TLS parameters provided. A full TLS record must
104 be received before decryption can happen.
109 recv(sock, buffer, 16384);
111 Received data is decrypted directly into the user buffer if it is
112 large enough, and no additional allocations occur. If the userspace
113 buffer is too small, data is decrypted in the kernel and copied to userspace.
116 ``EINVAL`` is returned if the TLS version in the received message does not
117 match the version passed in setsockopt.
119 ``EMSGSIZE`` is returned if the received message is too big.
121 ``EBADMSG`` is returned if decryption failed for any other reason.
123 Sending TLS control messages
124 ----------------------------
126 Other than application data, TLS has control messages such as alert
127 messages (record type 21) and handshake messages (record type 22), etc.
128 These messages can be sent over the socket by providing the TLS record type
129 via a CMSG. For example the following function sends @data of @length bytes
130 using a record of type @record_type.
134 /* send TLS control message using record_type */
135 static int klts_send_ctrl_message(int sock, unsigned char record_type,
136 void *data, size_t length)
138 struct msghdr msg = {0};
139 int cmsg_len = sizeof(record_type);
140 struct cmsghdr *cmsg;
141 char buf[CMSG_SPACE(cmsg_len)];
142 struct iovec msg_iov; /* Vector of data to send/receive into. */
144 msg.msg_control = buf;
145 msg.msg_controllen = sizeof(buf);
146 cmsg = CMSG_FIRSTHDR(&msg);
147 cmsg->cmsg_level = SOL_TLS;
148 cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
149 cmsg->cmsg_len = CMSG_LEN(cmsg_len);
150 *CMSG_DATA(cmsg) = record_type;
151 msg.msg_controllen = cmsg->cmsg_len;
153 msg_iov.iov_base = data;
154 msg_iov.iov_len = length;
155 msg.msg_iov = &msg_iov;
158 return sendmsg(sock, &msg, 0);
161 Control message data should be provided unencrypted, and will be
162 encrypted by the kernel.
164 Receiving TLS control messages
165 ------------------------------
167 TLS control messages are passed in the userspace buffer, with message
168 type passed via cmsg. If no cmsg buffer is provided, an error is
169 returned if a control message is received. Data messages may be
170 received without a cmsg buffer set.
175 char cmsg[CMSG_SPACE(sizeof(unsigned char))];
176 struct msghdr msg = {0};
177 msg.msg_control = cmsg;
178 msg.msg_controllen = sizeof(cmsg);
180 struct iovec msg_iov;
181 msg_iov.iov_base = buffer;
182 msg_iov.iov_len = 16384;
184 msg.msg_iov = &msg_iov;
187 int ret = recvmsg(sock, &msg, 0 /* flags */);
189 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
190 if (cmsg->cmsg_level == SOL_TLS &&
191 cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
192 int record_type = *((unsigned char *)CMSG_DATA(cmsg));
193 // Do something with record_type, and control message data in
196 // Note that record_type may be == to application data (23).
198 // Buffer contains application data.
201 recv will never return data from mixed types of TLS records.
203 Integrating into userspace TLS library
204 ---------------------------------------
206 At a high level, the kernel TLS ULP is a replacement for the record
207 layer of a userspace TLS library.
209 A patchset to OpenSSL to use ktls as the record layer is
210 `here <https://github.com/Mellanox/openssl/commits/tls_rx2>`_.
212 `An example <https://github.com/ktls/af_ktls-tool/commits/RX>`_
213 of calling send directly after a handshake using gnutls.
214 Since it doesn't implement a full record layer, control
215 messages are not supported.
220 The TLS implementation exposes the following per-namespace statistics
221 (``/proc/net/tls_stat``):
223 - ``TlsCurrTxSw``, ``TlsCurrRxSw`` -
224 number of TX and RX sessions currently installed where host handles cryptography
227 - ``TlsCurrTxDevice``, ``TlsCurrRxDevice`` -
228 number of TX and RX sessions currently installed where NIC handles cryptography
231 - ``TlsTxSw``, ``TlsRxSw`` -
232 number of TX and RX sessions opened with host cryptography
234 - ``TlsTxDevice``, ``TlsRxDevice`` -
235 number of TX and RX sessions opened with NIC cryptography
237 - ``TlsDecryptError`` -
238 record decryption failed (e.g. due to incorrect authentication tag)
240 - ``TlsDeviceRxResync`` -
241 number of RX resyncs sent to NICs handling cryptography