Merge tag 'pull-loongarch-20241016' of https://gitlab.com/gaosong/qemu into staging
[qemu/armbru.git] / migration / xbzrle.c
blob3eddcf249bb82237c6ac8590c7ab0d5ca338d3aa
1 /*
2 * Xor Based Zero Run Length Encoding
4 * Copyright 2013 Red Hat, Inc. and/or its affiliates
6 * Authors:
7 * Orit Wasserman <owasserm@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
15 #include "qemu/host-utils.h"
16 #include "xbzrle.h"
18 #if defined(CONFIG_AVX512BW_OPT)
19 #include <immintrin.h>
20 #include "host/cpuinfo.h"
22 static int __attribute__((target("avx512bw")))
23 xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
24 uint8_t *dst, int dlen)
26 uint32_t zrun_len = 0, nzrun_len = 0;
27 int d = 0, i = 0, num = 0;
28 uint8_t *nzrun_start = NULL;
29 /* add 1 to include residual part in main loop */
30 uint32_t count512s = (slen >> 6) + 1;
31 /* countResidual is tail of data, i.e., countResidual = slen % 64 */
32 uint32_t count_residual = slen & 0b111111;
33 bool never_same = true;
34 uint64_t mask_residual = 1;
35 mask_residual <<= count_residual;
36 mask_residual -= 1;
37 __m512i r = _mm512_set1_epi32(0);
39 while (count512s) {
40 int bytes_to_check = 64;
41 uint64_t mask = 0xffffffffffffffff;
42 if (count512s == 1) {
43 bytes_to_check = count_residual;
44 mask = mask_residual;
46 __m512i old_data = _mm512_mask_loadu_epi8(r,
47 mask, old_buf + i);
48 __m512i new_data = _mm512_mask_loadu_epi8(r,
49 mask, new_buf + i);
50 uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
51 count512s--;
53 bool is_same = (comp & 0x1);
54 while (bytes_to_check) {
55 if (d + 2 > dlen) {
56 return -1;
58 if (is_same) {
59 if (nzrun_len) {
60 d += uleb128_encode_small(dst + d, nzrun_len);
61 if (d + nzrun_len > dlen) {
62 return -1;
64 nzrun_start = new_buf + i - nzrun_len;
65 memcpy(dst + d, nzrun_start, nzrun_len);
66 d += nzrun_len;
67 nzrun_len = 0;
69 /* 64 data at a time for speed */
70 if (count512s && (comp == 0xffffffffffffffff)) {
71 i += 64;
72 zrun_len += 64;
73 break;
75 never_same = false;
76 num = ctz64(~comp);
77 num = (num < bytes_to_check) ? num : bytes_to_check;
78 zrun_len += num;
79 bytes_to_check -= num;
80 comp >>= num;
81 i += num;
82 if (bytes_to_check) {
83 /* still has different data after same data */
84 d += uleb128_encode_small(dst + d, zrun_len);
85 zrun_len = 0;
86 } else {
87 break;
90 if (never_same || zrun_len) {
92 * never_same only acts if
93 * data begins with diff in first count512s
95 d += uleb128_encode_small(dst + d, zrun_len);
96 zrun_len = 0;
97 never_same = false;
99 /* has diff, 64 data at a time for speed */
100 if ((bytes_to_check == 64) && (comp == 0x0)) {
101 i += 64;
102 nzrun_len += 64;
103 break;
105 num = ctz64(comp);
106 num = (num < bytes_to_check) ? num : bytes_to_check;
107 nzrun_len += num;
108 bytes_to_check -= num;
109 comp >>= num;
110 i += num;
111 if (bytes_to_check) {
112 /* mask like 111000 */
113 d += uleb128_encode_small(dst + d, nzrun_len);
114 /* overflow */
115 if (d + nzrun_len > dlen) {
116 return -1;
118 nzrun_start = new_buf + i - nzrun_len;
119 memcpy(dst + d, nzrun_start, nzrun_len);
120 d += nzrun_len;
121 nzrun_len = 0;
122 is_same = true;
127 if (nzrun_len != 0) {
128 d += uleb128_encode_small(dst + d, nzrun_len);
129 /* overflow */
130 if (d + nzrun_len > dlen) {
131 return -1;
133 nzrun_start = new_buf + i - nzrun_len;
134 memcpy(dst + d, nzrun_start, nzrun_len);
135 d += nzrun_len;
137 return d;
140 static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
141 int slen, uint8_t *dst, int dlen);
143 static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
145 static void __attribute__((constructor)) init_accel(void)
147 unsigned info = cpuinfo_init();
148 if (info & CPUINFO_AVX512BW) {
149 accel_func = xbzrle_encode_buffer_avx512;
150 } else {
151 accel_func = xbzrle_encode_buffer_int;
155 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
156 uint8_t *dst, int dlen)
158 return accel_func(old_buf, new_buf, slen, dst, dlen);
161 #define xbzrle_encode_buffer xbzrle_encode_buffer_int
162 #endif
165 page = zrun nzrun
166 | zrun nzrun page
168 zrun = length
170 nzrun = length byte...
172 length = uleb128 encoded integer
174 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
175 uint8_t *dst, int dlen)
177 uint32_t zrun_len = 0, nzrun_len = 0;
178 int d = 0, i = 0;
179 long res;
180 uint8_t *nzrun_start = NULL;
182 g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
183 sizeof(long)));
185 while (i < slen) {
186 /* overflow */
187 if (d + 2 > dlen) {
188 return -1;
191 /* not aligned to sizeof(long) */
192 res = (slen - i) % sizeof(long);
193 while (res && old_buf[i] == new_buf[i]) {
194 zrun_len++;
195 i++;
196 res--;
199 /* word at a time for speed */
200 if (!res) {
201 while (i < slen &&
202 (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
203 i += sizeof(long);
204 zrun_len += sizeof(long);
207 /* go over the rest */
208 while (i < slen && old_buf[i] == new_buf[i]) {
209 zrun_len++;
210 i++;
214 /* buffer unchanged */
215 if (zrun_len == slen) {
216 return 0;
219 /* skip last zero run */
220 if (i == slen) {
221 return d;
224 d += uleb128_encode_small(dst + d, zrun_len);
226 zrun_len = 0;
227 nzrun_start = new_buf + i;
229 /* overflow */
230 if (d + 2 > dlen) {
231 return -1;
233 /* not aligned to sizeof(long) */
234 res = (slen - i) % sizeof(long);
235 while (res && old_buf[i] != new_buf[i]) {
236 i++;
237 nzrun_len++;
238 res--;
241 /* word at a time for speed, use of 32-bit long okay */
242 if (!res) {
243 /* truncation to 32-bit long okay */
244 unsigned long mask = (unsigned long)0x0101010101010101ULL;
245 while (i < slen) {
246 unsigned long xor;
247 xor = *(unsigned long *)(old_buf + i)
248 ^ *(unsigned long *)(new_buf + i);
249 if ((xor - mask) & ~xor & (mask << 7)) {
250 /* found the end of an nzrun within the current long */
251 while (old_buf[i] != new_buf[i]) {
252 nzrun_len++;
253 i++;
255 break;
256 } else {
257 i += sizeof(long);
258 nzrun_len += sizeof(long);
263 d += uleb128_encode_small(dst + d, nzrun_len);
264 /* overflow */
265 if (d + nzrun_len > dlen) {
266 return -1;
268 memcpy(dst + d, nzrun_start, nzrun_len);
269 d += nzrun_len;
270 nzrun_len = 0;
273 return d;
276 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
278 int i = 0, d = 0;
279 int ret;
280 uint32_t count = 0;
282 while (i < slen) {
284 /* zrun */
285 if ((slen - i) < 2) {
286 return -1;
289 ret = uleb128_decode_small(src + i, &count);
290 if (ret < 0 || (i && !count)) {
291 return -1;
293 i += ret;
294 d += count;
296 /* overflow */
297 if (d > dlen) {
298 return -1;
301 /* nzrun */
302 if ((slen - i) < 2) {
303 return -1;
306 ret = uleb128_decode_small(src + i, &count);
307 if (ret < 0 || !count) {
308 return -1;
310 i += ret;
312 /* overflow */
313 if (d + count > dlen || i + count > slen) {
314 return -1;
317 memcpy(dst + d, src + i, count);
318 d += count;
319 i += count;
322 return d;