1 /* wc_avx - Count the number of newlines with avx2 instructions.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
21 #include "ioblksize.h"
23 #include <x86intrin.h>
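25 /* Read from FD and return a struct wc_lines holding an error code (0 if read returned 0, otherwise errno) followed by the line count and the byte count. */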
26 extern struct wc_lines
27 wc_lines_avx2 (int fd
)
32 __m256i endlines
= _mm256_set1_epi8 ('\n');
36 __m256i avx_buf
[IO_BUFSIZE
/ sizeof (__m256i
)];
37 ssize_t bytes_read
= read (fd
, avx_buf
, sizeof avx_buf
);
39 return (struct wc_lines
) { bytes_read
== 0 ? 0 : errno
, lines
, bytes
};
42 __m256i
*datap
= avx_buf
;
44 while (bytes_read
>= 32)
46 __m256i to_match
= _mm256_load_si256 (datap
);
47 __m256i matches
= _mm256_cmpeq_epi8 (to_match
, endlines
);
48 int mask
= _mm256_movemask_epi8 (matches
);
49 lines
+= __builtin_popcount (mask
);
54 /* Finish up any left over bytes */
55 char *end
= (char *) datap
+ bytes_read
;
56 for (char *p
= (char *) datap
; p
< end
; p
++)