shuf: tiny simplification
[coreutils.git] / src / wc_avx2.c
blobc3f76a625ace2630c65af20516e0565c19df88a7
1 /* wc_avx2 - Count the number of newlines with AVX2 instructions.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "wc.h"
20 #include "system.h"
21 #include "ioblksize.h"
23 #include <x86intrin.h>
25 /* Read FD to EOF or error; return errno status, line and byte counts. */
26 extern struct wc_lines
27 wc_lines_avx2 (int fd)
29 intmax_t lines = 0;
30 intmax_t bytes = 0;
32 __m256i endlines = _mm256_set1_epi8 ('\n');
34 while (true)
36 __m256i avx_buf[IO_BUFSIZE / sizeof (__m256i)];
37 ssize_t bytes_read = read (fd, avx_buf, sizeof avx_buf);
38 if (bytes_read <= 0)
39 return (struct wc_lines) { bytes_read == 0 ? 0 : errno, lines, bytes };
41 bytes += bytes_read;
42 __m256i *datap = avx_buf;
44 while (bytes_read >= 32)
46 __m256i to_match = _mm256_load_si256 (datap);
47 __m256i matches = _mm256_cmpeq_epi8 (to_match, endlines);
48 int mask = _mm256_movemask_epi8 (matches);
49 lines += __builtin_popcount (mask);
50 datap += 1;
51 bytes_read -= 32;
54 /* Count newlines in any leftover bytes (fewer than 32). */
55 char *end = (char *) datap + bytes_read;
56 for (char *p = (char *) datap; p < end; p++)
57 lines += *p == '\n';