1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
/*
* Beautifully overcomplicated version of `sed -i 's/ /\n/g'`.
*
* This uses AVX2 to find and replace spaces in 32-byte chunks,
* because the SSE2 memchr implementation in glibc is too slow.
* It is not at all UTF-8-aware.
*/
#include <stdint.h>
#include <unistd.h>
#include <immintrin.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
/* Path of the file that main() opens and memory-maps; fixed at compile time. */
#define PATH "/tmp/wikitext"
/*
 * Replace every ' ' byte in buf[0 .. len) with '\n'.
 *
 * Whole 32-byte chunks are handled with AVX2:
 *  - load a chunk,
 *  - compare against a vector of spaces to get a per-byte mask,
 *  - blend the chunk with a vector of newlines under that mask,
 *  - store the result back.
 * The remaining (len % 32) bytes are handled one at a time, so nothing
 * outside buf[0 .. len) is ever read or written.  Unaligned load/store
 * intrinsics are used, so buf needs no particular alignment.
 */
static void
spaces_to_newlines(char *buf, size_t len)
{
	const __m256i spaces = _mm256_set1_epi8(' ');    /* 32x 0010 0000 */
	const __m256i newlines = _mm256_set1_epi8('\n'); /* 32x 0000 1010 */
	size_t i = 0;

	for (; len - i >= sizeof(__m256i); i += sizeof(__m256i)) {
		__m256i *chunk = (__m256i *)(buf + i);
		__m256i data = _mm256_loadu_si256(chunk);
		__m256i mask = _mm256_cmpeq_epi8(data, spaces);

		_mm256_storeu_si256(chunk,
		    _mm256_blendv_epi8(data, newlines, mask));
	}

	/* Scalar tail: fewer than 32 bytes left. */
	for (; i < len; i++) {
		if (buf[i] == ' ')
			buf[i] = '\n';
	}
}

/*
 * Map PATH into memory, turn every space in it into a newline, report
 * the cost of the replacement in TSC cycles per byte on stdout, and
 * flush the result back to the file.
 *
 * Returns EXIT_SUCCESS when the file was rewritten (or was empty),
 * EXIT_FAILURE if any system call failed; the failing call is reported
 * on stderr via perror().
 */
int
main(void)
{
	struct stat st;
	char *mem;
	size_t len;
	uint64_t start, cycles;
	int fd;
	int rc = EXIT_FAILURE;

	fd = open(PATH, O_RDWR);
	if (fd == -1) {
		perror("open " PATH);
		return EXIT_FAILURE;
	}

	/* Query the size through the descriptor so it describes the file
	 * that was actually opened. */
	if (fstat(fd, &st) == -1) {
		perror("fstat");
		goto out_close;
	}

	/* mmap() rejects zero-length mappings; an empty file needs no work. */
	if (st.st_size == 0) {
		rc = EXIT_SUCCESS;
		goto out_close;
	}
	len = (size_t)st.st_size;

	/* MAP_SHARED so that the stores reach the file; with MAP_PRIVATE
	 * they would stay in a private copy and the file would not change. */
	mem = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED) {
		perror("mmap");
		goto out_close;
	}

	/* Advice values are enumerated constants, not bit flags, so each
	 * one needs its own call.  Purely a hint: failures are ignored. */
	(void)madvise(mem, len, MADV_SEQUENTIAL);
	(void)madvise(mem, len, MADV_WILLNEED);

	start = __rdtsc();
	spaces_to_newlines(mem, len);
	cycles = __rdtsc() - start;

	printf("%10.6f cycles/byte\n", (double)cycles / (double)len);

	rc = EXIT_SUCCESS;
	if (msync(mem, len, MS_SYNC) == -1) {
		perror("msync");
		rc = EXIT_FAILURE;
	}
	if (fsync(fd) == -1) {
		perror("fsync");
		rc = EXIT_FAILURE;
	}
	if (munmap(mem, len) == -1) {
		perror("munmap");
		rc = EXIT_FAILURE;
	}

out_close:
	if (close(fd) == -1) {
		perror("close");
		rc = EXIT_FAILURE;
	}
	return rc;
}
|