aboutsummaryrefslogtreecommitdiff
path: root/tools/splitwords.c
blob: 5cf7f049c623c8f57994fab989ccc6e1c7ccd9bb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * Beautifully overcomplicated version of `sed -i 's/ /\n/g'`
 *
 * This uses AVX2 to find and replace spaces in 32-byte chunks,
 * because the SSE2 memchr implementation in glibc is too slow.
 * It is not at all UTF-8-aware.
 */
#include <stdint.h>
#include <unistd.h>
#include <immintrin.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>

#define PATH "/tmp/wikitext"

/*
 * Replace every ' ' byte with '\n' in the first `len` bytes of `buf`.
 *
 * `buf` must be 32-byte aligned, because the aligned AVX2 load/store
 * intrinsics are used; a pointer returned by mmap() is page-aligned and
 * therefore qualifies.
 *
 * Whole 32-byte chunks are handled with AVX2:
 *  - load the chunk,
 *  - compare against a vector of spaces to get a per-byte mask,
 *  - 'blend' the chunk with a vector of newlines, switching on the mask,
 *  - store the result back.
 * The remaining (len % 32) bytes are handled one at a time so that nothing
 * past `buf + len` is read or written.
 */
static void
spaces_to_newlines(char *buf, size_t len)
{
    /* 32x 0010 0000 and 32x 0000 1010 */
    const __m256i spaces = _mm256_set1_epi8(' ');
    const __m256i newlines = _mm256_set1_epi8('\n');
    __m256i *cur = (__m256i *)buf;
    __m256i *const vec_end = cur + len / sizeof(__m256i);
    char *const end = buf + len;
    char *tail;

    for (; cur < vec_end; cur++) {
        __m256i chunk = _mm256_load_si256(cur);
        __m256i is_space = _mm256_cmpeq_epi8(chunk, spaces);

        _mm256_store_si256(cur, _mm256_blendv_epi8(chunk, newlines, is_space));
    }

    for (tail = (char *)vec_end; tail < end; tail++) {
        if (*tail == ' ') {
            *tail = '\n';
        }
    }
}

/*
 * Map PATH read/write, turn every space in it into a newline in place,
 * print the measured TSC cycles per byte, and flush the result to disk.
 *
 * Returns EXIT_SUCCESS when the file was rewritten (or was empty), and
 * EXIT_FAILURE if opening, mapping, syncing or releasing the file failed.
 */
int
main(void)
{
    char *mem;
    int fd;
    int rc = EXIT_FAILURE;
    struct stat st;
    size_t bytes;
    uint64_t it;

    fd = open(PATH, O_RDWR);
    if (fd == -1) {
        perror("open " PATH);
        return EXIT_FAILURE;
    }

    /* fstat the descriptor we will map, rather than stat-ing the path again. */
    if (fstat(fd, &st) == -1) {
        perror("fstat");
        goto out_close;
    }

    /* mmap() rejects a zero length, and there is nothing to do anyway. */
    if (st.st_size <= 0) {
        rc = EXIT_SUCCESS;
        goto out_close;
    }
    bytes = (size_t)st.st_size;

    /*
     * MAP_SHARED, not MAP_PRIVATE: stores to a private mapping are never
     * carried through to the file, which would make the msync() below a no-op.
     */
    mem = mmap(NULL, bytes, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (mem == MAP_FAILED) {
        perror("mmap");
        goto out_close;
    }

    /*
     * madvise() advice values are enumerated, not bit flags, so each hint
     * needs its own call. Failure here only costs performance.
     */
    if (madvise(mem, bytes, MADV_SEQUENTIAL) == -1) {
        perror("madvise");
    }
    if (madvise(mem, bytes, MADV_WILLNEED) == -1) {
        perror("madvise");
    }

    it = __rdtsc();
    spaces_to_newlines(mem, bytes);
    printf("%10.6f cycles/byte\n",
            (double)(__rdtsc() - it) / bytes);

    rc = EXIT_SUCCESS;

    if (msync(mem, bytes, MS_SYNC) == -1) {
        perror("msync");
        rc = EXIT_FAILURE;
    }

    if (fsync(fd) == -1) {
        perror("fsync");
        rc = EXIT_FAILURE;
    }

    if (munmap(mem, bytes) == -1) {
        perror("munmap");
        rc = EXIT_FAILURE;
    }

out_close:
    if (close(fd) == -1) {
        perror("close");
        rc = EXIT_FAILURE;
    }

    return rc;
}