diff options
author | Nick Shipp <nick@shipp.ninja> | 2017-06-02 22:49:08 -0400 |
---|---|---|
committer | Nick Shipp <nick@shipp.ninja> | 2017-06-02 22:49:08 -0400 |
commit | 3f2480a08cc7335dda1c50af7f018a5a4c46d49d (patch) | |
tree | 83850c2fa734d6cda164538bfbfec92bc9abc56d /tools | |
parent | 30bd9159921998288c623b0e7e357830c5d62bfb (diff) |
Fix namespace tag name
Diffstat (limited to 'tools')
-rw-r--r-- | tools/splitwords.c | 90 |
1 files changed, 90 insertions, 0 deletions
diff --git a/tools/splitwords.c b/tools/splitwords.c new file mode 100644 index 0000000..5cf7f04 --- /dev/null +++ b/tools/splitwords.c @@ -0,0 +1,90 @@ +/* + * Beautifully overcomplicated version of `sed -i 's/ /\n/g` + * + * This uses AVX2 to find and replace spaces in 32byte chunks, + * because the SSE2 memchr implementation in glibc is too slow. + * It is not at all UTF8-aware. + */ +#include <stdint.h> +#include <unistd.h> +#include <immintrin.h> +#include <unistd.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <sys/mman.h> +#include <stdio.h> +#include <stdlib.h> + +#define PATH "/tmp/wikitext" + +int +main(void) +{ + char *mem; + int fd; + struct stat st; + size_t bytes; + __m256i *cur, *end; + __m256i ymm0, ymm1, ymm2, ymm3, ymm4; + uint64_t it; + + if (stat(PATH, &st) == -1) { + perror("stat"); + exit(-1); + } + + fd = open(PATH, O_RDWR); + if (fd == -1) { + perror("open " PATH); + } + + mem = (char *)mmap(0, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + + if (mem == MAP_FAILED) { + perror("mmap"); + exit(-1); + } + + madvise(mem, st.st_size, MADV_SEQUENTIAL|MADV_WILLNEED); + + end = (__m256i *)(mem + st.st_size); + + /* + * ymm2 = 32x 0010 0000 + * ymm3 = 32x 0000 1010 + */ + ymm2 = _mm256_set1_epi8(' '); + ymm3 = _mm256_set1_epi8('\n'); + + /* + * In 32-byte chunks, + * - Load from mmapped file + * - Look for spaces, get a boolean byte mask + * - 'Blend' original vector with a vector of newlines, switching on the byte mask + * ... which turns spaces into newlines. + */ + it = __rdtsc(); + for (cur=(__m256i *)mem;cur < end;cur++) { + ymm0 = _mm256_load_si256(cur); + ymm1 = _mm256_cmpeq_epi8(ymm0, ymm2); + ymm4 = _mm256_blendv_epi8(ymm0, ymm3, ymm1); + _mm256_store_si256(cur, ymm4); + } + printf("%10.6f cycles/byte\n", + (double)(__rdtsc() - it) / st.st_size); + + if (msync(mem, st.st_size, MS_SYNC) == -1) { + perror("msync"); + } + + if (fsync(fd) == -1) { + perror("fsync"); + } + + if (munmap(mem, st.st_size) == -1) { + perror("munmap"); + } + + close(fd); +} |