/* * Beautifully overcomplicated version of `sed -i 's/ /\n/g` * * This uses AVX2 to find and replace spaces in 32byte chunks, * because the SSE2 memchr implementation in glibc is too slow. * It is not at all UTF8-aware. */ #include #include #include #include #include #include #include #include #include #include #define PATH "/tmp/wikitext" int main(void) { char *mem; int fd; struct stat st; size_t bytes; __m256i *cur, *end; __m256i ymm0, ymm1, ymm2, ymm3, ymm4; uint64_t it; if (stat(PATH, &st) == -1) { perror("stat"); exit(-1); } fd = open(PATH, O_RDWR); if (fd == -1) { perror("open " PATH); } mem = (char *)mmap(0, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { perror("mmap"); exit(-1); } madvise(mem, st.st_size, MADV_SEQUENTIAL|MADV_WILLNEED); end = (__m256i *)(mem + st.st_size); /* * ymm2 = 32x 0010 0000 * ymm3 = 32x 0000 1010 */ ymm2 = _mm256_set1_epi8(' '); ymm3 = _mm256_set1_epi8('\n'); /* * In 32-byte chunks, * - Load from mmapped file * - Look for spaces, get a boolean byte mask * - 'Blend' original vector with a vector of newlines, switching on the byte mask * ... which turns spaces into newlines. */ it = __rdtsc(); for (cur=(__m256i *)mem;cur < end;cur++) { ymm0 = _mm256_load_si256(cur); ymm1 = _mm256_cmpeq_epi8(ymm0, ymm2); ymm4 = _mm256_blendv_epi8(ymm0, ymm3, ymm1); _mm256_store_si256(cur, ymm4); } printf("%10.6f cycles/byte\n", (double)(__rdtsc() - it) / st.st_size); if (msync(mem, st.st_size, MS_SYNC) == -1) { perror("msync"); } if (fsync(fd) == -1) { perror("fsync"); } if (munmap(mem, st.st_size) == -1) { perror("munmap"); } close(fd); }