diff options
-rw-r--r-- | tools/splitwords.c | 20 |
1 file changed, 10 insertions, 10 deletions
diff --git a/tools/splitwords.c b/tools/splitwords.c
index 5cf7f04..108b819 100644
--- a/tools/splitwords.c
+++ b/tools/splitwords.c
@@ -26,7 +26,7 @@ main(void)
 	struct stat st;
 	size_t bytes;
 	__m256i *cur, *end;
-	__m256i ymm0, ymm1, ymm2, ymm3, ymm4;
+	__m256i data, mask, space, newline, swap;
 	uint64_t it;
 
 	if (stat(PATH, &st) == -1) {
@@ -39,7 +39,7 @@ main(void)
 		perror("open " PATH);
 	}
 
-	mem = (char *)mmap(0, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+	mem = (char *)mmap(0, st.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
 
 	if (mem == MAP_FAILED) {
 		perror("mmap");
@@ -51,11 +51,11 @@ main(void)
 	end = (__m256i *)(mem + st.st_size);
 
 	/*
-	 * ymm2 = 32x 0010 0000
-	 * ymm3 = 32x 0000 1010
+	 * space = 32x 0010 0000
+	 * newline = 32x 0000 1010
 	 */
-	ymm2 = _mm256_set1_epi8(' ');
-	ymm3 = _mm256_set1_epi8('\n');
+	space = _mm256_set1_epi8(' ');
+	newline = _mm256_set1_epi8('\n');
 
 	/*
 	 * In 32-byte chunks,
@@ -66,10 +66,10 @@ main(void)
 	 */
 	it = __rdtsc();
 	for (cur=(__m256i *)mem;cur < end;cur++) {
-		ymm0 = _mm256_load_si256(cur);
-		ymm1 = _mm256_cmpeq_epi8(ymm0, ymm2);
-		ymm4 = _mm256_blendv_epi8(ymm0, ymm3, ymm1);
-		_mm256_store_si256(cur, ymm4);
+		data = _mm256_load_si256(cur);
+		mask = _mm256_cmpeq_epi8(data, space);
+		swap = _mm256_blendv_epi8(data, newline, mask);
+		_mm256_store_si256(cur, swap);
 	}
 
 	printf("%10.6f cycles/byte\n", (double)(__rdtsc() - it) / st.st_size);