From 4fe71260e70696a0bb5907c97a2ca049fc23214c Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 27 Nov 2023 02:20:20 -0500 Subject: [PATCH] test going by usize --- src/utils/gen_util.rs | 179 +++++++++++++++++++++++++++++++++++------- 1 file changed, 150 insertions(+), 29 deletions(-) diff --git a/src/utils/gen_util.rs b/src/utils/gen_util.rs index 4b6f8e4e..fcb6a00c 100644 --- a/src/utils/gen_util.rs +++ b/src/utils/gen_util.rs @@ -100,43 +100,54 @@ enum AsciiIterationResult { Remaining(usize), } -/// Greedily add characters to the output until a non-ASCII grapheme is found, or -/// the output is `width` long. +const SIZE_OF_USIZE: usize = std::mem::size_of::(); + +/// Returns `true` if any byte in the word `v` is nonascii (>= 128). +/// Taken from the slice code for determining non_ascii. #[inline] -fn greedy_ascii_add(content: &str, width: NonZeroUsize) -> (String, AsciiIterationResult) { +const fn contains_nonascii(v: usize) -> bool { + const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; SIZE_OF_USIZE]); + (NONASCII_MASK & v) != 0 +} + +/// This should only be called if `width` is smaller than `bytes`, assuming bytes +/// is fully ASCII. If it is not all ASCII, then it doesn't matter. +#[inline] +fn simple_build_ascii_str(bytes: &[u8], width: NonZeroUsize) -> (String, AsciiIterationResult) { let width: usize = width.into(); - let mut text = Vec::with_capacity(width); - - let s = content.as_bytes(); + let mut raw_text = Vec::with_capacity(width); let mut current_index = 0; while current_index < width - 1 { - let current_byte = s[current_index]; + let current_byte = bytes[current_index]; if current_byte.is_ascii() { - text.push(current_byte); + raw_text.push(current_byte); current_index += 1; } else { - debug_assert!(text.is_ascii()); + debug_assert!(raw_text.is_ascii()); let current_index = AsciiIterationResult::Remaining(current_index); - // SAFETY: This conversion is safe to do unchecked, we only push ASCII characters up to - // this point. - let current_text = unsafe { String::from_utf8_unchecked(text) }; + // SAFETY: This conversion is safe to do unchecked, we only push ASCII characters + // up to this point. + let text = unsafe { String::from_utf8_unchecked(raw_text) }; - return (current_text, current_index); + return (text, current_index); } } - // If we made it all the way through, then we probably hit the width limit. - debug_assert!(text.is_ascii()); + debug_assert!(raw_text.is_ascii()); - let current_index = if s[current_index].is_ascii() { + // If the next character is not ASCII, then we may need to still check it. + // Otherwise, we always want to put the ellipsis as the while loop exited after + // the second last character was put, and we know this string is too wide for + // width. + let current_index = if bytes[current_index].is_ascii() { let mut ellipsis = [0; 3]; '…'.encode_utf8(&mut ellipsis); - text.extend_from_slice(&ellipsis); + raw_text.extend_from_slice(&ellipsis); AsciiIterationResult::Complete } else { AsciiIterationResult::Remaining(current_index) @@ -144,9 +155,122 @@ fn greedy_ascii_add(content: &str, width: NonZeroUsize) -> (String, AsciiIterati // SAFETY: This conversion is safe to do unchecked, we only push ASCII characters up to // this point. - let current_text = unsafe { String::from_utf8_unchecked(text) }; + let text = unsafe { String::from_utf8_unchecked(raw_text) }; - (current_text, current_index) + (text, current_index) +} + +/// Read one usize at a time. Based on the `is_ascii` for slices in core. +fn usize_build_ascii_str( + bytes: &[u8], width: NonZeroUsize, align_offset: usize, +) -> (String, AsciiIterationResult) { + let width: usize = width.into(); + let mut raw_text: Vec; + let len = bytes.len(); + + // We always read the first word unaligned, which means `align_offset` is + // 0, we'd read the same value again for the aligned read. + let offset_to_aligned = if align_offset == 0 { + SIZE_OF_USIZE + } else { + align_offset + }; + + const BYTES_PER_WORD: usize = SIZE_OF_USIZE / std::mem::size_of::(); + + let start = bytes.as_ptr(); + + { + // SAFETY: We verify `len < SIZE_OF_USIZE` above. + let first_word = unsafe { (start as *const usize).read_unaligned() }; + if contains_nonascii(first_word) { + return (String::default(), AsciiIterationResult::Remaining(0)); + } else { + // Only bother initializing if the first check succeeds. + raw_text = Vec::with_capacity(width); + + for i in 0..BYTES_PER_WORD { + let c = unsafe { (start as *const u8).add(i).read_unaligned() }; + raw_text.push(c); + } + } + } + + debug_assert!(offset_to_aligned <= len); + + // SAFETY: word_ptr is the (properly aligned) usize ptr we use to read the + // middle chunk of the slice. + let mut word_ptr = unsafe { start.add(offset_to_aligned) as *const usize }; + + // `byte_pos` is the byte index of `word_ptr`, used for loop end checks. + let mut byte_pos = offset_to_aligned; + + while byte_pos < len - SIZE_OF_USIZE { + // Sanity check that the read is in bounds + debug_assert!(byte_pos + SIZE_OF_USIZE <= len); + + // SAFETY: We know `word_ptr` is properly aligned (because of `align_offset`), + // and we know that we have enough bytes between `word_ptr` and the end + let word: usize = unsafe { word_ptr.read() }; + + if contains_nonascii(word) { + // SAFETY: We've only added ASCII characters so this is safe. + let text = unsafe { String::from_utf8_unchecked(raw_text) }; + return (text, AsciiIterationResult::Remaining(byte_pos)); + } else { + for i in 0..BYTES_PER_WORD { + let c = unsafe { (word as *const u8).add(i).read_unaligned() }; + raw_text.push(c); + } + } + + byte_pos += SIZE_OF_USIZE; + + // SAFETY: We know that `byte_pos <= len - SIZE_OF_USIZE`, which means that + // after this `add`, `word_ptr` will be at most one-past-the-end. + word_ptr = unsafe { word_ptr.add(1) }; + } + + // Sanity check to ensure there really is only one `usize` left. This should + // be guaranteed by our loop condition. + debug_assert!(byte_pos <= len && len - byte_pos <= SIZE_OF_USIZE); + + let last_index = len - SIZE_OF_USIZE; + + // SAFETY: This relies on `len >= SIZE_OF_USIZE`, which we check at the start. + let last_word = unsafe { (start.add(last_index) as *const usize).read_unaligned() }; + + let current_index = if contains_nonascii(last_word) { + AsciiIterationResult::Remaining(last_index) + } else { + for i in 0..BYTES_PER_WORD { + let c = unsafe { (last_word as *const u8).add(i).read_unaligned() }; + raw_text.push(c); + } + AsciiIterationResult::Complete + }; + + // SAFETY: We've only added ASCII characters so this is safe. + let text = unsafe { String::from_utf8_unchecked(raw_text) }; + + (text, current_index) +} + +/// Continuously add characters to the output until a non-ASCII grapheme is found, or +/// the output is `width` long. +#[inline] +fn build_ascii_str(content: &str, width: NonZeroUsize) -> (String, AsciiIterationResult) { + let bytes = content.as_bytes(); + let len = bytes.len(); + let align_offset = bytes.as_ptr().align_offset(SIZE_OF_USIZE); + + // If we wouldn't gain anything from the word-at-a-time implementation, fall + // back to a scalar loop. + if len < SIZE_OF_USIZE || len < align_offset || SIZE_OF_USIZE < std::mem::align_of::() { + simple_build_ascii_str(bytes, width) + } else { + usize_build_ascii_str(bytes, width, align_offset) + } } /// Truncates a string to the specified width with an ellipsis character. @@ -173,12 +297,11 @@ fn truncate_str>(content: &str, width: U) -> String { // // If we didn't get a complete truncated string, then continue on treating the rest as graphemes. - let (mut text, res) = greedy_ascii_add(content, nz_width); + let (mut text, res) = build_ascii_str(content, nz_width); match res { AsciiIterationResult::Complete => text, AsciiIterationResult::Remaining(current_index) => { let mut curr_width = text.len(); - let mut early_break = false; // This tracks the length of the last added string - note this does NOT match the grapheme *width*. // Since the previous characters are always ASCII, this is always initialized as 1, unless the string @@ -198,18 +321,16 @@ fn truncate_str>(content: &str, width: U) -> String { last_added_str_len = g.len(); text.push_str(g); } else { - early_break = true; + if curr_width == width { + // Remove the last grapheme cluster added. + text.truncate(text.len() - last_added_str_len); + } + text.push('…'); + break; } } - if early_break { - if curr_width == width { - // Remove the last grapheme cluster added. - text.truncate(text.len() - last_added_str_len); - } - text.push('…'); - } text } }