//! Operations on ASCII `[u8]`.

use crate::ascii;
use crate::fmt::{self, Write};
use crate::iter;
use crate::mem;
use crate::ops;

#[cfg(not(test))]
impl [u8] {
    /// Checks if all bytes in this slice are within the ASCII range.
    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
    #[rustc_const_unstable(feature = "const_slice_is_ascii", issue = "111090")]
    #[must_use]
    #[inline]
    pub const fn is_ascii(&self) -> bool {
        // Delegates to the free function `is_ascii` of this module.
        is_ascii(self)
    }

    /// If this slice [`is_ascii`](Self::is_ascii), returns it as a slice of
    /// [ASCII characters](`ascii::Char`), otherwise returns `None`.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[must_use]
    #[inline]
    pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
        if !self.is_ascii() {
            return None;
        }
        // SAFETY: the check above established that every byte is ASCII.
        Some(unsafe { self.as_ascii_unchecked() })
    }

    /// Converts this slice of bytes into a slice of ASCII characters,
    /// without checking whether they are valid.
    ///
    /// # Safety
    ///
    /// Every byte in the slice must be in `0..=127`, or else this is UB.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[must_use]
    #[inline]
    pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char] {
        let ascii_ptr = self as *const [u8] as *const [ascii::Char];
        // SAFETY: the caller promised that all the bytes are ASCII.
        unsafe { &*ascii_ptr }
    }

    /// Checks that two slices are an ASCII case-insensitive match.
    ///
    /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
    /// but without allocating and copying temporaries.
    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
    #[must_use]
    #[inline]
    pub fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
        // Slices of different lengths can never match.
        if self.len() != other.len() {
            return false;
        }
        iter::zip(self, other).all(|(lhs, rhs)| lhs.eq_ignore_ascii_case(rhs))
    }

    /// Converts this slice to its ASCII upper case equivalent in-place.
    ///
    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
    /// but non-ASCII letters are unchanged.
    ///
    /// To return a new uppercased value without modifying the existing one, use
    /// [`to_ascii_uppercase`].
    ///
    /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
    #[inline]
    pub fn make_ascii_uppercase(&mut self) {
        self.iter_mut().for_each(|byte| byte.make_ascii_uppercase());
    }

    /// Converts this slice to its ASCII lower case equivalent in-place.
    ///
    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
    /// but non-ASCII letters are unchanged.
    ///
    /// To return a new lowercased value without modifying the existing one, use
    /// [`to_ascii_lowercase`].
    ///
    /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
    #[inline]
    pub fn make_ascii_lowercase(&mut self) {
        self.iter_mut().for_each(|byte| byte.make_ascii_lowercase());
    }

    /// Returns an iterator that produces an escaped version of this slice,
    /// treating it as an ASCII string.
    ///
    /// # Examples
    ///
    /// ```
    /// let s = b"0\t\r\n'\"\\\x9d";
    /// let escaped = s.escape_ascii().to_string();
    /// assert_eq!(escaped, "0\\t\\r\\n\\'\\\"\\\\\\x9d");
    /// ```
    #[must_use = "this returns the escaped bytes as an iterator, \
                  without modifying the original"]
    #[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
    pub fn escape_ascii(&self) -> EscapeAscii<'_> {
        // Every byte is mapped to its escape sequence; the sequences are flattened.
        let inner = self.iter().flat_map(EscapeByte);
        EscapeAscii { inner }
    }

    /// Returns a byte slice with leading ASCII whitespace bytes removed.
    ///
    /// 'Whitespace' refers to the definition used by
    /// `u8::is_ascii_whitespace`.
    ///
    /// # Examples
    ///
    /// ```
    /// #![feature(byte_slice_trim_ascii)]
    ///
    /// assert_eq!(b" \t hello world\n".trim_ascii_start(), b"hello world\n");
    /// assert_eq!(b"  ".trim_ascii_start(), b"");
    /// assert_eq!(b"".trim_ascii_start(), b"");
    /// ```
    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
    pub const fn trim_ascii_start(&self) -> &[u8] {
        let mut remaining = self;
        // Note: a pattern-matching based approach (instead of indexing) allows
        // making the function const.
        while let [head, tail @ ..] = remaining {
            if !head.is_ascii_whitespace() {
                break;
            }
            remaining = tail;
        }
        remaining
    }

    /// Returns a byte slice with trailing ASCII whitespace bytes removed.
    ///
    /// 'Whitespace' refers to the definition used by
    /// `u8::is_ascii_whitespace`.
    ///
    /// # Examples
    ///
    /// ```
    /// #![feature(byte_slice_trim_ascii)]
    ///
    /// assert_eq!(b"\r hello world\n ".trim_ascii_end(), b"\r hello world");
    /// assert_eq!(b"  ".trim_ascii_end(), b"");
    /// assert_eq!(b"".trim_ascii_end(), b"");
    /// ```
    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
    pub const fn trim_ascii_end(&self) -> &[u8] {
        let mut remaining = self;
        // Note: a pattern-matching based approach (instead of indexing) allows
        // making the function const.
        while let [init @ .., tail] = remaining {
            if !tail.is_ascii_whitespace() {
                break;
            }
            remaining = init;
        }
        remaining
    }

    /// Returns a byte slice with leading and trailing ASCII whitespace bytes
    /// removed.
    ///
    /// 'Whitespace' refers to the definition used by
    /// `u8::is_ascii_whitespace`.
    ///
    /// # Examples
    ///
    /// ```
    /// #![feature(byte_slice_trim_ascii)]
    ///
    /// assert_eq!(b"\r hello world\n ".trim_ascii(), b"hello world");
    /// assert_eq!(b"  ".trim_ascii(), b"");
    /// assert_eq!(b"".trim_ascii(), b"");
    /// ```
    #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")]
    pub const fn trim_ascii(&self) -> &[u8] {
        let without_leading = self.trim_ascii_start();
        without_leading.trim_ascii_end()
    }
}

// Declares `EscapeByte`, a clonable callable taking `&u8` and returning the
// `ascii::escape_default` iterator for that byte. It is generated through
// `impl_fn_for_zst!` (presumably as a zero-sized type -- see the macro definition)
// so that the mapping function has a nameable type.
impl_fn_for_zst! {
    #[derive(Clone)]
    struct EscapeByte impl Fn = |byte: &u8| -> ascii::EscapeDefault {
        ascii::escape_default(*byte)
    };
}

/// An iterator over the escaped version of a byte slice.
///
/// This `struct` is created by the [`slice::escape_ascii`] method.
/// See its documentation for more information.
#[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct EscapeAscii<'a> {
    // Slice iterator whose bytes are each mapped through `EscapeByte` to an
    // `ascii::EscapeDefault` sequence, with the sequences flattened into one stream.
    inner: iter::FlatMap<super::Iter<'a, u8>, ascii::EscapeDefault, EscapeByte>,
}

// Every method forwards to the wrapped `FlatMap` iterator.
#[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
impl<'a> iter::Iterator for EscapeAscii<'a> {
    type Item = u8;
    // Yields the next escaped byte, or `None` once the inner iterator is exhausted.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.inner.next()
    }
    // Reports the bounds computed by the inner iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
    // Forwarded explicitly rather than relying on the default implementation,
    // presumably so that the inner iterator's own `try_fold` is used.
    #[inline]
    fn try_fold<Acc, Fold, R>(&mut self, init: Acc, fold: Fold) -> R
    where
        Fold: FnMut(Acc, Self::Item) -> R,
        R: ops::Try<Output = Acc>,
    {
        self.inner.try_fold(init, fold)
    }
    // Same as `try_fold`: hands the whole fold over to the inner iterator.
    #[inline]
    fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
    where
        Fold: FnMut(Acc, Self::Item) -> Acc,
    {
        self.inner.fold(init, fold)
    }
    // The iterator is double-ended, so the last item is fetched from the back
    // instead of walking through every element from the front.
    #[inline]
    fn last(mut self) -> Option<u8> {
        self.next_back()
    }
}

#[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
impl<'a> iter::DoubleEndedIterator for EscapeAscii<'a> {
    // Yields escaped bytes starting from the end by forwarding to the inner iterator.
    fn next_back(&mut self) -> Option<u8> {
        self.inner.next_back()
    }
}
// Marker impl: `EscapeAscii` is declared to be a fused iterator.
#[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
impl<'a> iter::FusedIterator for EscapeAscii<'a> {}
#[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
impl<'a> fmt::Display for EscapeAscii<'a> {
    // Writes every escaped byte to `f` as a `char`, stopping at the first
    // formatter error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Iterate over a clone so that formatting does not consume `self`.
        for byte in self.clone() {
            f.write_char(byte as char)?;
        }
        Ok(())
    }
}
#[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
impl<'a> fmt::Debug for EscapeAscii<'a> {
    // Prints only the type name; the internal iterator state is elided.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("EscapeAscii");
        builder.finish_non_exhaustive()
    }
}

/// Returns `true` if any byte in the word `v` is non-ASCII (>= 128).
///
/// Snarfed from `../str/mod.rs`, which does something similar for UTF-8 validation.
#[inline]
const fn contains_nonascii(v: usize) -> bool {
    // A word in which only the most significant bit of every byte is set.
    const HIGH_BITS: usize = usize::repeat_u8(0x80);
    let high_bits_set = v & HIGH_BITS;
    high_bits_set != 0
}

/// ASCII test *without* the chunk-at-a-time optimizations.
///
/// This is carefully structured to produce nice small code -- it's smaller in
/// `-O` than what the "obvious" ways produce under `-C opt-level=s`. If you
/// touch it, be sure to run (and update if needed) the assembly tests.
#[unstable(feature = "str_internals", issue = "none")]
#[doc(hidden)]
#[inline]
pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
    // Peel bytes off the back of the slice for as long as they are ASCII.
    while let [rest @ .., last] = bytes {
        if !last.is_ascii() {
            break;
        }
        bytes = rest;
    }
    // Every byte was ASCII exactly when the whole slice was consumed.
    bytes.is_empty()
}

/// Optimized ASCII test that will use usize-at-a-time operations instead of
/// byte-at-a-time operations (when possible).
///
/// The algorithm we use here is pretty simple. If `s` is too short, we just
/// check each byte and be done with it. Otherwise:
///
/// - Read the first word with an unaligned load.
/// - Align the pointer, read subsequent words until end with aligned loads.
/// - Read the last `usize` from `s` with an unaligned load.
///
/// If any of these loads produces something for which `contains_nonascii`
/// (above) returns true, then we know the answer is false.
#[inline]
const fn is_ascii(s: &[u8]) -> bool {
    const USIZE_SIZE: usize = mem::size_of::<usize>();

    let len = s.len();
    let align_offset = s.as_ptr().align_offset(USIZE_SIZE);

    // If we wouldn't gain anything from the word-at-a-time implementation, fall
    // back to a scalar loop.
    //
    // We also do this for architectures where `size_of::<usize>()` isn't
    // sufficient alignment for `usize`, because it's a weird edge case.
    if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem::align_of::<usize>() {
        return is_ascii_simple(s);
    }

    // We always read the first word unaligned, which means that if `align_offset`
    // is 0, the first aligned read would fetch the same value again; skip one
    // whole word in that case.
    let offset_to_aligned = if align_offset == 0 { USIZE_SIZE } else { align_offset };

    let start = s.as_ptr();
    // SAFETY: the early return above guarantees `len >= USIZE_SIZE` here, so an
    // unaligned read of one `usize` from the start stays within the slice.
    let first_word = unsafe { (start as *const usize).read_unaligned() };

    if contains_nonascii(first_word) {
        return false;
    }
    // We checked this above, somewhat implicitly. Note that `offset_to_aligned`
    // is either `align_offset` or `USIZE_SIZE`, both of which are explicitly
    // compared against `len` above.
    debug_assert!(offset_to_aligned <= len);

    // SAFETY: word_ptr is the (properly aligned) usize ptr we use to read the
    // middle chunk of the slice.
    let mut word_ptr = unsafe { start.add(offset_to_aligned) as *const usize };

    // `byte_pos` is the byte index of `word_ptr`, used for loop end checks.
    let mut byte_pos = offset_to_aligned;

    // Paranoia check about alignment, since the loop below performs aligned
    // loads through `read()`. In practice this should be impossible barring a
    // bug in `align_offset` though.
    //
    // While this method is allowed to spuriously fail in CTFE, if it doesn't
    // have alignment information it should have given a `usize::MAX` for
    // `align_offset` earlier, sending things through the scalar path instead of
    // this one, so this check should pass if it's reachable.
    debug_assert!(word_ptr.is_aligned_to(mem::align_of::<usize>()));

    // Read subsequent words until the last aligned word, excluding the last
    // aligned word itself: it is handled by the tail check below, which ensures
    // the tail is always at most one `usize` and avoids an extra
    // `byte_pos == len` branch.
    while byte_pos < len - USIZE_SIZE {
        // Sanity check that the read is in bounds
        debug_assert!(byte_pos + USIZE_SIZE <= len);
        // And that our assumptions about `byte_pos` hold.
        debug_assert!(matches!(
            word_ptr.cast::<u8>().guaranteed_eq(start.wrapping_add(byte_pos)),
            // These are from the same allocation, so will hopefully always be
            // known to match even in CTFE, but if it refuses to compare them
            // that's ok since it's just a debug check anyway.
            None | Some(true),
        ));

        // SAFETY: We know `word_ptr` is properly aligned (because of
        // `align_offset`), and we know that we have enough bytes between
        // `word_ptr` and the end
        let word = unsafe { word_ptr.read() };
        if contains_nonascii(word) {
            return false;
        }

        byte_pos += USIZE_SIZE;
        // SAFETY: We know that `byte_pos <= len - USIZE_SIZE`, which means that
        // after this `add`, `word_ptr` will be at most one-past-the-end.
        word_ptr = unsafe { word_ptr.add(1) };
    }

    // Sanity check to ensure there really is only one `usize` left. This should
    // be guaranteed by our loop condition.
    debug_assert!(byte_pos <= len && len - byte_pos <= USIZE_SIZE);

    // SAFETY: This relies on `len >= USIZE_SIZE`, which we check at the start.
    let last_word = unsafe { (start.add(len - USIZE_SIZE) as *const usize).read_unaligned() };

    !contains_nonascii(last_word)
}