codon/stdlib/internal/str.codon

# Copyright (C) 2022-2023 Exaloop Inc. <https://exaloop.io>

# Largest signed 64-bit integer. Used in this file as the "unlimited" split
# count (split/rsplit) and as the overflow bound in expandtabs().
_MAX: Static[int] = 0x7FFFFFFFFFFFFFFF

@extend
class str:
    # Magic methods

    def __hash__(self) -> int:
        """Return a base-31 polynomial hash computed over the string's bytes."""
        h = 0
        p, n = self.ptr, self.len
        i = 0
        while i < n:
            h = 31 * h + int(p[i])
            i += 1
        return h

    def __lt__(self, other: str) -> bool:
        """Return True if self orders before other (byte-wise, see _cmp)."""
        return self._cmp(other) < 0

    def __le__(self, other: str) -> bool:
        """Return True if self orders before or equal to other (see _cmp)."""
        return self._cmp(other) <= 0

    def __gt__(self, other: str) -> bool:
        """Return True if self orders after other (byte-wise, see _cmp)."""
        return self._cmp(other) > 0

    def __ge__(self, other: str) -> bool:
        """Return True if self orders after or equal to other (see _cmp)."""
        return self._cmp(other) >= 0

    def __repr__(self) -> str:
        """
        Return a quoted, escaped representation of the string.

        Single quotes are used unless the string contains a single quote and
        no double quote, in which case double quotes are used. Newline,
        carriage return, tab, backslash and the active quote character are
        backslash-escaped; any other byte outside 32..126 is written as \\xNN.
        """
        v = _strbuf(len(self) + 2)

        # Pick the quote character first: one pass to see which quotes occur.
        q, qe = "'", "\\'"
        found_single = False
        found_double = False
        for c in self:
            if c == "'":
                found_single = True
            elif c == '"':
                found_double = True

        if found_single and not found_double:
            q, qe = '"', '\\"'

        v.append(q)
        for c in self:
            d = c
            if c == "\n":
                d = "\\n"
            elif c == "\r":
                d = "\\r"
            elif c == "\t":
                d = "\\t"
            elif c == "\\":
                d = "\\\\"
            elif c == q:
                d = qe
            else:
                b = int(c.ptr[0])
                if not (32 <= b <= 126):
                    # Non-printable byte: emit a two-digit hex escape and
                    # clear d so nothing else is appended for this byte.
                    h = "0123456789abcdef"
                    v.append("\\x")
                    v.append(h[b // 16])
                    v.append(h[b % 16])
                    d = ""
            if d:
                v.append(d)
        v.append(q)
        return v.__str__()

    def __getitem__(self, idx: int) -> str:
        """
        Return the one-byte string at position idx.

        Negative indices count from the end. Raises IndexError when idx is
        out of range after adjustment.
        """
        if idx < 0:
            idx += len(self)
        if not (0 <= idx < len(self)):
            raise IndexError("string index out of range")
        return str(self.ptr + idx, 1)

    def __getitem__(self, s: Slice) -> str:
        """
        Return the substring selected by slice s.

        A full slice ([:]) returns __copy__(); a slice without a step is
        built directly from this string's pointer; a stepped slice is
        materialised byte by byte via _make_from_range.
        """
        if s.start is None and s.stop is None and s.step is None:
            return self.__copy__()
        elif s.step is None:
            start, stop, step, length = s.adjust_indices(len(self))
            return str(self.ptr + start, length)
        else:
            start, stop, step, length = s.adjust_indices(len(self))
            return self._make_from_range(start, stop, step, length)

    def _make_from_range(self, start: int, stop: int, step: int, length: int) -> str:
        """Copy the bytes at range(start, stop, step) into a new buffer of `length` bytes."""
        p = Ptr[byte](length)
        j = 0
        for i in range(start, stop, step):
            p[j] = self.ptr[i]
            j += 1
        return str(p, length)

    def __iter__(self) -> Generator[str]:
        """Yield each byte of the string as a one-byte string, front to back."""
        i = 0
        n = len(self)
        while i < n:
            yield str(self.ptr + i, 1)
            i += 1

    def __reversed__(self) -> Generator[str]:
        """Yield each byte of the string as a one-byte string, back to front."""
        i = len(self) - 1
        while i >= 0:
            yield str(self.ptr + i, 1)
            i -= 1

    def __mul__(self, x: int) -> str:
        """
        Return this string repeated x times.

        A count of zero or less (or an empty string) yields the empty string,
        matching Python; previously a negative count produced a string with a
        negative length.
        """
        if x <= 0 or self.len == 0:
            return ""
        total = x * self.len
        p = Ptr[byte](total)
        n = 0
        for _ in range(x):
            str.memcpy(p + n, self.ptr, self.len)
            n += self.len
        return str(p, total)

    def _cmp(self, other: str) -> int:
        """
        Three-way byte-wise comparison.

        Returns the difference of the first pair of differing bytes, or the
        difference in lengths when one string is a prefix of the other.
        """
        n = min(self.len, other.len)
        i = 0
        while i < n:
            c1 = self.ptr[i]
            c2 = other.ptr[i]
            if c1 != c2:
                return int(c1) - int(c2)
            i += 1
        return self.len - other.len

import algorithms.strings as algorithms

@extend
class str:
    def __contains__(self, pattern: str) -> bool:
        """Return True if find() locates `pattern` somewhere in this string."""
        position = self.find(pattern)
        return position >= 0

    # Helper methods

    def _isdigit(a: byte) -> bool:
        """Return True if `_C.isdigit` gives a non-zero result for byte `a`."""
        code = i32(int(a))
        return _C.isdigit(code) != i32(0)

    def _isspace(a: byte) -> bool:
        """Return True if `_C.isspace` gives a non-zero result for byte `a`."""
        code = i32(int(a))
        return _C.isspace(code) != i32(0)

    def _isupper(a: byte) -> bool:
        """Return True if `_C.isupper` gives a non-zero result for byte `a`."""
        code = i32(int(a))
        return _C.isupper(code) != i32(0)

    def _islower(a: byte) -> bool:
        """Return True if `_C.islower` gives a non-zero result for byte `a`."""
        code = i32(int(a))
        return _C.islower(code) != i32(0)

    def _isalpha(a: byte) -> bool:
        """Return True if `_C.isalpha` gives a non-zero result for byte `a`."""
        code = i32(int(a))
        return _C.isalpha(code) != i32(0)

    def _isalnum(a: byte) -> bool:
        """Return True if `_C.isalnum` gives a non-zero result for byte `a`."""
        code = i32(int(a))
        return _C.isalnum(code) != i32(0)

    def _toupper(a: byte) -> byte:
        """Return the result of `_C.toupper` applied to byte `a`, as a byte."""
        converted = _C.toupper(i32(int(a)))
        return byte(int(converted))

    def _tolower(a: byte) -> byte:
        """Return the result of `_C.tolower` applied to byte `a`, as a byte."""
        converted = _C.tolower(i32(int(a)))
        return byte(int(converted))

    def _slice(self, i: int, j: int) -> str:
        """
        Return the substring spanning byte offsets [i, j).

        The result is built directly from this string's pointer; no bounds
        checking is done here, so callers must pass valid offsets.
        """
        length = j - i
        return str(self.ptr + i, length)

    def _at(self, i: int) -> str:
        """Return the one-byte string at offset i; no bounds or negative-index handling."""
        base = self.ptr + i
        return str(base, 1)

    def join(self, l: Generator[str]) -> str:
        """
        Concatenate the strings produced by generator `l`, inserting this
        string between consecutive items. The separator is not appended at
        all when it is empty.
        """
        out = _strbuf()
        has_sep = len(self) != 0
        emitted_any = False
        for piece in l:
            if emitted_any and has_sep:
                out.append(self)
            out.append(piece)
            emitted_any = True
        return out.__str__()

    def join(self, l: List[str]) -> str:
        """
        Concatenate the strings in list `l`, inserting this string between
        consecutive items.

        An empty list gives "", a single-element list returns that element
        itself, and an empty separator defers to str.cat. Otherwise the exact
        output size is computed up front and filled with memcpy.
        """
        count = len(l)
        if count == 0:
            return ""
        if count == 1:
            return l[0]
        sep_len = len(self)
        if sep_len == 0:
            return str.cat(l)

        # Total size: every item plus one separator between each pair.
        total = sep_len * (count - 1)
        for item in l:
            total += len(item)

        buf = Ptr[byte](total)
        offset = 0
        for k in range(count):
            if k > 0:
                str.memcpy(buf + offset, self.ptr, sep_len)
                offset += sep_len
            item = l[k]
            str.memcpy(buf + offset, item.ptr, len(item))
            offset += len(item)

        return str(buf, total)

    def isdigit(self) -> bool:
        """
        str.isdigit() -> bool

        True when the string is non-empty and every byte satisfies
        str._isdigit; False otherwise.
        """
        n = len(self)
        if n == 0:
            return False

        k = 0
        while k < n:
            if not str._isdigit(self.ptr[k]):
                return False
            k += 1
        return True

    def islower(self) -> bool:
        """
        str.islower() -> bool

        True when the string contains at least one lowercase byte and no
        uppercase byte; False otherwise (including for the empty string).
        """
        n = len(self)
        if n == 0:
            return False

        # Single byte: the answer is just whether that byte is lowercase.
        if n == 1:
            return str._islower(self.ptr[0])

        seen_lower = False
        k = 0
        while k < n:
            b = self.ptr[k]
            if str._isupper(b):
                return False
            elif not seen_lower and str._islower(b):
                seen_lower = True
            k += 1
        return seen_lower

    def isupper(self) -> bool:
        """
        str.isupper() -> bool

        True when the string contains at least one uppercase byte and no
        lowercase byte; False otherwise (including for the empty string).
        """
        n = len(self)
        if n == 0:
            return False

        # Single byte: the answer is just whether that byte is uppercase.
        if n == 1:
            return str._isupper(self.ptr[0])

        seen_upper = False
        k = 0
        while k < n:
            b = self.ptr[k]
            if str._islower(b):
                return False
            elif not seen_upper and str._isupper(b):
                seen_upper = True
            k += 1
        return seen_upper

    def isalnum(self) -> bool:
        """
        str.isalnum() -> bool

        True when the string is non-empty and every byte satisfies
        str._isalnum; False otherwise.
        """
        n = len(self)
        if n == 0:
            return False

        k = 0
        while k < n:
            if not str._isalnum(self.ptr[k]):
                return False
            k += 1
        return True

    def isalpha(self) -> bool:
        """
        str.isalpha() -> bool

        True when the string is non-empty and every byte satisfies
        str._isalpha; False otherwise.
        """
        n = len(self)
        if n == 0:
            return False

        k = 0
        while k < n:
            if not str._isalpha(self.ptr[k]):
                return False
            k += 1
        return True

    def isspace(self) -> bool:
        """
        str.isspace() -> bool

        True when the string is non-empty and every byte satisfies
        str._isspace; False otherwise.
        """
        n = len(self)
        if n == 0:
            return False

        k = 0
        while k < n:
            if not str._isspace(self.ptr[k]):
                return False
            k += 1
        return True

    def istitle(self) -> bool:
        """
        str.istitle() -> bool

        True when the string is titlecased and has at least one cased byte:
        an uppercase byte may only follow an uncased byte, and a lowercase
        byte may only follow a cased one. False otherwise.
        """
        n = len(self)
        if n == 0:
            return False

        # Single byte: titlecased exactly when it is uppercase.
        if n == 1:
            return str._isupper(self.ptr[0])

        any_cased = False
        after_cased = False
        k = 0
        while k < n:
            b = self.ptr[k]
            if str._isupper(b):
                # An uppercase byte in the middle of a word breaks title case.
                if after_cased:
                    return False
                after_cased = True
                any_cased = True
            elif str._islower(b):
                # A lowercase byte must continue a word.
                if not after_cased:
                    return False
                after_cased = True
                any_cased = True
            else:
                after_cased = False
            k += 1
        return any_cased

    def capitalize(self) -> str:
        """
        str.capitalize() -> copy of str

        Return a new string whose first byte is passed through str._toupper
        and whose remaining bytes are passed through str._tolower. The empty
        string yields "".
        """
        n = len(self)
        if n <= 0:
            return ""

        out = Ptr[byte](n)
        out[0] = str._toupper(self.ptr[0])
        k = 1
        while k < n:
            out[k] = str._tolower(self.ptr[k])
            k += 1
        return str(out, n)

    def isdecimal(self) -> bool:
        """
        str.isdecimal() -> bool

        True when the string is non-empty and every byte is an ASCII digit
        (byte values 48 through 57); False otherwise.
        """
        n = len(self)
        if n == 0:
            return False

        k = 0
        while k < n:
            code = int(self.ptr[k])
            # ASCII '0' is 48 and '9' is 57.
            if code < 48 or code > 57:
                return False
            k += 1
        return True

    def lower(self) -> str:
        """
        str.lower() -> copy of str

        Return a new string with every byte passed through str._tolower.
        The empty string yields "".
        """
        n = len(self)
        if n == 0:
            return ""

        out = Ptr[byte](n)
        k = 0
        while k < n:
            out[k] = str._tolower(self.ptr[k])
            k += 1
        return str(out, n)

    def upper(self) -> str:
        """
        str.upper() -> copy of str

        Return a new string with every byte passed through str._toupper.
        The empty string yields "".
        """
        n = len(self)
        if n == 0:
            return ""

        out = Ptr[byte](n)
        k = 0
        while k < n:
            out[k] = str._toupper(self.ptr[k])
            k += 1
        return str(out, n)

    def isascii(self) -> bool:
        """
        str.isascii() -> bool

        True when the string is empty or every byte value is below 128;
        False as soon as a byte of 128 or more is found.
        """
        n = len(self)
        k = 0
        while k < n:
            if int(self.ptr[k]) >= 128:
                return False
            k += 1
        return True

    def casefold(self) -> str:
        """
        str.casefold() -> copy of str

        Return a version of the string for caseless comparison. Unlike
        Python, this is simply lower(), so only ASCII is handled.
        """
        folded = self.lower()
        return folded

    def swapcase(self) -> str:
        """
        str.swapcase() -> copy of str

        Return a new string in which lowercase bytes are uppercased,
        uppercase bytes are lowercased, and all other bytes are copied
        unchanged. The empty string yields "".
        """
        n = len(self)
        if n == 0:
            return ""

        out = Ptr[byte](n)
        k = 0
        while k < n:
            b = self.ptr[k]
            if str._islower(b):
                out[k] = str._toupper(b)
            elif str._isupper(b):
                out[k] = str._tolower(b)
            else:
                out[k] = b
            k += 1
        return str(out, n)

    def title(self) -> str:
        """
        str.title() -> copy of str

        Return a titlecased copy: a cased byte that follows an uncased byte
        (or starts the string) is uppercased, and a cased byte that follows
        another cased byte is lowercased. Uncased bytes are copied unchanged.
        """
        n = len(self)
        if n == 0:
            return ""

        out = Ptr[byte](n)
        in_word = False  # True when the previous byte was cased
        for k in range(n):
            b = self.ptr[k]
            if str._islower(b):
                # Lowercase at a word start gets promoted.
                out[k] = b if in_word else str._toupper(b)
                in_word = True
            elif str._isupper(b):
                # Uppercase inside a word gets demoted.
                out[k] = str._tolower(b) if in_word else b
                in_word = True
            else:
                out[k] = b
                in_word = False
        return str(out, n)

    def isnumeric(self) -> bool:
        """
        str.isnumeric() -> bool

        Return True if the string is non-empty and all of its characters are
        numeric, False otherwise.

        Unlike Python, isnumeric() deals with just ASCII characters: it is
        implemented as isdecimal().
        """
        result = self.isdecimal()
        return result

    def _build(*args):
        """
        Assemble a string from a sequence of parts.

        Each argument is either a str, copied once, or a (str, count) pair,
        copied `count` times. The total size is computed first so the output
        buffer is allocated exactly once.
        """
        size = 0
        for part in args:
            if isinstance(part, str):
                size += len(part)
            else:
                size += len(part[0]) * part[1]

        out = Ptr[byte](size)
        pos = 0
        for part in args:
            if isinstance(part, str):
                str.memcpy(out + pos, part.ptr, part.len)
                pos += part.len
            else:
                chunk, reps = part
                for _ in range(reps):
                    str.memcpy(out + pos, chunk.ptr, chunk.len)
                    pos += chunk.len

        return str(out, size)

    def ljust(self, width: int, fillchar: str = " ") -> str:
        """
        ljust(width[, fillchar]) -> string

        Return the string left-justified in a field of `width` bytes, padded
        on the right with `fillchar` (a space by default). The string itself
        is returned when it is already at least `width` long.

        Raises ValueError if `fillchar` is not exactly one character.
        """
        if len(fillchar) != 1:
            raise ValueError("The fill character must be exactly one character long")
        missing = width - len(self)
        if missing <= 0:
            return self
        return str._build(self, (fillchar, missing))

    def rjust(self, width: int, fillchar: str = " ") -> str:
        """
        rjust(width[, fillchar]) -> string

        Return the string right-justified in a field of `width` bytes, padded
        on the left with `fillchar` (a space by default). The string itself
        is returned when it is already at least `width` long.

        Raises ValueError if `fillchar` is not exactly one character.
        """
        if len(fillchar) != 1:
            raise ValueError("The fill character must be exactly one character long")
        missing = width - len(self)
        if missing <= 0:
            return self
        return str._build((fillchar, missing), self)

    def center(self, width: int, fillchar: str = " ") -> str:
        """
        str.center(width[, fillchar]) -> string

        Return the string centered in a field of `width` bytes, padded with
        `fillchar` (a space by default). The string itself is returned when
        it is already at least `width` long.

        When the padding cannot be split evenly, the extra fill character is
        placed the same way CPython does: on the left when both the padding
        and `width` are odd, otherwise on the right.

        Raises ValueError if `fillchar` is not exactly one character.
        """
        if len(fillchar) != 1:
            raise ValueError("The fill character must be exactly one character long")
        n = len(self)
        if width <= n:
            return self

        pad = width - n
        # CPython's rounding rule: left = pad / 2 + (pad & width & 1).
        left_pad = pad // 2 + (pad & width & 1)
        right_pad = pad - left_pad
        return str._build((fillchar, left_pad), self, (fillchar, right_pad))

    def zfill(self, width: int) -> str:
        """
        str.zfill(width) -> string

        Left-pad the string with '0' up to `width` bytes. A leading '+' or
        '-' is kept at the front of the result. The string is never
        truncated; it is returned as-is when already at least `width` long.
        """
        n = len(self)
        if n >= width:
            return self

        # rjust takes its padding path here (width > n), which assembles the
        # result through str._build, so the in-place edit below acts on the
        # padded result.
        padded = self.rjust(width, '0')
        shift = width - n  # offset of the original first byte in `padded`
        buf = padded.ptr

        if n > 0:
            lead = buf[shift]
            # byte(43) is '+', byte(45) is '-', byte(48) is '0'.
            if lead == byte(43) or lead == byte(45):
                buf[0] = lead
                buf[shift] = byte(48)

        return padded

    def count(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.count(sub[, start[, end]]) -> int

        Return how many times `sub` occurs in str[start:end], as counted by
        algorithms.count. `start` and `end` follow slice conventions; the
        result is 0 when the window is shorter than `sub`.
        """
        end: int = end if end is not None else len(self)
        lo, hi = self._correct_indices(start, end)
        if hi - lo < len(sub):
            return 0
        window = self._slice(lo, hi)
        return algorithms.count(window, sub)

    def find(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.find(sub [,start [,end]]) -> int

        Return the lowest index at which `sub` occurs within str[start:end],
        expressed relative to the whole string. `start` and `end` follow
        slice conventions.

        Return -1 on failure.
        """
        end: int = end if end is not None else len(self)
        lo, hi = self._correct_indices(start, end)
        if hi - lo < len(sub):
            return -1
        rel = algorithms.find(self._slice(lo, hi), sub)
        if rel < 0:
            return rel
        # Translate the window-relative hit back to an absolute index.
        return rel + lo

    def rfind(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.rfind(sub [,start [,end]]) -> int

        Return the highest index at which `sub` occurs within str[start:end],
        expressed relative to the whole string. `start` and `end` follow
        slice conventions.

        Return -1 on failure.
        """
        end: int = end if end is not None else len(self)
        lo, hi = self._correct_indices(start, end)
        if hi - lo < len(sub):
            return -1
        rel = algorithms.rfind(self._slice(lo, hi), sub)
        if rel < 0:
            return rel
        # Translate the window-relative hit back to an absolute index.
        return rel + lo

    def isidentifier(self) -> bool:
        """
        str.isidentifier() -> bool

        Return True if the string is a valid identifier: non-empty, starting
        with a letter or underscore, and continuing with letters, decimal
        digits or underscores. Unlike Python, only ASCII is considered.
        """
        n = len(self)
        if n == 0:
            return False

        head = self._at(0)
        if not (head.isalpha() or head == "_"):
            return False

        for k in range(1, n):
            ch = self._at(k)
            if not (ch.isalpha() or ch.isdecimal() or ch == "_"):
                return False

        return True

    def isprintable(self) -> bool:
        """
        str.isprintable() -> bool

        Return True if the string is empty or every byte is a printable
        ASCII character (byte values 32 through 126), False otherwise.
        DEL (127) is a control character and is not printable.
        Unlike Python, isprintable() deals with just ASCII characters.
        """
        for i in range(len(self)):
            code = int(self.ptr[i])
            # 32 is space, 126 is '~'; anything outside is a control or
            # non-ASCII byte.
            if code < 32 or code > 126:
                return False
        return True

    def _has_char(self, chars: str) -> bool:
        """
        Test this string's first byte against a strip set.

        When `chars` is non-empty, return True if the first byte equals any
        character of `chars`; when it is empty, return whether the first byte
        is whitespace. The string is read at offset 0 without a length check.
        """
        head = self._at(0)
        if not chars:
            return head.isspace()
        for candidate in chars:
            if head == candidate:
                return True
        return False

    def lstrip(self, chars: str = "") -> str:
        """
        str.lstrip([chars]) -> string

        Return the string with leading whitespace removed, or with leading
        characters found in `chars` removed when `chars` is given.
        Unlike Python, lstrip() deals with just ASCII characters.
        """
        n = len(self)
        first_kept = 0
        while first_kept < n:
            if not self._at(first_kept)._has_char(chars):
                break
            first_kept += 1
        return self._slice(first_kept, n)

    def rstrip(self, chars: str = "") -> str:
        """
        str.rstrip([chars]) -> string

        Return the string with trailing whitespace removed, or with trailing
        characters found in `chars` removed when `chars` is given.
        Unlike Python, rstrip() deals with just ASCII characters.
        """
        last_kept = len(self) - 1
        while last_kept >= 0:
            if not self._at(last_kept)._has_char(chars):
                break
            last_kept -= 1
        return self._slice(0, last_kept + 1)

    def strip(self, chars: str = "") -> str:
        """
        str.strip([chars]) -> string

        Return the string with leading and trailing whitespace removed, or
        with leading and trailing characters found in `chars` removed when
        `chars` is given. Implemented as lstrip followed by rstrip.
        Unlike Python, strip() deals with just ASCII characters.
        """
        without_leading = self.lstrip(chars)
        return without_leading.rstrip(chars)

    def partition(self, sep: str) -> Tuple[str, str, str]:
        """
        Split the string at the first occurrence of `sep`.

        Returns (before, sep, after). When `sep` is not found, returns the
        string itself followed by two empty strings.

        Raises ValueError if `sep` is empty.
        """
        if not sep:
            raise ValueError("empty separator")
        hit = algorithms.find(self, sep)
        if hit >= 0:
            head = self._slice(0, hit)
            tail = self._slice(hit + len(sep), len(self))
            return head, sep, tail
        return self, "", ""

    def rpartition(self, sep: str) -> Tuple[str, str, str]: # XXX
        """
        Split the string at the last occurrence of `sep`.

        Returns (before, sep, after). When `sep` is not found, returns two
        empty strings followed by the string itself.

        Raises ValueError if `sep` is empty.
        """
        if not sep:
            raise ValueError("empty separator")
        hit = algorithms.rfind(self, sep)
        if hit >= 0:
            head = self._slice(0, hit)
            tail = self._slice(hit + len(sep), len(self))
            return head, sep, tail
        return "", "", self

    def split(self, sep: Optional[str] = None, maxsplit: int = -1) -> List[str]:
        """
        str.split([sep [,maxsplit]]) -> list of strings

        Return the words of the string, using `sep` as the delimiter. At most
        `maxsplit` splits are performed when it is non-negative; a negative
        value means no limit. With no `sep`, splitting is delegated to
        _split_whitespace; a one-byte `sep` is delegated to _split_char.

        Raises ValueError if `sep` is the empty string.
        """
        # A negative maxsplit means "unlimited".
        limit = maxsplit if maxsplit >= 0 else _MAX

        if sep is None:
            return self._split_whitespace(limit)
        sep: str = sep

        sep_len = len(sep)
        if sep_len == 0:
            raise ValueError("empty separator")

        # special case for length-1 pattern
        if sep_len == 1:
            return self._split_char(sep.ptr[0], limit)

        MAX_PREALLOC = 12
        capacity = MAX_PREALLOC if limit >= MAX_PREALLOC else limit + 1
        parts = List[str](capacity=capacity)
        total = len(self)
        cursor = 0  # start of the piece currently being scanned

        while limit > 0:
            limit -= 1
            rel = algorithms.find(self._slice(cursor, total), sep)
            if rel < 0:
                break
            hit = cursor + rel
            parts.append(self._slice(cursor, hit))
            cursor = hit + sep_len

        # Whatever remains after the last separator is the final piece.
        parts.append(self._slice(cursor, total))
        return parts

    def rsplit(self, sep: Optional[str] = None, maxsplit: int = -1) -> List[str]:
        """
        str.rsplit([sep [,maxsplit]]) -> list of strings

        Return the words of the string, using `sep` as the delimiter and
        working from the end towards the front. At most `maxsplit` splits are
        performed when it is non-negative; a negative value means no limit.
        With no `sep`, splitting is delegated to _rsplit_whitespace; a
        one-byte `sep` is delegated to _rsplit_char.

        Raises ValueError if `sep` is the empty string.
        """
        # A negative maxsplit means "unlimited".
        limit = maxsplit if maxsplit >= 0 else _MAX

        if sep is None:
            return self._rsplit_whitespace(limit)
        sep: str = sep

        sep_len = len(sep)
        if sep_len == 0:
            raise ValueError("empty separator")

        # special case for length-1 pattern
        if sep_len == 1:
            return self._rsplit_char(sep.ptr[0], limit)

        MAX_PREALLOC = 12
        capacity = MAX_PREALLOC if limit >= MAX_PREALLOC else limit + 1
        parts = List[str](capacity=capacity)
        right = len(self)  # exclusive end of the piece being scanned

        while limit > 0:
            limit -= 1
            hit = algorithms.rfind(self._slice(0, right), sep)
            if hit < 0:
                break
            parts.append(self._slice(hit + sep_len, right))
            right = hit

        # Pieces were collected right-to-left; add the head and flip.
        parts.append(self._slice(0, right))
        parts.reverse()
        return parts

    def splitlines(self, keepends: bool = False) -> List[str]:
        """
        str.splitlines([keepends]) -> list of strings

        Return the lines of the string, breaking at "\\n", "\\r" and "\\r\\n".
        Line terminators are dropped unless `keepends` is true. A string
        consisting of a single line that runs to the end is returned as-is as
        the only element.
        """
        carriage = byte(13)  # \r
        newline = byte(10)   # \n

        total = len(self)
        lines = []
        scan = 0        # current scan position
        line_start = 0  # where the line being scanned began

        while scan < total:
            # Advance to the next line terminator (or the end).
            while scan < total and self.ptr[scan] != carriage and self.ptr[scan] != newline:
                scan += 1

            line_end = scan
            if scan < total:
                # Treat "\r\n" as one terminator.
                is_crlf = (
                    self.ptr[scan] == carriage
                    and scan + 1 < total
                    and self.ptr[scan + 1] == newline
                )
                scan += 2 if is_crlf else 1
                if keepends:
                    line_end = scan

            # Whole string is one line: hand back self without slicing.
            if line_start == 0 and line_end == total:
                lines.append(self)
                break

            lines.append(self._slice(line_start, line_end))
            line_start = scan

        return lines

    def startswith(
        self, prefix: str, start: int = 0, end: Optional[int] = None
    ) -> bool:
        """
        str.startswith(prefix[, start[, end]]) -> bool

        Return True if str[start:end] starts with `prefix`, False otherwise.
        `start` and `end` are interpreted as in slice notation: negative
        values count from the end and out-of-range values are clamped.
        """
        end: int = end if end is not None else len(self)
        # Normalise both bounds independently. The previous code adjusted a
        # negative start only when end was non-negative and never clamped,
        # which could slice before the start of the buffer.
        start, end = self._correct_indices(start, end)

        n = len(prefix)
        # Also covers start > len(self) and start > end (negative window).
        if end - start < n:
            return False

        # prefix is an empty string
        if n == 0:
            return True

        return prefix == self._slice(start, start + n)

    def endswith(self, suffix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """
        str.endswith(suffix[, start[, end]]) -> bool

        Return True if str[start:end] ends with `suffix`, False otherwise.
        `start` and `end` are interpreted as in slice notation: negative
        values count from the end and out-of-range values are clamped.
        """
        end: int = end if end is not None else len(self)
        # Normalise both bounds independently. The previous code left a
        # negative start unadjusted whenever end was also negative, which
        # made the window look larger than it is and gave wrong answers.
        start, end = self._correct_indices(start, end)

        n = len(suffix)
        # Also covers start > len(self) and start > end (negative window).
        if end - start < n:
            return False

        # suffix is an empty string
        if n == 0:
            return True

        # Compare against the last n bytes of the window.
        return suffix == self._slice(end - n, end)

    def index(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.index(sub [,start [,end]]) -> int

        Same as find(), except that ValueError is raised when `sub` is not
        found instead of returning -1.
        """
        position = self.find(sub, start, end)
        if position != -1:
            return position
        raise ValueError("substring not found")

    def rindex(self, sub: str, start: int = 0, end: Optional[int] = None) -> int:
        """
        str.rindex(sub [,start [,end]]) -> int

        Same as rfind(), except that ValueError is raised when `sub` is not
        found instead of returning -1.
        """
        position = self.rfind(sub, start, end)
        if position != -1:
            return position
        raise ValueError("substring not found")

    def replace(self, old: str, new: str, maxcount: int = -1) -> str:
        """
        str.replace(old, new[, count]) -> string

        Return a copy of the string with occurrences of `old` replaced by
        `new`. When `maxcount` is given, only the first `maxcount`
        occurrences are replaced. The work is delegated to _replace.
        """
        replaced = self._replace(old, new, maxcount)
        return replaced

    def expandtabs(self, tabsize: int = 8) -> str:
        """
        str.expandtabs([tabsize]) -> string

        Return a copy of str where all tab characters are expanded using spaces.
        If tabsize is not given, a tab size of 8 characters is assumed.

        Works in two passes: the first computes the exact output length
        (raising OverflowError if it would exceed _MAX), the second fills a
        buffer of that size. When tabsize is not positive, tabs are dropped.
        The column counter restarts after every "\\n" or "\\r".
        """
        i = 0  # pass 1: total length of all completed lines
        j = 0  # current column within the line being scanned
        p = self.ptr
        e = p + len(self)  # one-past-the-end sentinel

        break_r = byte(13)  # \r
        break_n = byte(10)  # \n
        tab = byte(9)       # \t
        space = byte(32)    # ' '

        def overflow():
            raise OverflowError("result too long")

        # Pass 1: measure the expanded length, checking for overflow before
        # every addition.
        while p < e:
            if p[0] == tab:
                if tabsize > 0:
                    # Distance to the next tab stop.
                    incr = tabsize - (j % tabsize)
                    if j > _MAX - incr:
                        overflow()
                    j += incr
            else:
                if j > _MAX - 1:
                    overflow()
                j += 1
                if p[0] == break_n or p[0] == break_r:
                    # Line finished: bank its length and restart the column.
                    if i > _MAX - j:
                        overflow()
                    i += j
                    j = 0
            p += 1

        if i > _MAX - j:
            overflow()

        # Pass 2: allocate the exact size and write the expanded text.
        u_len = i + j
        u = Ptr[byte](u_len)
        j = 0
        q = u  # write cursor into the output buffer
        p = self.ptr

        while p < e:
            if p[0] == tab:
                if tabsize > 0:
                    # i is reused here as the number of spaces to emit.
                    i = tabsize - (j % tabsize)
                    j += i
                    while True:
                        k = i
                        i -= 1
                        if k == 0:
                            break
                        q[0] = space
                        q += 1
            else:
                j += 1
                q[0] = p[0]
                q += 1
                if p[0] == break_n or p[0] == break_r:
                    j = 0
            p += 1

        return str(u, u_len)

    def translate(self, map) -> str:
        """
        Return a copy with each byte mapped through the translation table.

        `map` is looked up by the integer value of each byte. A byte with no
        entry is copied unchanged; an entry of None deletes the byte; any
        other entry is inserted in place of the byte. The output size is
        computed in a first pass and the result is written in a second.
        """
        n = len(self)

        # Pass 1: size of the translated string.
        size = 0
        for k in range(n):
            code = int(self.ptr[k])
            if code not in map:
                size += 1
                continue
            repl = map[code]
            if repl is not None:
                size += len(repl)

        out = Ptr[byte](size)
        dst = out

        # Pass 2: write the translated bytes.
        for k in range(n):
            code = int(self.ptr[k])
            if code not in map:
                dst[0] = self.ptr[k]
                dst += 1
                continue
            repl = map[code]
            if repl is not None:
                str.memcpy(dst, repl.ptr, len(repl))
                dst += len(repl)

        return str(out, size)


    # Internal helpers

    def _correct_indices(self, start: int, end: int) -> Tuple[int, int]:
        """
        Normalize a ``(start, end)`` pair against the length of this string.

        Negative values are offset by the length and then floored at 0.
        ``end`` is additionally capped at the length; ``start`` is not
        capped from above.
        """
        size = len(self)

        lo = start
        if lo < 0:
            lo += size
            if lo < 0:
                lo = 0

        hi = end
        if hi > size:
            hi = size
        elif hi < 0:
            hi += size
            if hi < 0:
                hi = 0

        return (lo, hi)

    def _split_whitespace(self, maxcount: int) -> List[str]:
        """
        Split on runs of bytes accepted by ``str._isspace``, scanning from
        the left and performing at most ``maxcount`` splits.

        Leading whitespace never produces an empty field. Once the split
        budget is used up, whatever follows the next whitespace run is
        appended as one final field (trailing whitespace included).
        """
        PREALLOC_MAX = 12
        capacity = PREALLOC_MAX
        if maxcount < PREALLOC_MAX:
            capacity = maxcount + 1
        fields = List[str](capacity)

        data = self.ptr
        n = len(self)
        pos = 0
        budget = maxcount
        while budget > 0:
            budget -= 1
            # Skip the whitespace in front of the next field.
            while pos < n and str._isspace(data[pos]):
                pos += 1
            if pos == n:
                break
            # Consume the field itself.
            begin = pos
            pos += 1
            while pos < n and not str._isspace(data[pos]):
                pos += 1
            fields.append(self._slice(begin, pos))

        # Anything left over (only possible when the budget ran out) becomes
        # a single last field, minus the whitespace in front of it.
        while pos < n and str._isspace(data[pos]):
            pos += 1
        if pos < n:
            fields.append(self._slice(pos, n))

        return fields

    def _rsplit_whitespace(self, maxcount: int) -> List[str]:
        """
        Split on runs of bytes accepted by ``str._isspace``, scanning from
        the right and performing at most ``maxcount`` splits.

        Fields are collected right-to-left and the list is reversed before
        it is returned, so the result is in left-to-right order. Once the
        split budget is used up, whatever precedes the next whitespace run
        is kept as one field (leading whitespace included).
        """
        PREALLOC_MAX = 12
        capacity = PREALLOC_MAX
        if maxcount < PREALLOC_MAX:
            capacity = maxcount + 1
        fields = List[str](capacity)

        data = self.ptr
        pos = len(self) - 1
        budget = maxcount
        while budget > 0:
            budget -= 1
            # Skip the whitespace behind the next field.
            while pos >= 0 and str._isspace(data[pos]):
                pos -= 1
            if pos < 0:
                break
            # Walk back over the field itself.
            last = pos
            pos -= 1
            while pos >= 0 and not str._isspace(data[pos]):
                pos -= 1
            fields.append(self._slice(pos + 1, last + 1))

        # Anything left over (only possible when the budget ran out) becomes
        # a single field, minus the whitespace behind it.
        while pos >= 0 and str._isspace(data[pos]):
            pos -= 1
        if pos >= 0:
            fields.append(self._slice(0, pos + 1))

        fields.reverse()
        return fields

    def _split_char(self, char: byte, maxcount: int) -> List[str]:
        """
        Split on the single byte ``char``, scanning from the left and
        performing at most ``maxcount`` splits.

        Every occurrence found within the budget closes a field (possibly an
        empty one); the remainder of the string is always appended as the
        last field.
        """
        PREALLOC_MAX = 12
        capacity = PREALLOC_MAX
        if maxcount < PREALLOC_MAX:
            capacity = maxcount + 1
        fields = List[str](capacity)

        data = self.ptr
        n = len(self)
        begin = 0  # start of the field currently being scanned
        budget = maxcount
        for pos in range(n):
            if budget <= 0:
                break
            if data[pos] == char:
                fields.append(self._slice(begin, pos))
                begin = pos + 1
                budget -= 1

        fields.append(self._slice(begin, n))
        return fields

    def _rsplit_char(self, char: byte, maxcount: int) -> List[str]:
        """
        Split on the single byte ``char``, scanning from the right and
        performing at most ``maxcount`` splits.

        Fields are collected right-to-left and the list is reversed before
        it is returned. The unsplit head of the string is always included as
        the first field of the result.
        """
        PREALLOC_MAX = 12
        capacity = PREALLOC_MAX
        if maxcount < PREALLOC_MAX:
            capacity = maxcount + 1
        fields = List[str](capacity)

        data = self.ptr
        stop = len(self)  # exclusive end of the field currently being scanned
        budget = maxcount
        for pos in range(len(self) - 1, -1, -1):
            if budget <= 0:
                break
            if data[pos] == char:
                fields.append(self._slice(pos + 1, stop))
                stop = pos
                budget -= 1

        fields.append(self._slice(0, stop))
        fields.reverse()
        return fields

    def _findchar(self, c: byte):
        """
        Look for the byte ``c`` in this string using C ``memchr``.

        Returns whatever ``memchr`` returns: callers in this file treat the
        result as a pointer that is false when the byte is absent.
        """
        needle = i32(int(c))  # memchr takes the byte as an i32
        return _C.memchr(self.ptr, needle, len(self))

    def _countchar(self, c: byte, maxcount: int):
        """
        Count occurrences of the byte ``c``, giving up once the tally
        reaches ``maxcount``.

        The limit is tested only after a hit has been counted, so a string
        containing ``c`` yields at least 1 regardless of ``maxcount``.
        """
        tally = 0
        cursor = self.ptr
        limit = cursor + len(self)

        while True:
            # Search only the part of the string that has not been seen yet.
            hit = str(cursor, limit - cursor)._findchar(c)
            if not hit:
                break
            tally += 1
            if tally >= maxcount:
                break
            cursor = hit + 1
        return tally

    def _replace_interleave(self, to: str, maxcount: int):
        """
        Return a new string with ``to`` interleaved between the bytes of
        this string (the replacement of an empty pattern).

        ``to`` is laid down ``count`` times, where ``count`` is ``maxcount``
        when ``maxcount <= len(self)`` and ``len(self) + 1`` otherwise: once
        at the very start and then after each of the first ``count - 1``
        bytes. The remaining bytes follow unchanged.

        Raises ``OverflowError`` when the result length would exceed ``_MAX``.
        """
        src = self.ptr
        src_len = len(self)
        ins = to.ptr
        ins_len = len(to)

        count = src_len + 1
        if maxcount <= src_len:
            count = maxcount

        # assert count > 0
        if ins_len > (_MAX - src_len) // count:
            raise OverflowError("replace bytes is too long")

        out_len = count * ins_len + src_len
        out = Ptr[byte](out_len)
        dst = out
        i = 0  # number of source bytes copied so far

        if ins_len > 1:
            # The first insertion goes in front of everything.
            str.memcpy(dst, ins, ins_len)
            dst += ins_len
            count -= 1

            while i < count:
                dst[0] = src[i]
                dst += 1
                str.memcpy(dst, ins, ins_len)
                dst += ins_len
                i += 1
        else:
            # ``to`` is at most one byte here: store it directly rather than
            # going through memcpy.
            dst[0] = ins[0]
            dst += ins_len
            count -= 1

            while i < count:
                dst[0] = src[i]
                dst += 1
                dst[0] = ins[0]
                dst += ins_len
                i += 1

        # Tail of the source that received no insertion.
        str.memcpy(dst, src + i, src_len - i)
        return str(out, out_len)

    def _replace_delete_single_character(self, from_c: byte, maxcount: int):
        """
        Return this string with occurrences of the byte ``from_c`` removed,
        working left to right; ``maxcount`` is the limit handed to
        ``_countchar``.

        Returns ``self`` unchanged when ``from_c`` does not occur.
        """
        n = len(self)

        hits = self._countchar(from_c, maxcount)
        if hits == 0:
            return self

        out_len = n - hits
        # assert out_len >= 0
        out = Ptr[byte](out_len)
        dst = out

        cursor = self.ptr
        limit = cursor + n
        for _ in range(hits):
            hit = str(cursor, limit - cursor)._findchar(from_c)
            if not hit:
                break
            # Keep the run in front of the hit and step over the hit itself.
            run = hit - cursor
            str.memcpy(dst, cursor, run)
            dst += run
            cursor = hit + 1

        # Whatever follows the last removed byte is kept verbatim.
        str.memcpy(dst, cursor, limit - cursor)
        return str(out, out_len)

    def _replace_delete_substring(self, from_s: str, maxcount: int):
        """
        Return this string with occurrences of ``from_s`` removed, working
        left to right.

        The number of removals is the value reported by
        ``algorithms.count_with_max(self, from_s, maxcount)``; when that is
        0, ``self`` is returned unchanged.
        """
        n = len(self)
        pat_len = len(from_s)

        hits = algorithms.count_with_max(self, from_s, maxcount)
        if hits == 0:
            return self

        out_len = n - (hits * pat_len)
        # assert out_len >= 0
        out = Ptr[byte](out_len)
        dst = out

        cursor = self.ptr
        limit = cursor + n
        for _ in range(hits):
            offset = algorithms.find(str(cursor, limit - cursor), from_s)
            if offset == -1:
                break
            # Keep the run in front of the match and step over the match.
            str.memcpy(dst, cursor, offset)
            dst += offset
            cursor += offset + pat_len

        # Whatever follows the last removed match is kept verbatim.
        str.memcpy(dst, cursor, limit - cursor)
        return str(out, out_len)

    def _replace_single_character_in_place(self, from_c: byte, to_c: byte, maxcount: int):
        """
        Return a same-length copy of this string in which up to ``maxcount``
        occurrences of the byte ``from_c`` (left to right) are overwritten
        with ``to_c``.

        Returns ``self`` unchanged when ``from_c`` does not occur. The first
        occurrence is replaced before ``maxcount`` is consulted.
        """
        n = len(self)

        first = self._findchar(from_c)
        if not first:
            return self

        # Work on a private copy; the bytes of ``self`` are never written.
        out = Ptr[byte](n)
        str.memcpy(out, self.ptr, n)

        # Patch the occurrence already located, translated into the copy.
        cursor = out + (first - self.ptr)
        cursor[0] = to_c
        cursor += 1
        limit = out + n

        # One replacement is done; at most ``maxcount - 1`` more may follow.
        for _ in range(maxcount - 1):
            hit = str(cursor, limit - cursor)._findchar(from_c)
            if not hit:
                break
            hit[0] = to_c
            cursor = hit + 1

        return str(out, n)

    def _replace_substring_in_place(self, from_s: str, to: str, maxcount: int):
        """
        Return a same-length copy of this string in which up to ``maxcount``
        occurrences of ``from_s`` (left to right) are overwritten with the
        first ``len(from_s)`` bytes of ``to``.

        ``_replace`` only dispatches here when both strings have the same
        length. Returns ``self`` unchanged when ``from_s`` does not occur.
        The first occurrence is replaced before ``maxcount`` is consulted.
        """
        n = len(self)
        pat_len = len(from_s)
        repl = to.ptr

        first = algorithms.find(self, from_s)
        if first == -1:
            return self

        # Work on a private copy; the bytes of ``self`` are never written.
        out = Ptr[byte](n)
        str.memcpy(out, self.ptr, n)

        # Patch the occurrence already located.
        cursor = out + first
        str.memcpy(cursor, repl, pat_len)
        cursor += pat_len
        limit = out + n

        # One replacement is done; at most ``maxcount - 1`` more may follow.
        for _ in range(maxcount - 1):
            offset = algorithms.find(str(cursor, limit - cursor), from_s)
            if offset == -1:
                break
            str.memcpy(cursor + offset, repl, pat_len)
            cursor += offset + pat_len

        return str(out, n)

    def _replace_single_character(self, from_c: byte, to_s: str, maxcount: int):
        """
        Return this string with occurrences of the byte ``from_c`` replaced
        by ``to_s``, working left to right; ``maxcount`` is the limit handed
        to ``_countchar``.

        Returns ``self`` unchanged when ``from_c`` does not occur. Raises
        ``OverflowError`` when the result length would exceed ``_MAX``.
        """
        n = len(self)
        repl_len = len(to_s)

        hits = self._countchar(from_c, maxcount)
        if hits == 0:
            return self

        # Each replacement changes the length by ``repl_len - 1``.
        # assert hits > 0
        if repl_len - 1 > (_MAX - n) // hits:
            raise OverflowError("replace bytes is too long")

        out_len = n + hits * (repl_len - 1)
        out = Ptr[byte](out_len)
        dst = out

        cursor = self.ptr
        limit = cursor + n
        for _ in range(hits):
            hit = str(cursor, limit - cursor)._findchar(from_c)
            if not hit:
                break

            # Copy the (possibly empty) run in front of the hit, then the
            # replacement, and resume just past the hit.
            run = hit - cursor
            str.memcpy(dst, cursor, run)
            dst += run
            str.memcpy(dst, to_s.ptr, repl_len)
            dst += repl_len
            cursor = hit + 1

        # Whatever follows the last replaced byte is kept verbatim.
        str.memcpy(dst, cursor, limit - cursor)
        return str(out, out_len)

    def _replace_substring(self, from_s: str, to_s: str, maxcount: int):
        """
        Return this string with occurrences of ``from_s`` replaced by
        ``to_s``, working left to right.

        The number of replacements is the value reported by
        ``algorithms.count_with_max(self, from_s, maxcount)``; when that is
        0, ``self`` is returned unchanged. Raises ``OverflowError`` when the
        result length would exceed ``_MAX``.
        """
        n = len(self)
        pat_len = len(from_s)
        repl_len = len(to_s)

        hits = algorithms.count_with_max(self, from_s, maxcount)
        if hits == 0:
            return self

        # Each replacement changes the length by ``repl_len - pat_len``.
        # assert hits > 0
        if repl_len - pat_len > (_MAX - n) // hits:
            raise OverflowError("replace bytes is too long")

        out_len = n + hits * (repl_len - pat_len)
        out = Ptr[byte](out_len)
        dst = out

        cursor = self.ptr
        limit = cursor + n
        for _ in range(hits):
            offset = algorithms.find(str(cursor, limit - cursor), from_s)
            if offset == -1:
                break

            # Copy the (possibly empty) run in front of the match, then the
            # replacement, and resume just past the match.
            str.memcpy(dst, cursor, offset)
            dst += offset
            str.memcpy(dst, to_s.ptr, repl_len)
            dst += repl_len
            cursor += offset + pat_len

        # Whatever follows the last replaced match is kept verbatim.
        str.memcpy(dst, cursor, limit - cursor)
        return str(out, out_len)

    def _replace(self, from_s: str, to_s: str, maxcount: int):
        """
        Replace occurrences of ``from_s`` with ``to_s`` by dispatching to the
        specialised helper for the shapes of the two strings.

        A negative ``maxcount`` is turned into ``_MAX``. ``self`` is returned
        unchanged when it is shorter than ``from_s``, when ``maxcount`` is 0,
        or when both ``from_s`` and ``to_s`` are empty.
        """
        self_len = len(self)
        from_len = len(from_s)
        to_len = len(to_s)

        if self_len < from_len or maxcount == 0:
            return self

        limit = maxcount
        if limit < 0:
            limit = _MAX

        # Empty pattern: insert ``to_s`` between the bytes.
        if from_len == 0:
            if to_len == 0:
                return self
            return self._replace_interleave(to_s, limit)

        # Single-byte pattern: byte-oriented helpers.
        if from_len == 1:
            from_c = from_s.ptr[0]
            if to_len == 0:
                return self._replace_delete_single_character(from_c, limit)
            if to_len == 1:
                return self._replace_single_character_in_place(from_c, to_s.ptr[0], limit)
            return self._replace_single_character(from_c, to_s, limit)

        # Multi-byte pattern: substring-oriented helpers.
        if to_len == 0:
            return self._replace_delete_substring(from_s, limit)
        if to_len == from_len:
            return self._replace_substring_in_place(from_s, to_s, limit)
        return self._replace_substring(from_s, to_s, limit)