Source code for pybase64._fallback

from __future__ import annotations

import sys
from base64 import b64decode as builtin_decode
from base64 import b64encode as builtin_encode
from base64 import encodebytes as builtin_encodebytes
from binascii import Error as BinAsciiError

from pybase64._unspecified import _Unspecified

TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Final, Literal

    from pybase64._typing import Buffer


# Feature switches and constants evaluated once at import time.

# True on interpreters older than 3.13: b64decode then decodes with validate=False and
# verifies the decoded length itself instead of relying on the stdlib's validate=True.
_SLOW_VALIDATION: Final = sys.version_info[:2] < (3, 13)  # fast/correct validation in CPython 3.13+
# sys.hexversion packs major/minor/micro/release-level/serial; 0x030F00A8 is 3.15.0 alpha 8.
# When True, b64decode/b64encode forward their extra keyword arguments straight to the stdlib.
_PYTHON_3_15_API: Final = sys.hexversion >= 0x030F00A8  # in 3.15.0a8, move sys.version_info check
_BYTES_TYPES: Final = (bytes, bytearray)  # Types acceptable as binary data
_EQUAL_ASCII: Final = 61  # '='
# Sentinel meaning "argument not supplied" (distinct from None/False).
_UNSPECIFIED: Final = _Unspecified.UNSPECIFIED

# The tables below are only needed by the pure-Python filtering path, so they are not built
# when the 3.15 stdlib API is used.
if not _PYTHON_3_15_API:
    # we consider '=' part of the alphabet, it will be handled separately
    _BASE64_ALPHABET = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
    # we do not keep '=' on purpose, it will be handled separately
    # (every byte value 0..255 that is not in _BASE64_ALPHABET; used as the delete table
    # when validate is False)
    _IGNORECHARS_VALIDATE_FALSE: Final = bytes(i for i in range(256) if i not in _BASE64_ALPHABET)


def _get_simd_name(flags: int) -> str:
    assert flags == 0  # noqa: S101
    return "fallback"


def _get_simd_path() -> int:
    return 0


def _get_bytes(s: str | Buffer, *, allow_str: bool = True) -> bytes | bytearray:
    if isinstance(s, str):
        if not allow_str:
            msg = "argument should be a bytes-like object"
            raise TypeError(msg) from None
        try:
            return s.encode("ascii")
        except UnicodeEncodeError:
            msg = "string argument should contain only ASCII characters"
            raise ValueError(msg) from None
    if isinstance(s, _BYTES_TYPES):
        return s
    try:
        mv = memoryview(s)
        if not mv.c_contiguous:
            msg = f"{s.__class__.__name__!r:s}: underlying buffer is not C-contiguous"
            raise BufferError(msg)
        return mv.tobytes()
    except TypeError:
        msg = (
            "argument should be a bytes-like object or ASCII "
            f"string, not {s.__class__.__name__!r:s}"
        )
        raise TypeError(msg) from None


def _validate_altchars(altchars: bytes | bytearray) -> bytes | bytearray | None:
    if len(altchars) != 2:
        msg = "len(altchars) != 2"
        raise ValueError(msg) from None
    if altchars == b"+/":
        return None
    return altchars


def b64decode(  # noqa: C901
    s: str | Buffer,
    altchars: str | Buffer | None = None,
    validate: bool | Literal[_Unspecified.UNSPECIFIED] = _UNSPECIFIED,
    *,
    padded: bool = True,
    ignorechars: Buffer | Literal[_Unspecified.UNSPECIFIED] = _UNSPECIFIED,
    canonical: bool = False,
) -> bytes:
    """Decode bytes encoded with the standard Base64 alphabet.

    Argument ``s`` is a :term:`bytes-like object` or ASCII string to
    decode.

    Optional ``altchars`` must be a :term:`bytes-like object` or ASCII
    string of length 2 which specifies the alternative alphabet used instead
    of the '+' and '/' characters.

    If ``ignorechars`` is specified, it should be a :term:`bytes-like object`
    containing characters to ignore from the input when ``validate`` is
    ``True``. If ``ignorechars`` contains the pad character ``'='``, the pad
    characters presented before the end of the encoded data and the excess
    pad characters will be ignored. The default value of ``validate`` is
    ``True`` if ``ignorechars`` is specified, ``False`` otherwise.

    If ``validate`` is ``False``, characters that are neither in the normal
    base-64 alphabet nor the alternative alphabet are discarded prior to the
    padding check.
    If ``validate`` is ``True``, these non-alphabet characters in the input
    result in a :exc:`binascii.Error`.

    If ``padded`` is ``False``, padding in the input is not required or not
    allowed when ``validate`` is ``True`` and ``ignorechars`` does not
    contain the pad character ``'='``.

    If ``canonical`` is ``True``, non-zero padding bits are rejected.

    The result is returned as a :class:`bytes` object.

    A :exc:`binascii.Error` is raised if ``s`` is incorrectly padded.
    A :exc:`ValueError` is raised if ``ignorechars`` is given together with
    a false ``validate``.
    """
    s = _get_bytes(s)
    has_bad_chars = False
    if altchars is not None:
        # becomes None again when altchars is the standard b"+/"
        altchars = _validate_altchars(_get_bytes(altchars))
    if validate is _UNSPECIFIED:
        validate = ignorechars is not _UNSPECIFIED
    if ignorechars is not _UNSPECIFIED and not validate:
        msg = "validate must be True or unspecified when ignorechars is specified"
        raise ValueError(msg)
    if _PYTHON_3_15_API:
        # the stdlib implements every option natively, just forward them
        kwargs: dict[str, bool | Buffer] = {
            "validate": validate,
            "padded": padded,
            "canonical": canonical,
        }
        if ignorechars is not _UNSPECIFIED:
            kwargs["ignorechars"] = ignorechars
        return builtin_decode(s, altchars, **kwargs)  # type: ignore[arg-type]
    if ignorechars is not _UNSPECIFIED:
        ignorechars_ = _get_bytes(ignorechars, allow_str=False)
    if altchars is not None:
        if ignorechars is _UNSPECIFIED:
            # '+' or '/' present in the data although replaced by altchars:
            # still decoded, but reported through a warning at the end
            for b in b"+/":
                if b not in altchars and b in s:
                    has_bad_chars = True
                    break
            trans = bytes.maketrans(altchars, b"+/")
            s = s.translate(trans)
        else:
            # swap altchars with '+/' in both the data and ignorechars so that
            # the remaining logic only deals with the standard alphabet
            trans_in_add = set(b"+/") - set(altchars)
            if len(trans_in_add) == 2:
                # we don't want to use an unordered set for 2 elements
                trans = bytes.maketrans(altchars + b"+/", b"+/" + altchars)
            else:
                # 0 or 1 element in the set
                trans = bytes.maketrans(
                    altchars + bytes(trans_in_add),
                    b"+/" + bytes(set(altchars) - set(b"+/")),
                )
            s = s.translate(trans)
            ignorechars_ = ignorechars_.translate(trans)
    ignore_equal = not validate
    if (not validate) or (ignorechars is not _UNSPECIFIED):
        # we need to filter s before calling builtin_decode this might be quite slow
        if not validate:
            ignore_equal = True
            ignorechars_ = _IGNORECHARS_VALIDATE_FALSE
        else:
            ignore_equal = 61 in ignorechars_
            # alphabet characters (and '=') are never removed by this filter
            ignorechars_ = bytes(set(ignorechars_) - set(_BASE64_ALPHABET))
        if ignorechars_:
            s = s.translate(None, delete=ignorechars_)
        if ignore_equal and s:
            if s[-1] != 61:
                # there's data at the end, strip all padding bytes
                s = s.translate(None, delete=b"=")
            else:
                # get s without padding
                last_equal = len(s) - 1
                while (last_equal >= 0) and s[last_equal] == 61:
                    last_equal -= 1
                last_equal += 1
                equal_count = len(s) - last_equal
                s2 = s[:last_equal].translate(None, delete=b"=")
                quad_pos = len(s2) % 4
                if quad_pos in {0, 1} or (
                    quad_pos == 2 and equal_count == 1
                ):  # 0 is OK, 1 will fail
                    s = s2
                else:
                    # rebuild exactly the amount of padding the data needs
                    s = s2 + b"=" * (4 - quad_pos)
    # we always do validation, start with a simple check
    if s:
        quad_pos = len(s) % 4
        if (
            not padded
            and not ignore_equal
            and quad_pos in {0, 3}
            and (s[-2] == _EQUAL_ASCII or s[-1] == _EQUAL_ASCII)
        ):
            msg = "Padding not allowed"
            raise BinAsciiError(msg)
        if quad_pos != 0:
            if padded or quad_pos not in {2, 3}:
                msg = "Incorrect padding" if padded else "Invalid len"
                raise BinAsciiError(msg)
            # handle this by adding padding
            s = s + b"=" * (4 - quad_pos)
    if _SLOW_VALIDATION:
        result = builtin_decode(s, validate=False)
        # check length of result vs length of input
        expected_len = 0
        if s:
            padding = 0
            # len(s) % 4 != 0 implies len(s) >= 4 here
            if s[-2] == _EQUAL_ASCII:
                padding += 1
            if s[-1] == _EQUAL_ASCII:
                padding += 1
            expected_len = 3 * (len(s) // 4) - padding
        if expected_len != len(result):
            msg = "Non-base64 digit found"
            raise BinAsciiError(msg)
    else:
        result = builtin_decode(s, validate=True)
    if result and canonical:
        # check padding
        padding_bits = False
        if s[-2] == _EQUAL_ASCII:
            # two pad characters: the last symbol must be one of these four
            padding_bits = s[-3] not in b"AQgw"
        elif s[-1] == _EQUAL_ASCII:
            # one pad character: the last symbol must be one of these sixteen
            padding_bits = s[-2] not in b"048AEIMQUYcgkosw"
        if padding_bits:
            msg = "Non-zero padding bits"
            raise BinAsciiError(msg)
    if has_bad_chars:
        import warnings  # noqa: PLC0415 lazy import

        msg = f"invalid characters '+' or '/' in Base64 data with altchars={altchars!r}"
        if validate:
            msg = f"{msg} and validate=True will be an error in future versions"
            warnings.warn(msg, DeprecationWarning, stacklevel=2)
        else:
            msg = f"{msg} and validate=False will be discarded in future versions"
            warnings.warn(msg, FutureWarning, stacklevel=2)
    return result
def b64decode_as_bytearray(
    s: str | Buffer,
    altchars: str | Buffer | None = None,
    validate: bool | Literal[_Unspecified.UNSPECIFIED] = _UNSPECIFIED,
    *,
    padded: bool = True,
    ignorechars: Buffer | Literal[_Unspecified.UNSPECIFIED] = _UNSPECIFIED,
    canonical: bool = False,
) -> bytearray:
    """Decode bytes encoded with the standard Base64 alphabet.

    Argument ``s`` is a :term:`bytes-like object` or ASCII string to
    decode.

    Optional ``altchars`` must be a :term:`bytes-like object` or ASCII
    string of length 2 which specifies the alternative alphabet used instead
    of the '+' and '/' characters.

    If ``ignorechars`` is specified, it should be a :term:`bytes-like object`
    containing characters to ignore from the input when ``validate`` is
    ``True``. If ``ignorechars`` contains the pad character ``'='``, the pad
    characters presented before the end of the encoded data and the excess
    pad characters will be ignored. The default value of ``validate`` is
    ``True`` if ``ignorechars`` is specified, ``False`` otherwise.

    If ``validate`` is ``False``, characters that are neither in the normal
    base-64 alphabet nor the alternative alphabet are discarded prior to the
    padding check.
    If ``validate`` is ``True``, these non-alphabet characters in the input
    result in a :exc:`binascii.Error`.

    If ``padded`` is ``False``, padding in the input is not required or not
    allowed when ``validate`` is ``True`` and ``ignorechars`` does not
    contain the pad character ``'='``.

    If ``canonical`` is ``True``, non-zero padding bits are rejected.

    The result is returned as a :class:`bytearray` object.

    A :exc:`binascii.Error` is raised if ``s`` is incorrectly padded.
    """
    # All the work is done by b64decode; its bytes result is copied into a bytearray.
    return bytearray(
        b64decode(
            s,
            altchars=altchars,
            validate=validate,
            padded=padded,
            ignorechars=ignorechars,
            canonical=canonical,
        ),
    )
def b64encode(
    s: Buffer,
    altchars: str | Buffer | None = None,
    *,
    padded: bool = True,
    wrapcol: int = 0,
) -> bytes:
    r"""Encode bytes using the standard Base64 alphabet.

    Argument ``s`` is a :term:`bytes-like object` to encode.

    Optional ``altchars`` must be a byte string of length 2 which specifies
    an alternative alphabet for the '+' and '/' characters. This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    Optional ``padded`` specifies whether to pad the encoded data with the
    '=' character to a size multiple of 4.

    Optional ``wrapcol`` specifies after how many characters the output
    should be split with a newline character (``b'\n'``). The value is
    rounded down to the nearest multiple of 4. If ``wrapcol`` is 0 (the
    default), no newlines are added.

    The result is returned as a :class:`bytes` object.

    A :exc:`BufferError` is raised if the buffer underlying ``s`` is not
    C-contiguous and a :exc:`ValueError` if ``wrapcol`` is negative.
    """
    mv = memoryview(s)
    if not mv.c_contiguous:
        msg = f"{s.__class__.__name__!r:s}: underlying buffer is not C-contiguous"
        raise BufferError(msg)
    if altchars is not None:
        altchars = _validate_altchars(_get_bytes(altchars))
    if wrapcol < 0:
        msg = "wrapcol must be >= 0"
        raise ValueError(msg)
    if _PYTHON_3_15_API:
        # the stdlib handles padded/wrapcol natively
        return builtin_encode(s, altchars, padded=padded, wrapcol=wrapcol)  # type: ignore[call-arg]
    encoded = builtin_encode(s, altchars)
    if encoded and not padded:
        # len is >= 4
        if encoded[-2] == _EQUAL_ASCII:
            encoded = encoded[:-2]
        elif encoded[-1] == _EQUAL_ASCII:
            encoded = encoded[:-1]
    if wrapcol == 0 or not encoded:
        return encoded
    # round down to a multiple of 4, but never below 4 (wrapcol in 1..3)
    effective_wrapcol = (wrapcol // 4) * 4 or 4
    return b"\n".join(
        encoded[i : i + effective_wrapcol] for i in range(0, len(encoded), effective_wrapcol)
    )
def b64encode_as_string(
    s: Buffer,
    altchars: str | Buffer | None = None,
    *,
    padded: bool = True,
    wrapcol: int = 0,
) -> str:
    r"""Encode bytes using the standard Base64 alphabet.

    Argument ``s`` is a :term:`bytes-like object` to encode.

    Optional ``altchars`` must be a byte string of length 2 which specifies
    an alternative alphabet for the '+' and '/' characters. This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    Optional ``padded`` specifies whether to pad the encoded data with the
    '=' character to a size multiple of 4.

    Optional ``wrapcol`` specifies after how many characters the output
    should be split with a newline character (``'\n'``). The value is
    rounded down to the nearest multiple of 4. If ``wrapcol`` is 0 (the
    default), no newlines are added.

    The result is returned as a :class:`str` object.
    """
    # b64encode does the work; its bytes result is decoded as ASCII.
    return b64encode(s, altchars, padded=padded, wrapcol=wrapcol).decode("ascii")
def encodebytes(s: Buffer) -> bytes:
    r"""Encode bytes into a bytes object with newlines (b'\n') inserted after
    every 76 bytes of output, and ensuring that there is a trailing newline,
    as per :rfc:`2045` (MIME).

    Argument ``s`` is a :term:`bytes-like object` to encode.

    The result is returned as a :class:`bytes` object.

    A :exc:`BufferError` is raised if the buffer underlying ``s`` is not
    C-contiguous.
    """  # noqa: D205
    # Check contiguity before handing ``s`` to the stdlib encoder.
    mv = memoryview(s)
    if not mv.c_contiguous:
        msg = f"{s.__class__.__name__!r:s}: underlying buffer is not C-contiguous"
        raise BufferError(msg)
    return builtin_encodebytes(s)