
    ޺ia*                        % S r SSKJrJr  S\S\4S jrSr\\S'   \	" \5      r
\	\   \S'   \" \5       V Vs0 s H  u  pX_M	     snn r\\\4   \S	'   S
\S\4S jrS\S\S\4S jrS\S\4S jrS\S\S-  4S jrgs  snn f )a  Early detection of escape-sequence-based encodings (ISO-2022, HZ-GB-2312, UTF-7).

These encodings use ESC (0x1B), tilde (~), or plus (+) sequences to switch
character sets.  They must be detected before binary detection (ESC is a control
byte) and before ASCII detection (HZ-GB-2312 and UTF-7 use only printable ASCII
bytes plus their respective shift markers).

Note: ``from __future__ import annotations`` is intentionally omitted because
this module is compiled with mypyc, which does not support PEP 563 string
annotations.
    )DETERMINISTIC_CONFIDENCEDetectionResultdatareturnc                     Sn U R                  SU5      nUS:X  a  gU R                  SUS-   5      nUS:X  a  gXS-   U n[        U5      S:  a*  [        U5      S-  S:X  a  [        S U 5       5      (       a  gUS-   nM|  )	zCheck that at least one ~{...~} region contains valid GB2312 byte pairs.

In HZ-GB-2312 GB mode, characters are encoded as pairs of bytes in the
0x21-0x7E range.  We require at least one region with a non-empty, even-
length run of such bytes.
r   T   ~{F   ~}   c              3   L   #    U  H  nS Us=:*  =(       a    S:*  Os  v   M     g7f)!   ~   N ).0bs     V/var/www/piano.thomer.com/venv/lib/python3.13/site-packages/chardet/pipeline/escape.py	<genexpr>(_has_valid_hz_regions.<locals>.<genexpr>$   s      6v!DA%%%%vs   "$)findlenall)r   startbeginendregions        r   _has_valid_hz_regionsr      s     E
		%'B;iiuqy)"9ai#& K1Fa1$6v666a     s@   ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/
_B64_CHARS_UTF7_BASE64_B64_DECODE	b64_bytesc                    [        U 5      nUS-  nUS-  nUS:  a  [        U S      nSU-  S-
  nXE-  (       a  gUS-  n[        U5      nSnSn	Sn
U  H2  nUS-  [        U   -  nU	S-  n	U	S:  d  M  U	S-  n	X-	  S-  Xz'   U
S-  n
M4     Sn[        SUS-
  S	5       HW  nX}   S-  X}S-      -  nS
Us=::  a  S::  a  O  OU(       a    gSnM/  SUs=::  a  S::  a  O  OU(       d    gSnML  U(       a    gSnMY     U(       + $ )u  Check if base64 bytes decode to valid UTF-16BE with correct padding.

A valid UTF-7 shifted sequence must:
1. Contain at least 3 Base64 characters (18 bits, enough for one 16-bit
   UTF-16 code unit).
2. Have zero-valued trailing padding bits (the unused low bits of the last
   Base64 sextet after the last complete 16-bit code unit).
3. Decode to valid UTF-16BE — no lone surrogates.

This rejects accidental ``+<alphanum>-`` patterns found in URLs, MIME
boundaries, hex-encoded hashes (e.g. SHA-1 git refs), and other ASCII data.

The caller (``_has_valid_utf7_sequences``) already checks ``b64_len >= 3``
before calling this function, so *b64_bytes* is always at least 3 bytes.
      r   r	      F      r   i   i  Ti   i  )r   r    	bytearrayrange)r!   n
total_bitspadding_bitslast_valmask	num_bytesrawbit_buf	bit_countout_idxc	prev_highi	code_units                  r   _is_valid_utf7_b64r8   2   s8     	IAQJ ?Lay}-\!Q&?
 aI
I
CGIGa<;q>1Q	>NI#0D8CLqLG  I1i!mQ'Vq[CAJ.	Y(&(Iy*F*II ( =r   posc                     [         [        S5      -  nSnUS-
  nUS:  a/  X   nUS;   a  US-  nM  XR;   a  US-  nUS-  nO US:  $ US:  a  M/  US:  $ )aQ  Return True if the ``+`` at *pos* is embedded in a base64 stream.

Walks backward from *pos*, skipping CR/LF, and counts consecutive base64
characters (including ``=`` for padding).  If 4 or more are found, the
``+`` is likely part of a PEM certificate, email attachment, or similar
base64 blob rather than a real UTF-7 shift character.
   =r   r%   >   
         )r   	frozenset)r   r9   b64_with_padcountr6   r   s         r   _is_embedded_in_base64rB   o   s|     $0)D/#ALEaA
q&GFAQJEFAA: q& A:r   c                    Sn U R                  [        S5      U5      nUS:X  a  gUS-   nU[        U 5      :  a  X   [        S5      :X  a  US-   nMO  U[        U 5      :  a\  X   [        S5      :X  aK  U[        U 5      :  a8  X   [        S5      :X  a'  US-  nU[        U 5      :  a  X   [        S5      :X  a  M'  UnM  [        X5      (       a  UnM  UnU[        U 5      :  a.  X   [        ;   a"  US-  nU[        U 5      :  a  X   [        ;   a  M"  XC-
  nXU nUS:  a  UR                  5       (       a  UnGM5  US:  a  [        U5      (       a  g[        X45      nGMY  )	a_  Check that *data* contains at least one valid UTF-7 shifted sequence.

A valid shifted sequence is ``+<base64 chars>`` terminated by either an
explicit ``-`` or any non-Base64 character (per RFC 2152).  The base64
portion must decode to valid UTF-16BE with correct zero-padding bits.
The sequence ``+-`` is a literal plus sign and is **not** counted.
r   T+r	   Fr%   -   )r   ordr   rB   r   islowerr8   max)r   r   	shift_posr9   r6   b64_lenb64_datas          r   _has_valid_utf7_sequencesrM      sV    E
IIc#h.	?!mT?tyCH4!GE T?tyCH4D	/di3s8&;q D	/di3s8&;E "$22E#d)m< 7FA #d)m< 7'A; a<H,,..E a<.x88Ca r   Nc                 D   SU ;   nSU ;   nSU ;   nU(       d  U(       d  U(       d  gU(       a  SU ;   d  SU ;   d  SU ;   a  [        S[        S	S
9$ SU ;   a  [        S[        S	S
9$ SU ;   d  SU ;   d  SU ;   d  SU ;   a*  SU ;   a  SU ;   a  [        S[        S	S
9$ [        S[        S	S
9$ SU ;   a  [        S[        SS
9$ U(       a+  SU ;   a%  SU ;   a  [        U 5      (       a  [        S[        SS
9$ U(       a.  [        U 5      S:  a  [	        U 5      (       a  [        S[        SS
9$ g)zDetect ISO-2022, HZ-GB-2312, and UTF-7 from escape/tilde/plus sequences.

:param data: The raw byte data to examine.
:returns: A :class:`DetectionResult` if an escape encoding is found, or ``None``.
      ~   +Ns   $(Os   $(Ps   $(Qiso2022_jp_2004ja)encoding
confidencelanguages   (Iiso2022_jp_exts   $Bs   $@s   (Js   $(D      iso2022_jp_2s   $)C
iso2022_krkor   r
   hzzh   zutf-7)r   r   r   rI   rM   )r   has_esc	has_tildehas_pluss       r   detect_escape_encodingrc      s_    oGIt|H9Xt!3zT7I"*3  ")3  D D T! $7d?&-7!  #'3  "%3  Ud]u}9Nt9T9T/
 	
 CI$)B4)H)H/
 	
 r   )__doc__chardet.pipeliner   r   bytesboolr   r   __annotations__r?   r   int	enumerater    dictr8   rB   rM   rc   )r6   r4   s   00r   <module>rl      s   
 G $ 6 X
E W(4in 4 1:*0EF0Eqt0EFT#s(^ F:% :D :z S T 09E 9d 9xO O?T+A Oi Gs   A=