
    ޺i*0                        % S r SSKJr  SSKJrJr  SSKJr  S\S\	\
\\4   4S jrS\S\	\
\\4   4S jrS\S\	\
\\4   4S	 jrS\S\	\
\\4   4S
 jrS\S\	\
\\4   4S jrS\S\	\
\\4   4S jr\\\\\\\\S.r\\\\/\	\
\\4   4   4   \S'   S\S\S\S\	\
\\4   S-  4S jrS\S\S\S\
4S jr SS\S\S\S\S-  S\
4
S jjrS\S\S\S\4S jrg)a  Stage 2b: Multi-byte structural probing.

Computes how well byte patterns in the data match the expected multi-byte
structure for a given encoding.  Used after byte-validity filtering (Stage 2a)
to further rank multi-byte encoding candidates.

Note: ``from __future__ import annotations`` is intentionally omitted because
this module is compiled with mypyc, which does not support PEP 563 string
annotations.
    )Callable)
HIGH_BYTESPipelineContext)EncodingInfodatareturnc                    SnSnSn[        5       nSn[        U 5      nXV:  a  X   nSUs=::  a  S::  d  O  SUs=::  a  S::  aj  O  OgUS-  nUS-   U:  aS  XS-      nSUs=::  a  S::  d  O  S	Us=::  a  S
::  a0  O  O-US-  nUR                  U5        US-  nUS:  a  US-  nUS-  nM  US-  nOUS-  nXV:  a  M  US:  a  X!-  OSn	X[        U5      4$ )zSingle-pass Shift_JIS / CP932 structural analysis.

Lead bytes: 0x81-0x9F, 0xE0-0xEF
Trail bytes: 0x40-0x7E, 0x80-0xFC

Returns (pair_ratio, mb_bytes, lead_diversity).
r                  @   ~                       setlenadd
r   
lead_countvalid_countmbleadsilengthbtrailratios
             Z/var/www/piano.thomer.com/venv/lib/python3.13/site-packages/chardet/pipeline/structural.py_analyze_shift_jisr%      s     JK	
BeE	AYF
*GA41#4#4!OJ1uv~UE)T)tu/D/D1$KIIaL!GBt|aFAFAFA# *$ )3QK$CEc%j      c                    SnSnSn[        5       nSn[        U 5      nXV:  Ga$  X   nUS:X  aK  US-  nUS-   U:  a7  SXS-      s=::  a  S::  a%  O  O"US-  nUR                  U5        US-  nUS-  nMU  US-  nOUS:X  a`  US-  nUS-   U:  aL  SXS-      s=::  a  S::  a:  O  O7SXS-      s=::  a  S::  a%  O  O"US-  nUR                  U5        US	-  nUS	-  nM  US-  nOaSUs=::  a  S::  aO  O  OLUS-  nUS-   U:  a8  SXS-      s=::  a  S::  a&  O  O#US-  nUR                  U5        US-  nUS-  nGM  US-  nOUS-  nXV:  a  GM$  US:  a  X!-  OS
nX[        U5      4$ )zSingle-pass EUC-JP structural analysis.

Two-byte: Lead 0xA1-0xFE, Trail 0xA1-0xFE
SS2 (half-width katakana): 0x8E + 0xA1-0xDF
SS3 (JIS X 0212): 0x8F + 0xA1-0xFE + 0xA1-0xFE

Returns (pair_ratio, mb_bytes, lead_diversity).
r      r         r            r   r   	r   r   r   r   r   r   r    r!   r#   s	            r$   _analyze_euc_jpr/   E   s    JK	
BeE	AYF
*G9!OJ1uv~$$1u+"="=q 		!aQFA$Y!OJADQK/4/DQK/4/q 		!aQFAQ$!OJ1uv~$$1u+"="=q 		!aQFAFAI *J )3QK$CEc%j  r&   c                 N   SnSnSn[        5       nSn[        U 5      nXV:  ak  X   nSUs=::  a  S::  aN  O  OKUS-  nUS-   U:  a7  SXS-      s=::  a  S::  a%  O  O"US-  nUR                  U5        US-  nUS-  nM^  US-  nOUS-  nXV:  a  Mk  US:  a  X!-  OSnX[        U5      4$ )zSingle-pass EUC-KR / CP949 structural analysis.

Lead 0xA1-0xFE; Trail 0xA1-0xFE

Returns (pair_ratio, mb_bytes, lead_diversity).
r   r)   r,   r   r   r   r   r.   s	            r$   _analyze_euc_krr1      s     JK	
BeE	AYF
*G1!OJ1uv~$$1u+"="=q 		!aQFAFA * )3QK$CEc%j  r&   c                 B   SnSnSn[        5       nSn[        U 5      nXV:  a  X   nSUs=::  a  S::  a  O  OUS-  nUS-   U:  aa  SXS-      s=::  a  S::  aO  O  OLSXS-      s=::  a  S::  a:  O  O7SXS-      s=::  a  S::  a%  O  O"US-  nUR                  U5        US-  nUS	-  nM  S
Us=::  a  S::  aC  O  O@US-   U:  a7  S
XS-      s=::  a  S::  a%  O  O"US-  nUR                  U5        US-  nUS-  nM  US-  nOUS-  nXV:  a  M  US:  a  X!-  OSnX[        U5      4$ )a  Single-pass GB18030 / GB2312 structural analysis.

Only counts strict GB2312 2-byte pairs (lead 0xA1-0xF7, trail 0xA1-0xFE)
and GB18030 4-byte sequences.  The broader GBK extension range
(lead 0x81-0xFE, trail 0x40-0x7E / 0x80-0xFE) is intentionally excluded
because it is so permissive that unrelated single-byte data (EBCDIC, DOS
codepages, etc.) can score 1.0, leading to false positives.

Returns (pair_ratio, mb_bytes, lead_diversity).
r   r
   r,   r   r-   0   9   r      r)      r   r   r.   s	            r$   _analyze_gb18030r7      sG    JK	
BeE	AYF
*G1!OJ ADQK/4/DQK/4/DQK/4/q 		!aQq D QUV^U8St8Sq 		!aQFAFA3 *4 )3QK$CEc%j  r&   c                    SnSnSn[        5       nSn[        U 5      nXV:  a  X   nSUs=::  a  S::  aj  O  OgUS-  nUS-   U:  aS  XS-      nSUs=::  a  S::  d  O  SUs=::  a  S::  a0  O  O-US-  nUR                  U5        US-  nUS:  a  US-  nUS	-  nMz  US-  nOUS-  nXV:  a  M  US:  a  X!-  OS
n	X[        U5      4$ )zSingle-pass Big5 structural analysis.

Lead 0xA1-0xF9; Trail 0x40-0x7E, 0xA1-0xFE

Returns (pair_ratio, mb_bytes, lead_diversity).
r   r)      r   r   r   r,   r   r   r   r   r   s
             r$   _analyze_big5r:      s     JK	
BeE	AYF
*G1!OJ1uv~UE)T)tu/D/D1$KIIaL!GBt|aFAFAFA# *$ )3QK$CEc%j  r&   c                    SnSnSn[        5       nSn[        U 5      nXV:  a  X   nSUs=::  a  S::  d!  O  SUs=::  a  S::  d  O  SUs=::  a  S::  ap  O  OmUS-  nUS-   U:  aY  XS-      nS	Us=::  a  S
::  d  O  SUs=::  a  S::  a6  O  O3US-  nUR                  U5        US:  a  US-  nUS:  a  US-  nUS-  nM  US-  nOUS-  nXV:  a  M  US:  a  X!-  OSn	X[        U5      4$ )zSingle-pass Johab structural analysis.

Lead: 0x84-0xD3, 0xD8-0xDE, 0xE0-0xF9
Trail: 0x31-0x7E, 0x91-0xFE

Returns (pair_ratio, mb_bytes, lead_diversity).
r               r   r9   r   1   r      r,   r   r   r   r   r   s
             r$   _analyze_johabrB      s    JK	
BeE	AYF
*GA41#4#4$!:Kt:K!OJ1uv~UE)T)tu/D/D1$KIIaL4xat|aFAFAFA# *$ )3QK$CEc%j  r&   )shift_jis_2004cp932euc_jis_2004euc_krcp949gb18030	big5hkscsjohab
_ANALYZERSnamectxNc                     UR                   R                  U5      nUb  U$ [        R                  U5      nUc  gU" U 5      nXRR                   U'   U$ )z/Return cached analysis or compute and cache it.N)analysis_cachegetrK   )r   rL   rM   cachedanalyzerresults         r$   _get_analysisrT   +  sX     ##D)F~~d#Hd^F%tMr&   encoding_infoc                 r    U (       a  UR                   (       d  g[        XR                  U5      nUc  gUS   $ )ay  Return 0.0--1.0 indicating how well *data* matches the encoding's structure.

For single-byte encodings, always returns 0.0.  For empty data, always
returns 0.0.

:param data: The raw byte data to analyze.
:param encoding_info: Metadata for the encoding to probe.
:param ctx: Pipeline context for caching analysis results.
:returns: A structural fit score between 0.0 and 1.0.
r   r   is_multibyterT   rL   r   rU   rM   rS   s       r$   compute_structural_scorerZ   ?  s6     }114!3!3S9F~!9r&   non_ascii_countc                     U (       a  UR                   (       d  g[        XR                  U5      nUc  gUS   nUb  UO*[        U 5      [        U R	                  S[
        5      5      -
  nUS:X  a  gXV-  $ )aN  Ratio of non-ASCII bytes that participate in valid multi-byte sequences.

Genuine CJK text has nearly all non-ASCII bytes paired into valid
multi-byte sequences (coverage close to 1.0), while Latin text with
scattered high bytes has many orphan bytes (coverage well below 1.0).

:param data: The raw byte data to analyze.
:param encoding_info: Metadata for the encoding to probe.
:param ctx: Pipeline context for caching analysis results.
:param non_ascii_count: Pre-computed count of non-ASCII bytes, or ``None``
    to compute from *data*.
:returns: A coverage ratio between 0.0 and 1.0.
r   Nr   r   )rX   rT   rL   r   	translater   )r   rU   rM   r[   rS   mb_bytes	non_asciis          r$   compute_multibyte_byte_coverager`   V  sx    & }114!3!3S9F~ayH & 	YT^^D*=>> 
 A~r&   c                 r    U (       a  UR                   (       d  g[        XR                  U5      nUc  gUS   $ )a  Count distinct lead byte values in valid multi-byte pairs.

Genuine CJK text uses lead bytes from across the encoding's full
repertoire.  European text falsely matching a CJK structural scorer
clusters lead bytes in a narrow band.

:param data: The raw byte data to analyze.
:param encoding_info: Metadata for the encoding to probe.
:param ctx: Pipeline context for caching analysis results.
:returns: The number of distinct lead byte values found.
r      r   rW   rY   s       r$   compute_lead_byte_diversityrc   }  s6     }114!3!3S9F~!9r&   )N)__doc__collections.abcr   chardet.pipeliner   r   chardet.registryr   bytestuplefloatintr%   r/   r1   r7   r:   rB   rK   dictstr__annotations__rT   rZ   r`   rc    r&   r$   <module>rp      s  	 % 8 ) #!
#!
5#s?#!L7!
7!
5#s?7!t!
!
5#s?!@.!
.!
5#s?.!b"!
"!
5#s?"!J#!
#!
5#s?#!V )#	D
DhweS#o(>>??@ 	
!0
5#s?d"(
 ,3B
6 #'	$ 
$ $  
$  4Z	$ 
 $ N
 ,3Br&   