
    ޺iP'                     :   % S r SSKrSSKrSSKrSSKrSSKrSSKJrJ	r	  \R                  " S5      R                  r\R                  " S5      R                  rSr\\S'   0 r\\\4   \S'   \R*                  " 5        H7  r\" \R0                  5      S	:X  d  M  \R0                  S   \\R2                  '   M9     S
\S\\\\4   \\\4   4   4S jr\R>                  S\\\\4   \\\4   4   4S j5       r S\\\4   4S jr!S\\\4   S\\\"\\S-  \\4      4   4S jr#\R>                  S\\\"\\S-  \\4      4   4S j5       r$S\S\S-  4S jr%S\S\&4S jr'S\\\4   4S jr( " S S5      r) SS\)S\S\S\4S jjr* SS
\S\S\)S-  S\\\S-  4   4S jjr+g)zModel loading and bigram scoring utilities.

Note: ``from __future__ import annotations`` is intentionally omitted because
this module is compiled with mypyc, which does not support PEP 563 string
annotations.
    N)REGISTRYlookup_encodingz>Iz>BBB   NON_ASCII_BIGRAM_WEIGHT_SINGLE_LANG_MAP   datareturnc                    0 n0 n[         R                  n[        n[        n SnU" X5      u  nUS-  nUS:  a  SU S3n[	        U5      e[        U5       H  n	U" X5      u  n
US-  nU
S:  a  SU
 S3n[	        U5      eXXj-    R                  S	5      nXj-  nU" X5      u  nUS-  nUS
:  a  SU S3n[	        U5      e[        S
5      nSnUS-  nXXo-    n[        U5      U:w  a  SU< 3n[	        U5      eXo-  nU" U5       H  u  nnnUUUS-  U-  '   UUU-  -  nM     XU'   U" U5      X+'   M     X4$ ! [        R                  [        4 a  nSU 3n[	        U5      UeSnAff = f)zParse the binary models.bin format into model tables and L2 norms.

:param data: Raw bytes of models.bin (must be non-empty).
:returns: A ``(models, norms)`` tuple.
:raises ValueError: If the data is corrupt or truncated.
r      i'  z"corrupt models.bin: num_encodings=z exceeds limit   zcorrupt models.bin: name_len=z exceeds 256zutf-8   z corrupt models.bin: num_entries=z exceeds 65536   z-corrupt models.bin: truncated entry data for r   zcorrupt models.bin: N)mathsqrt_unpack_uint32_iter_3bytes
ValueErrorrangedecode	bytearraylenstructerrorUnicodeDecodeError)r	   modelsnorms_sqrt_unpack_u32	_iter_bbboffsetnum_encodingsmsg_name_lennamenum_entriestablesq_sumexpected_byteschunkb1b2weightes                        V/var/www/piano.thomer.com/venv/lib/python3.13/site-packages/chardet/models/__init__.py_parse_models_binr1      s    $&F EIIE KI&%&t4!6!6}o^TCS/!}%A%d3KXaKF#~5hZ|L o%!23::7CDF(6N[aKFU"8^T o%e$EF(1_N&"9:E5z^+EdXN o%$F"+E"2B(.rQw"n%&6/) #3 !4L-EK5 &> =	 LL,- %$QC(o1$%s   DD4 4E$EE$c                      [         R                  R                  S5      R                  S5      n U R	                  5       nU(       d  [
        R                  " S[        SS9  0 0 4$ [        U5      $ )zcLoad and parse models.bin, returning (models, norms).

Cached: only reads from disk on first call.
zchardet.modelsz
models.binuX   chardet models.bin is empty — statistical detection disabled; reinstall chardet to fix   )
stacklevel)		importlib	resourcesfilesjoinpath
read_byteswarningswarnRuntimeWarningr1   )refr	   s     r0   _load_models_datar>   W   sb     


#
#$4
5
>
>|
LC>>D'		
 2vT""    c                      [        5       S   $ )zLoad all bigram models from the bundled models.bin file.

Each model is a bytearray of length 65536 (256*256).
Index: (b1 << 8) | b2 -> weight (0-255).

:returns: A dict mapping model key strings to 65536-byte lookup tables.
r   r>    r?   r0   load_modelsrC   l   s     q!!r?   r   c                    0 nU R                  5        H<  u  p#UR                  SS5      u  pEUR                  U/ 5      R                  XCU45        M>     [	        U5       H   n[        U5      nUc  M  Xq;  d  M  X   X'   M"     U$ )zBuild a grouped index from a models dict.

:param models: Mapping of ``"lang/encoding"`` keys to 65536-byte tables.
:returns: Mapping of encoding name to ``[(lang, model, model_key), ...]``.
/r   )itemssplit
setdefaultappendlistr   )r   indexkeymodellangencenc_name	canonicals           r0   _build_enc_indexrR   w   s     ACElln
IIc1%	b!(($s);< % K#H-	 Y%;$E  
 Lr?   c                  (    [        [        5       5      $ )zTReturn a pre-grouped index mapping encoding name -> [(lang, model, model_key), ...].)rR   rC   rB   r?   r0   get_enc_indexrT      s     KM**r?   encodingc                 ,    [         R                  U 5      $ )zReturn the language for a single-language encoding, or None.

:param encoding: The canonical encoding name.
:returns: An ISO 639-1 language code, or ``None`` if the encoding is
    multi-language.
)r   getrU   s    r0   infer_languagerY      s     ))r?   c                     U [        5       ;   $ )zReturn True if the encoding has language variants in the model index.

:param encoding: The canonical encoding name.
:returns: ``True`` if bigram models exist for this encoding.
)rT   rX   s    r0   has_model_variantsr[      s     }&&r?   c                      [        5       S   $ )zAReturn cached L2 norms for all models, keyed by model key string.r   rA   rB   r?   r0   _get_model_normsr]      s    q!!r?   c                   T    \ rS rSrSrSrS\SS4S jr\S\	\
\
4   SS 4S	 j5       rSrg)
BigramProfile   a  Pre-computed bigram frequency distribution for a data sample.

Computing this once and reusing it across all models reduces per-model
scoring from O(n) to O(distinct_bigrams).

Stores a single ``weighted_freq`` dict mapping bigram index to
*count * weight* (weight is 8 for non-ASCII bigrams, 1 otherwise).
This pre-multiplies the weight during construction so the scoring
inner loop only needs a single dict traversal with no branching.
)
input_norm
weight_sumweighted_freqr	   r
   Nc                    [        U5      S-
  nUS::  a  0 U l        SU l        SU l        g0 nSn[        nUR
                  n[        U5       HI  nX   nXS-      n	US-  U	-  n
US:  d  U	S:  a  U" U
S5      U-   X:'   XE-  nM6  U" U
S5      S-   X:'   US-  nMK     X0l        X@l        [        R                  " [        S UR                  5        5       5      5      U l        g)zbCompute the bigram frequency distribution for *data*.

:param data: The raw byte data to profile.
r   r           Nr      c              3   *   #    U  H	  oU-  v   M     g 7fNrB   .0vs     r0   	<genexpr>)BigramProfile.__init__.<locals>.<genexpr>   s     'E}!A}   )r   rc   rb   ra   r   rW   r   r   r   sumvalues)selfr	   total_bigramsfreqw_sumhi_w_getir,   r-   idxs              r0   __init__BigramProfile.__init__   s    
 D	AA13D#$DO%(DO!&xx}%AB!eB7b.CDyBI aL4/	 aL1,	
 & "))C'Et{{}'E$EFr?   rc   c                     U " S5      nXl         [        UR                  5       5      Ul        [        R
                  " [        S UR                  5        5       5      5      Ul        U$ )a#  Create a BigramProfile from pre-computed weighted frequencies.

Computes ``weight_sum`` and ``input_norm`` from *weighted_freq* to
ensure consistency between the three fields.

:param weighted_freq: Mapping of bigram index to weighted count.
:returns: A new :class:`BigramProfile` instance.
r?   c              3   *   #    U  H	  oU-  v   M     g 7frh   rB   ri   s     r0   rl   3BigramProfile.from_weighted_freq.<locals>.<genexpr>   s     *Q:PQq5:Prn   )rc   ro   rp   rb   r   r   ra   )clsrc   profiles      r0   from_weighted_freq BigramProfile.from_weighted_freq   sS     c( - !5!5!78!YYs*Q-:N:N:P*Q'QRr?   )__name__
__module____qualname____firstlineno____doc__	__slots__bytesry   classmethoddictintr   __static_attributes__rB   r?   r0   r_   r_      sN    	 >IGU Gt G< tCH~ /  r?   r_   r   rM   	model_keyc                 |   U R                   S:X  a  g[        5       nU(       a  UR                  U5      OSnUc>  Sn[        S5       H  nX   nU(       d  M  XWU-  -  nM     [        R
                  " U5      nUS:X  a  gSnU R                  R                  5        H  u  pXU	   U
-  -  nM     XU R                   -  -  $ )zSScore a pre-computed bigram profile against a single model using cosine similarity.re   Nr   r   )ra   r]   rW   r   r   r   rc   rF   )r   rM   r   r   
model_normr)   rw   rk   dotrx   wcounts              r0   score_with_profiler      s     S E)29%JuAAqa%  YYv&
S
C,,224SzF"" 5w11122r?   c                     U (       d  Uc  g[        5       nUR                  U5      nUc  gUc  [        U 5      nSnSnU H  u  pxn	[        X(U	5      n
X:  d  M  U
nUnM     XV4$ )ah  Score data against all language variants of an encoding.

Returns (best_score, best_language). Uses a pre-grouped index for O(L)
lookup where L is the number of language variants for the encoding.

If *profile* is provided, it is reused instead of recomputing the bigram
frequency distribution from *data*.

:param data: The raw byte data to score.
:param encoding: The canonical encoding name to match against.
:param profile: Optional pre-computed :class:`BigramProfile` to reuse.
:returns: A ``(score, language)`` tuple with the best cosine-similarity
    score and the corresponding language code (or ``None``).
N)re   Nre   )rT   rW   r_   r   )r	   rU   r   rK   variants
best_score	best_langrN   rM   r   ss              r0   score_best_languager      sz    & GOOEyy"H%J I"*Ywy9>JI	 #+   r?   ) rh   ),r   	functoolsimportlib.resourcesr5   r   r   r:   chardet.registryr   r   Structunpack_fromr   iter_unpackr   r   r   __annotations__r   r   strrp   _encr   	languagesr&   r   tupler   floatr1   cacher>   rC   rJ   rR   rT   rY   boolr[   r]   r_   r   r   rB   r?   r0   <module>r      sV        6t$00}}V$00  !    $& $sCx. %OOD
4>>a&*nnQ&7# 
6
6
4Yc5j!1126r #5c9n!5tCJ7G!GH # #("T#y.) "i 	#tE#*i456
67. +tCeC$J	3,F&G!HHI + +
*S *S4Z *' ' '"$sEz* "
: :| @B33#,39<3
34 %)&!
&!&! T!&! 5#*	&!r?   