
    ޺iH                    4   % S SK Jr  S SKrS SKJr  S SKJr  SSKJrJ	r	J
r
Jr  SSKJrJrJrJrJr  SSKJr  SS	KJrJr  SS
KJrJrJrJrJrJr  \R<                  " S5      r\R@                  " 5       r!\!RE                  \RF                  " S5      5        / r$S\%S'   / r&S\%S'   \ H5  r' \" \'5      (       a  \$RQ                  \'5        M$  \&RQ                  \'5        M7     \$\&-   r*S\%S'            S                     SS jjr+         S                     SS jjr,         S                     SS jjr-         S                     SS jjr.g! \) a    \&RQ                  \'5         M  f = f)    )annotationsN)PathLike)BinaryIO   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDIANA_SUPPORTED_SIMILARTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)sz	list[str]_mb_supported_sb_supportedIANA_SUPPORTED_MB_FIRSTc
                j!   [        U [        [        45      (       d#  [        SR	                  [        U 5      5      5      eU(       aB  [        R                  n
[        R                  [        5        [        R                  [        5        [        U 5      nUS:X  ad  [        R                  S5        U(       a.  [        R                  [        5        [        R                  W
5        [        [!        U SSS/ S5      /5      $ UbG  [        R#                  [        S	S
R%                  U5      5        U Vs/ s H  n['        US5      PM     nnO/ nUbG  [        R#                  [        SS
R%                  U5      5        U Vs/ s H  n['        US5      PM     nnO/ nXU-  ::  a!  [        R#                  [        SUUU5        SnUnUS:  a  X-  U:  a  [)        X-  5      n[        U 5      [*        :  n[        U 5      [,        :  nU(       a*  [        R#                  [        SR	                  U5      5        O0U(       a)  [        R#                  [        SR	                  U5      5        / nU(       a  [/        U 5      OSnUb,  UR1                  U5        [        R#                  [        SU5        [3        5       n/ n/ n[3        5       n[3        5       n0 nSn[3        5       nSnSnSnSnSnSn[        5       n[        5       n [5        U 5      u  n!n"U!b6  UR1                  U!5        [        R#                  [        S[        U"5      U!5        UR1                  S5        SU;  a  UR1                  S5        U[6        -    G
H  n#U(       a  U#U;  a  M  U(       a  U#U;   a  M"  U#U;   a  M*  UR9                  U#5        Sn$U!U#:H  n%U%=(       a    [;        U#5      n&U#S;   a$  U%(       d  [        R#                  [        SU#5        M  U#S;   a$  U%(       d  [        R#                  [        SU#5        M  U#U;   a  [        R#                  [        SU#5        M  U#U;   a  [        R#                  [        SU#5        M   [=        U#5      n'U(       af  U'(       d  [3        [C        U#5      5      n(O[3        [E        U#5      5      n(U(RG                  U5      (       d   [        R#                  [        SU#U(U5        GMi  U(       a-  U'(       d&  UU:  a   [        R#                  [        SU#UU5        GM  U(       a%  U'(       d  [        R#                  [        SU#5        GM   U(       a8  U'SL a3  [I        U&SL a  U S[)        S5       OU [        U"5      [)        S5       U#S9  O[I        U&SL a  U OU [        U"5      S U#S9n$ [O        U%(       d  SO
[        U"5      U[)        X-  5      5      n*U'=(       a    U$SL=(       a    [        U$5      U:  n+U+(       a  [        R#                  [        S!U#5        U$Gb&  U'(       Gd  [Q        U$5      n,URS                  U,5      n-U-Gb  U-u  n.n/n0U0(       Ga  [!        U U#U.U%U/USL d	  U#USS4;   a  U$OSUS"9n1UR1                  U15        UR9                  U#5        [        R#                  [        S#U#[U        U.S$-  S%S&95        U#USS4;   a  U.S':  az  U.S:X  ac  [        R                  S(U1RV                  5        U(       a.  [        R                  [        5        [        R                  W
5        [        U1/5      s  $ U R1                  U15        [        U 5      (       a  Ub  UU;   a  SU;   ay  SU;   as  U RY                  5       n2[        R                  S(U2RV                  5        U(       a.  [        R                  [        5        [        R                  W
5        [        U2/5      s  $ GM[  UR1                  U#5        [        R#                  [        S)U#5        U	(       a.  U#SSUS*S+4;   a#  [!        U U#UU%/ U$US"9n3U#U:X  a  U3nOU#S:X  a  U3nOU3nGM  [)        [        U*5      S,-  5      n4[[        U4S-5      n4Sn5Sn6/ n7/ n8 []        U U#U*UU%U&U"U'U$5	       H{  n9U7R1                  U95        U8R1                  [_        U9UUS.L =(       a    S[        U5      s=:*  =(       a    S-:*  Os  5      5        U8S/   U:  a  U5S-  n5U5U4:  d  U%(       d  Mt  U&SL d  M{    O    U6(       d+  U(       a$  U'(       d   U [)        S15      S Ra                  U#S2S39  U8(       a  [c        U85      [        U85      -  OSn:U:U:  d  U5U4:  a  UR1                  U#5        U#[d        ;   a  URg                  [d        U#   5        U$b%  U'(       d  URi                  [Q        U$5      U:/ S45        [        R#                  [        S5U#U5[U        U:S$-  S%S&95        U	(       a5  U#SSUS*S+4;   a*  U6(       d#  [!        U U#UU%/ U$US"9n3U#U:X  a  U3nOU#S:X  a  U3nOU3nGM  [        R#                  [        S6U#[U        U:S$-  S%S&95        U'(       d  [C        U#5      n;O[E        U#5      n;U;(       a3  [        R#                  [        S7R	                  U#[I        U;5      5      5        / n<U#S:w  aK  U7 H9  n9[k        U9UU;(       a  S8R%                  U;5      OS5      n=U<R1                  U=5        M;     [m        U<5      n>O[m        U<5      n>U>(       a*  [        R#                  [        S9R	                  U>U#5      5        [!        U U#U:U%U>USL d	  U#USS4;   a  U$OSUS"9n?UR1                  U?5        U$b%  U'(       d  URi                  [Q        U$5      U:U>S.45        U(       a  U'(       d  U:S::  a  US-  nU#USS4;   a  U:S':  az  U:S:X  ac  [        R                  S(U?RV                  5        U(       a.  [        R                  [        5        [        R                  W
5        [        U?/5      s  $ U R1                  U?5        [        U 5      (       a  Ub  UU;   a  SU;   ay  SU;   as  U RY                  5       n2[        R                  S(U2RV                  5        U(       a.  [        R                  [        5        [        R                  W
5        [        U2/5      s  $ U(       dc  U'(       d\  U>(       a  [[        S; U> 5       SS<9OSn@U@S=:  a<  SU;   a6  SU;   a0  S.nURg                  U;5        [        R#                  [        S>U#U:W@5        U(       dn  U'(       ag  U+(       a`  U$b]  [        U$5      US?-  :  aK  U#S@;  aE  SU;   a?  SU;   a9  S.n[        R#                  [        SAU#U:[        U$5      U[        U$5      U-  S$-  5        U#U!:X  d  G
M&  [        R                  SBU#5        U(       a.  [        R                  [        5        [        R                  W
5        [        UU#   /5      s  $    [        U5      S:X  a  U(       d  U(       d  U(       a  [        R#                  [        SC5        U(       a2  [        R                  SDURV                  5        UR1                  U5        OU(       a  Ub+  U(       a!  U(       a  URn                  URn                  :w  d  Ub'  [        R                  SE5        UR1                  U5        O-U(       a&  [        R                  SF5        UR1                  U5        U(       a<  [        R                  SGURY                  5       RV                  [        U5      S-
  5        O[        R                  SH5        U(       a.  [        R                  [        5        [        R                  W
5        U$ s  snf s  snf ! [>        [@        4 a     [        R#                  [        SU#5         GML  f = f! [J        [L        4 aW  n)[        U)[L        5      (       d%  [        R#                  [        S U#[I        U)5      5        UR1                  U#5         Sn)A)GM  Sn)A)ff = f! [J         a4  n)[        R#                  [        S0U#[I        U)5      5        U4n5S.n6 Sn)A)GNuSn)A)ff = f! [J         aB  n)[        R#                  [        S4U#[I        U)5      5        UR1                  U#5         Sn)A)GMB  Sn)A)ff = f)Ia2  
Given a raw bytes sequence, return the best possibles charset usable to render str objects.
If there is no results, it is a strong indicator that the source is binary/not text.
By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
but never take it for granted. Can improve the performance.

You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
purpose.

This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
Custom logging format and handler can be set manually.
z3Expected object of type bytes or bytearray, got: {}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F Nz`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.   zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_16utf_32z\Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.>   utf_7zREncoding %s won't be tested as-is because detection is unreliable without BOM/SIG.zY%s is deemed too similar to a code page that was already considered unsuited. Continuing!zESkipping %s: already fast-tracked from a similar successful encoding.z2Encoding %s does not provide an IncrementalDecoderzbSkipping %s: definitive match already found, this encoding targets different languages (%s vs %s).zXSkipping %s: already accumulated %d same-family results after definitive match (cap=%d).zCSkipping single-byte %s: multi-byte definitive match already found.g    A)encodingz9Code page %s does not fit given bytes sequence at ALL. %szpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.)preemptive_declarationzM%s fast-tracked (identical decoded payload to a prior encoding, chaos=%f %%).d      )ndigits皙?z.Encoding detection: %s is most likely the one.zZ%s fast-skipped (identical decoded payload to a prior encoding that failed chaos probing).r"   r#         TzaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.z=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {},z We detected language {} using {}g{Gz?c              3  *   #    U  H	  u  pUv   M     g 7f)N ).0_vs      U/var/www/piano.thomer.com/venv/lib/python3.13/site-packages/charset_normalizer/api.py	<genexpr>from_bytes.<locals>.<genexpr>  s     4#341Q#3s   )defaultg      ?zyDefinitive match found: %s (chaos=%.3f, coherence=%.2f). Encodings targeting different language families will be skipped.g\(\?>	   r$   r   r"   r#   	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigzjMulti-byte definitive match: %s (chaos=%.3f, decoded=%d/%d=%.1f%%). Single-byte encodings will be skipped.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)8
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerr   r   logjoinr   intr   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorr   r	   intersectionstrUnicodeDecodeErrorLookupErrorrangehashgetroundr%   bestmaxr   r   decodesumr   update
setdefaultr   r
   fingerprint)A	sequencessteps
chunk_size	thresholdcp_isolationcp_exclusionpreemptive_behaviourexplainlanguage_thresholdenable_fallbackprevious_logger_levellengthcpis_too_small_sequenceis_too_large_sequenceprioritized_encodingsspecified_encodingtestedtested_but_hard_failuretested_but_soft_failuresoft_failure_skipsuccess_fast_trackedpayload_result_cachedefinitive_match_founddefinitive_target_languages post_definitive_sb_success_countPOST_DEFINITIVE_SB_CAPmb_definitive_match_foundfallback_asciifallback_u8fallback_specifiedresultsearly_stop_resultssig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decoderenc_languageser_multi_byte_bonuspayload_hashcachedcached_mess	cached_cdcached_passed
fast_matchprobable_resultfallback_entrymax_chunk_gave_upearly_stop_countlazy_str_hard_failure	md_chunks	md_ratioschunkmean_mess_ratiotarget_languages	cd_ratioschunk_languagescd_ratios_mergedcurrent_matchbest_coherencesA                                                                    r6   
from_bytesr   9   s   < i)U!344AHHY
 	
 %+\\/*i.F{ST  1OO12|IwUBPRSTUU

5IIl#		
 8DD|	"e,|D

6IIl#		
 8DD|	"e,|Du$%

l	
 
qyV^j0(
"%i.3E"E"%i.4D"D

LSS	
 


W^^	
 (* .By)t  %$$%78

N	
 uF)+)+"%%%(U
 TV $),/E -.$"# ',*.N'+K.2,.G)7)9 3I >L+$$\2

W		
   )++$$W-.1HHM=M\9F"

=!&*%1]%B!5 "
:Q;
 009MJJn
 I%.BJJd
  --JJk
  00JJW
 	*@*O! "( #$6}$E F #$9-$H I --.IJJ

x!!/  #)04JJJJj0& 
 %-BJJU
 	$)>%)G ,u4 "+CI.&s;'7#d)D* #& ,u4 "&s;'7'9:*#& )As;/?
 " .t+.O$v- 	 JJ-	 &/D $_ 5L)--l;F!8>5Y !-!%#,! !6 >#0$6#I$J , "&/A"J" NN:.(,,];JJg%kC/;	 &*<gw)OO'#-&#-"LL P * 3 3  ' & 4 4_ E &0E F#1:,#??*11*= .///7;MQW;W#v-#v-8J8O8O8QL+44 #"00A"OO,AB-.?@@ ,22=AJJt% '=*  = , *6%)%0+3E* ),>>1?.*g5-;N*8K!$SWq[!1 115 ! %!		'	),$ %
   '  !4GA\1B,G,Ga,G R=I-$)$$(99((-=-F7
V &%)
#d)+&--mH-M ENY#i.!@SVi'+;?P+P#**=9 66!(()?)NO *3H$//)OR+G JJ0 o+Q7  !W&8(HMN-!-!(#+=" !$66)7&"g-%3N"0K

K/C'3		
 %*<]*K4]CJJ8??!3'7#8 	 G# #"1&2BCHH-.#   1 #  6i@5i@JJ299$m %  *U2$);Wg(NN  
 #5
" 	}% &/D ++_% "2D9 #)$&,1, 0'7CC#% #%D!** ((9OO$9:%}o66%%m4 "###+/AV/K6!6!0557OLL@(( $$_5 56!?"344 &.C $ 4#34cB 
 $F):w&?P)-&+223CD

 P!#" *% +O$v}4
 6!6!(,%JJ|O$O$v-3 L(LL1
 $$_5 56!7=#9":;;m Ip 7|q.,>JJa
 LLI"++ NN-.^3"++~/I/II'LLUVNN;'LLUVNN>*kLLN##L1	
 	TU_--.Ny E EL $[1 	JJD
 	X #K0 		a--

O!F	 $**=9		X 
	) JJsA	  1$(!
	)* & 

t!F	 (..}=s   .}>:~~5>~;4~;BA@%A@%$A@%&A@%?AA&+~87~8;A@"AA@@A@"@%
AA#@/)AAAAA#A&
AB2A06AB-B-AB2c
                F    [        U R                  5       UUUUUUUUU	5
      $ )zz
Same thing than the function from_bytes but using a file pointer that is already ready.
Will not close the file pointer.
)r   read)
fpre   rf   rg   rh   ri   rj   rk   rl   rm   s
             r6   from_fpr   R  s5      
	     c
                x    [        U S5       n
[        U
UUUUUUUUU	5
      sSSS5        $ ! , (       d  f       g= f)z
Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
Can raise IOError.
rbN)openr   )pathre   rf   rg   rh   ri   rj   rk   rl   rm   r   s              r6   	from_pathr   p  sB      
dD	R 
 
		s   +
9c
                   [        U [        [        45      (       a  [        U UUUUUUUUU	S9
n
U
(       + $ [        U [        [
        45      (       a  [        U UUUUUUUUU	S9
n
U
(       + $ [        U UUUUUUUUU	S9
n
U
(       + $ )a  
Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
are disabled to be stricter around ASCII-compatible but unlikely to be a string.
)	re   rf   rg   rh   ri   rj   rk   rl   rm   )r?   rV   r   r   rA   r@   r   r   )fp_or_path_or_payloadre   rf   rg   rh   ri   rj   rk   rl   rm   guessess              r6   	is_binaryr     s    " '#x99!!%%!51+
Z ;C 
	

 
 !!%%!51+
4 ; !!%%!51+
 ;r   )	      皙?NNTFr*   T)rd   zbytes | bytearrayre   rO   rf   rO   rg   floatrh   list[str] | Noneri   r   rj   boolrk   r   rl   r   rm   r   returnr   )r   r   re   rO   rf   rO   rg   r   rh   r   ri   r   rj   r   rk   r   rl   r   rm   r   r   r   )r   zstr | bytes | PathLikere   rO   rf   rO   rg   r   rh   r   ri   r   rj   r   rk   r   rl   r   rm   r   r   r   )	r   r   r   NNTFr*   F)r   z!PathLike | str | BinaryIO | bytesre   rO   rf   rO   rg   r   rh   r   ri   r   rj   r   rk   r   rl   r   rm   r   r   r   )/
__future__r   loggingosr   typingr   cdr   r   r	   r
   constantr   r   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   	getLoggerrE   StreamHandlerrH   setFormatter	Formatterr   __annotations__r   _supported_encrP   rT   r   r   r   r   r   r2   r   r6   <module>r      s2   "       0  
		/	0'')   AB y y $N-!.11  0  0 % &3]%B  B
 %)%)!% # V VV V 	V
 #V #V V V V V Vv %)%)!% #   	
 # #     @ %)%)!% # 

 

 
 	

 #
 #
 
 
 
 
 
B %)%)!% #!?<?? ? 	?
 #? #? ? ? ? ? 
?y  -^,-s   (E<E<<FF