"""Detection pipeline stages and shared types."""

from __future__ import annotations

import dataclasses
from dataclasses import field
from typing import TypedDict

#: Confidence assigned by the deterministic (non-BOM) detection stages:
#: the escape, markup, and utf1632 stages, plus the orchestrator's
#: binary-detection result.
DETERMINISTIC_CONFIDENCE: float = 0.95

#: Every byte value >= 0x80, intended as the ``delete`` argument of
#: ``bytes.translate``.  Stripping these bytes at C speed and comparing the
#: length before and after yields the number of non-ASCII bytes.
HIGH_BYTES: bytes = bytes(range(128, 256))

#: Bytes accepted as ASCII text: tab (0x09), newline (0x0A), carriage
#: return (0x0D), and the printable range 0x20-0x7E.
#: Used by ``ascii.py`` directly and by ``utf1632.py`` (with null added).
ASCII_TEXT_BYTES: bytes = b"\t\n\r" + bytes(range(0x20, 0x7F))


class DetectionDict(TypedDict, total=True):
    """Plain-dict shape of a single detection result.

    This is the mapping handed back by :func:`chardet.detect`,
    :func:`chardet.detect_all`, and
    :attr:`chardet.UniversalDetector.result`.  All three keys are
    required; ``encoding`` and ``language`` may be ``None``.
    """

    encoding: str | None
    confidence: float
    language: str | None


@dataclasses.dataclass(frozen=True, slots=True)
class DetectionResult:
    """A single encoding detection result.

    Frozen dataclass holding the encoding name, confidence score, and
    optional language identifier returned by the detection pipeline.
    """

    encoding: str | None
    confidence: float
    language: str | None

    def to_dict(self) -> DetectionDict:
        """Convert this result to a plain dict.

        :returns: A dict with ``'encoding'``, ``'confidence'``, and ``'language'`` keys.
        """
        return {
            "encoding": self.encoding,
            "confidence": self.confidence,
            "language": self.language,
        }


@dataclasses.dataclass(slots=True)
class PipelineContext:
    """Per-run mutable state for a single pipeline invocation.

    Created once at the start of ``run_pipeline()`` and threaded through
    the call chain via function parameters.  Each concurrent ``detect()``
    call gets its own context, eliminating the need for module-level
    mutable caches.
    """

    analysis_cache: dict[str, tuple[float, int, int]] = field(default_factory=dict)
    non_ascii_count: int | None = None
    mb_scores: dict[str, float] = field(default_factory=dict)
    mb_coverage: dict[str, float] = field(default_factory=dict)
