"""
ScirDom Algorithm Specification Document v1.0
Python Reference Implementation

This file is part of the public ASD release bundle. The source of truth is
the written ASD specification plus the published machine-readable test
vectors. Any implementation that disagrees with the ASD or the vectors is
non-conforming, regardless of whether it is this reference implementation,
a third-party verifier, or the production server.

Mode: unweighted_without_replacement only.

This implementation reflects the exact direct-selection semantics of ASD
v1.0:
- locked participant-list construction is deterministic
- winner selection is exact and unbiased
- rejection sampling is used for every uniform integer draw
- winners are selected directly and recorded in selection order
- selection uses partial Fisher-Yates subset selection
"""

from __future__ import annotations

import hashlib
from typing import Any, Dict, List, Tuple


SALT_LENGTH_BYTES = 32
MODE = "unweighted_without_replacement"

# ---------------------------------------------------------------------------
# ASD Section 4: Explicit whitespace definition
# ---------------------------------------------------------------------------
# Python's str.strip() is not used here because the ASD defines an explicit,
# finite set of code points that MUST be trimmed, no more and no fewer.

ASD_WHITESPACE_CODEPOINTS = frozenset({
    # Unicode General Category Zs (space separators)
    0x0020,  # SPACE
    0x00A0,  # NO-BREAK SPACE
    0x1680,  # OGHAM SPACE MARK
    0x2000,  # EN QUAD
    0x2001,  # EM QUAD
    0x2002,  # EN SPACE
    0x2003,  # EM SPACE
    0x2004,  # THREE-PER-EM SPACE
    0x2005,  # FOUR-PER-EM SPACE
    0x2006,  # SIX-PER-EM SPACE
    0x2007,  # FIGURE SPACE
    0x2008,  # PUNCTUATION SPACE
    0x2009,  # THIN SPACE
    0x200A,  # HAIR SPACE
    0x202F,  # NARROW NO-BREAK SPACE
    0x205F,  # MEDIUM MATHEMATICAL SPACE
    0x3000,  # IDEOGRAPHIC SPACE
    # Explicit additional code points required by the ASD
    0x0009,  # HT
    0x000A,  # LF
    0x000B,  # VT
    0x000C,  # FF
    0x000D,  # CR
    0x001C,  # IS4
    0x001D,  # IS3
    0x001E,  # IS2
    0x001F,  # IS1
    0x0085,  # NEL
    0x2028,  # LINE SEPARATOR
    0x2029,  # PARAGRAPH SEPARATOR
})


def asd_trim(text: str) -> str:
    """
    Trim leading and trailing ASD-defined whitespace.

    Python strings are sequences of Unicode code points, so direct indexing is
    safe for the ASD's scalar-value-based rules.
    """
    start = 0
    end = len(text)

    while start < end and ord(text[start]) in ASD_WHITESPACE_CODEPOINTS:
        start += 1

    while end > start and ord(text[end - 1]) in ASD_WHITESPACE_CODEPOINTS:
        end -= 1

    return text[start:end]


# ---------------------------------------------------------------------------
# ASD Section 4: Locked participant list construction
# ---------------------------------------------------------------------------


def _is_participant_record(entry: Any) -> bool:
    return isinstance(entry, dict) and "participant_id" in entry and "name" in entry


def _serialise_locked_entry(entry: str | Dict[str, str]) -> str:
    if _is_participant_record(entry):
        return f"{entry['participant_id']}\t{entry['name']}"
    return str(entry)


def _winner_entries(records: List[str | Dict[str, str]], winner_indices: List[int]) -> List[Dict[str, str]]:
    winner_entries: List[Dict[str, str]] = []
    for index in winner_indices:
        record = records[index]
        if _is_participant_record(record):
            winner_entries.append(
                {
                    "participant_id": str(record["participant_id"]),
                    "name": str(record["name"]),
                }
            )
        else:
            winner_entries.append({"participant_id": "", "name": str(record)})
    return winner_entries


def build_locked_entries(raw_entries: List[str | Dict[str, str]]) -> List[str | Dict[str, str]]:
    """
    Build the locked participant list from raw participant entries.

    Steps, in exact order:
    1. Trim ASD-defined whitespace.
    2. Remove empty entries.
    3. Preserve upload order exactly.
    4. Preserve duplicate entries exactly.
    5. Assign zero-indexed positions implicitly by list position.
    """
    locked_entries: List[str | Dict[str, str]] = []
    for entry in raw_entries:
        if _is_participant_record(entry):
            trimmed = asd_trim(str(entry["name"]))
            participant_id = str(entry["participant_id"])
            if trimmed != "":
                locked_entries.append(
                    {
                        "participant_id": participant_id,
                        "name": trimmed,
                    }
                )
            continue
        trimmed = asd_trim(str(entry))
        if trimmed != "":
            locked_entries.append(trimmed)
    return locked_entries



def serialise_locked_list(locked_entries: List[str | Dict[str, str]]) -> bytes:
    """
    Serialise the locked participant list.

    Entries are joined by U+000A with no trailing newline and then encoded as
    UTF-8.
    """
    return "\n".join(_serialise_locked_entry(entry) for entry in locked_entries).encode("utf-8")


# ---------------------------------------------------------------------------
# ASD Section 7: Salt generation and participant commitment
# ---------------------------------------------------------------------------


def compute_participant_commitment(salt: bytes, locked_entries: List[str | Dict[str, str]]) -> str:
    """
    Compute SHA-256(salt || serialised_locked_list).
    """
    serialised = serialise_locked_list(locked_entries)
    return hashlib.sha256(salt + serialised).hexdigest()


# ---------------------------------------------------------------------------
# ASD Section 8: Exact uniform integer generation
# ---------------------------------------------------------------------------


def minimal_byte_count_for_range(n: int) -> int:
    """
    Return the smallest positive integer b such that 256**b >= n.

    This is the exact byte width required for unbiased rejection sampling over
    the interval [0, n).
    """
    if n < 1:
        raise ValueError(f"n must be >= 1, got {n}")
    return max(1, ((n - 1).bit_length() + 7) // 8)



def uniform_integer_from_bytes(entropy: bytes, offset: int, n: int) -> Tuple[int, int]:
    """
    Generate an exact uniform integer in [0, n) using rejection sampling.

    Returns (index, bytes_consumed). Rejected bytes are counted as consumed.
    """
    if n < 1:
        raise ValueError(f"n must be >= 1, got {n}")
    if offset < 0:
        raise ValueError(f"offset must be >= 0, got {offset}")

    byte_count = minimal_byte_count_for_range(n)
    max_val = 1 << (8 * byte_count)
    threshold = (max_val // n) * n

    total_consumed = 0

    while True:
        read_start = offset + total_consumed
        raw_bytes = entropy[read_start: read_start + byte_count]
        if len(raw_bytes) < byte_count:
            raise ValueError(f"Insufficient entropy at offset {read_start}")

        raw = int.from_bytes(raw_bytes, byteorder="big")
        total_consumed += byte_count

        if raw < threshold:
            return raw % n, total_consumed


# ---------------------------------------------------------------------------
# ASD Section 9: Exact ordered winner selection without replacement
# ---------------------------------------------------------------------------

def select_winners_in_selection_order(
    locked_entries: List[str | Dict[str, str]],
    entropy: bytes,
    offset: int,
    k: int,
) -> Tuple[List[int], List[str | Dict[str, str]], int]:
    """
    Select an exact ordered winner sequence.

    The working array A starts as [0, 1, ..., N-1]. A direct partial
    Fisher-Yates selection is then applied for exactly K steps. The returned
    winner indices are therefore the actual selection sequence, not a sorted
    presentation layer.
    """
    n = len(locked_entries)

    if n == 0:
        raise ValueError("Empty participant list: draw invalid")
    if not isinstance(k, int) or k < 1:
        raise ValueError(f"K must be a positive integer, got {k}")
    if k > n:
        raise ValueError(f"K ({k}) exceeds N ({n}): draw invalid")
    if offset < 0:
        raise ValueError(f"offset must be >= 0, got {offset}")

    working = list(range(n))
    total_consumed = 0

    for t in range(k):
        range_n = n - t
        relative_index, bytes_used = uniform_integer_from_bytes(
            entropy=entropy,
            offset=offset + total_consumed,
            n=range_n,
        )
        total_consumed += bytes_used
        j = t + relative_index
        working[t], working[j] = working[j], working[t]

    winner_indices = working[:k]
    winner_records = [locked_entries[index] for index in winner_indices]
    return winner_indices, winner_records, total_consumed


# ---------------------------------------------------------------------------
# Full draw execution
# ---------------------------------------------------------------------------


def execute_draw(
    raw_entries: List[str | Dict[str, str]],
    entropy: bytes,
    offset_start: int,
    k: int,
    verbose: bool = False,
) -> Dict[str, object]:
    """
    Execute a complete ScirDom draw.

    Entropy consumption order:
    1. Salt bytes are consumed first at offset_start.
    2. Selection bytes are consumed immediately afterwards.
    3. offset_end is one past the final consumed byte.
    4. bytes_consumed = offset_end - offset_start.

    The salt does not influence winner selection. Its only algorithmic role is
    participant commitment computation.
    """
    if not isinstance(k, int) or k < 1:
        raise ValueError(f"K must be a positive integer, got {k}")
    if not isinstance(offset_start, int) or offset_start < 0:
        raise ValueError(
            f"offset_start must be a non-negative integer, got {offset_start}"
        )

    locked_entries = build_locked_entries(raw_entries)
    locked_count = len(locked_entries)

    if locked_count == 0:
        raise ValueError("All entries empty after locked-list construction: draw invalid")
    if k > locked_count:
        raise ValueError(
            f"K ({k}) exceeds locked N ({locked_count}): draw invalid"
        )

    if offset_start + SALT_LENGTH_BYTES > len(entropy):
        raise ValueError("Insufficient entropy for salt")

    salt = entropy[offset_start: offset_start + SALT_LENGTH_BYTES]
    participant_commitment = compute_participant_commitment(salt, locked_entries)

    selection_offset = offset_start + SALT_LENGTH_BYTES
    winner_indices, selected_winner_records, selection_bytes = select_winners_in_selection_order(
        locked_entries=locked_entries,
        entropy=entropy,
        offset=selection_offset,
        k=k,
    )
    winner_entries = _winner_entries(locked_entries, winner_indices)
    winner_identifiers = [winner_entry["name"] for winner_entry in winner_entries]
    winner_indices_locked_order = sorted(winner_indices)
    winner_entries_locked_order = _winner_entries(locked_entries, winner_indices_locked_order)
    winner_identifiers_locked_order = [winner_entry["name"] for winner_entry in winner_entries_locked_order]

    offset_end = selection_offset + selection_bytes
    total_consumed = SALT_LENGTH_BYTES + selection_bytes

    locked_serialised = serialise_locked_list(locked_entries)
    locked_list_sha256 = hashlib.sha256(locked_serialised).hexdigest()

    if verbose:
        print(f"Locked entries ({locked_count}):")
        for index, entry in enumerate(locked_entries):
            print(f"  [{index}] {_serialise_locked_entry(entry)!r}")
        print(f"\nSalt (hex): {salt.hex()}")
        print(f"Participant commitment: {participant_commitment}")
        print(f"Selection bytes consumed: {selection_bytes}")
        print(f"Total bytes consumed: {total_consumed}")
        print(f"Offset range: [{offset_start}, {offset_end})")
        print(f"\nWinner selection sequence ({k}):")
        for winner_index, winner_record in zip(winner_indices, selected_winner_records):
            print(f"  index {winner_index} = {_serialise_locked_entry(winner_record)!r}")

    return {
        "locked_entries": locked_entries,
        "locked_count": locked_count,
        "locked_serialised_hex": locked_serialised.hex(),
        "locked_list_sha256": locked_list_sha256,
        "salt_hex": salt.hex(),
        "participant_commitment_sha256": participant_commitment,
        "offset_start": offset_start,
        "offset_end": offset_end,
        "bytes_consumed": total_consumed,
        "salt_bytes": SALT_LENGTH_BYTES,
        "selection_bytes": selection_bytes,
        "winner_indices": winner_indices,
        "winner_entries": winner_entries,
        "winner_identifiers": winner_identifiers,
        "winner_indices_locked_order": winner_indices_locked_order,
        "winner_entries_locked_order": winner_entries_locked_order,
        "winner_identifiers_locked_order": winner_identifiers_locked_order,
        "k": k,
        "mode": MODE,
    }
