"""
Subvurs Mute Button - Benchmark Module

Standardized benchmarking protocol for measuring noise resilience.
"""

import sys
import json
from datetime import datetime
from typing import List, Optional, Dict

from .mute_button import MuteButton

try:
    from qiskit import transpile
    from qiskit_ibm_runtime import QiskitRuntimeService, SamplerV2
    IBM_AVAILABLE = True
except ImportError:
    IBM_AVAILABLE = False


class MuteButtonBenchmark:
    """
    Standardized Mute Button Benchmark

    Measures encoding resilience across increasing noise depths.

    Usage:
        benchmark = MuteButtonBenchmark()
        job_id = benchmark.run(backend="ibm_torino")

        # Later...
        results = benchmark.check(job_id)
        benchmark.report(results)
    """

    # Depths used when the caller supplies none. Stored as a tuple so the
    # default itself can never be mutated through an instance's ``depths``.
    DEFAULT_DEPTHS = (0, 5, 10, 15, 20)

    def __init__(
        self,
        mode: str = "ratio",
        depths: Optional[List[int]] = None,
        shots: int = 8192
    ):
        """
        Initialize benchmark.

        Args:
            mode: "ratio" or "differential"
            depths: CNOT noise depths to test; ``None`` selects
                ``DEFAULT_DEPTHS``
            shots: Shots per circuit
        """
        self.mode = mode
        # Always keep a private copy: neither the shared default nor the
        # caller's list may be aliased by this instance.
        self.depths = list(self.DEFAULT_DEPTHS) if depths is None else list(depths)
        self.shots = shots
        self.mb = MuteButton(mode=mode)

    def build_circuits(self) -> tuple:
        """Build benchmark circuit suite.

        Returns:
            Whatever ``MuteButton.create_test_suite`` returns for
            ``self.depths``; ``run()`` unpacks it as ``(circuits, metadata)``.
        """
        return self.mb.create_test_suite(depths=self.depths)

    def run(
        self,
        backend: str = "ibm_torino",
        save_job_info: bool = True
    ) -> str:
        """
        Run benchmark on IBM hardware.

        Builds the circuit suite, transpiles it for the backend, submits it
        through ``SamplerV2`` and (optionally) writes a JSON file describing
        the submission to the current working directory.

        Args:
            backend: IBM backend name
            save_job_info: Whether to save job metadata

        Returns:
            Job ID

        Raises:
            ImportError: If qiskit / qiskit-ibm-runtime could not be imported.
        """
        if not IBM_AVAILABLE:
            raise ImportError("qiskit-ibm-runtime required")

        circuits, metadata = self.build_circuits()

        print("Subvurs Mute Button Benchmark")
        print("=" * 50)
        print(f"Mode: {self.mode}")
        print(f"Depths: {self.depths}")
        print(f"Circuits: {len(circuits)}")
        print(f"Backend: {backend}")
        print(f"Shots: {self.shots:,}")

        # Instantiated with no arguments, so it relies on whatever account
        # configuration the library picks up by default.
        service = QiskitRuntimeService()
        backend_obj = service.backend(backend)

        print("\nTranspiling...")
        transpiled = transpile(circuits, backend=backend_obj, optimization_level=1)

        sampler = SamplerV2(mode=backend_obj)

        print("Submitting...")
        job = sampler.run(transpiled, shots=self.shots)
        job_id = job.job_id()

        print(f"Job submitted: {job_id}")

        if save_job_info:
            job_info = {
                "job_id": job_id,
                "backend": backend,
                "shots": self.shots,
                "mode": self.mode,
                "depths": self.depths,
                "submitted": datetime.now().isoformat(),
                "experiment": "mute_button_benchmark",
                "metadata": metadata,
            }

            filename = f"mute_button_benchmark_{job_id}.json"
            with open(filename, 'w', encoding="utf-8") as f:
                json.dump(job_info, f, indent=2)
            print(f"Job info saved: {filename}")

        return job_id

    def _expected_metadata(self) -> List[dict]:
        """Rebuild the per-circuit metadata list from ``depths`` and ``mode``.

        Produces two entries per depth, logical 0 ("ZERO") followed by
        logical 1 ("ONE").
        """
        # NOTE(review): assumes MuteButton.create_test_suite emits circuits in
        # this same order -- confirm against that implementation.
        return [
            {
                "name": f"MUTE_{label}_d{depth}",
                "logical": logical,
                "depth": depth,
                "mode": self.mode,
            }
            for depth in self.depths
            for label, logical in (("ZERO", 0), ("ONE", 1))
        ]

    def check(self, job_id: str) -> Optional[dict]:
        """
        Check benchmark results.

        The circuit metadata is reconstructed from this instance's ``depths``
        and ``mode`` rather than read back from the job, so the instance must
        be configured like the one that submitted the job.

        Args:
            job_id: IBM job ID

        Returns:
            Analysis dictionary or None if job not complete

        Raises:
            ImportError: If qiskit / qiskit-ibm-runtime could not be imported.
        """
        if not IBM_AVAILABLE:
            raise ImportError("qiskit-ibm-runtime required")

        service = QiskitRuntimeService()
        job = service.job(job_id)

        # status() may be an enum-like object (with .name) or a plain string.
        status = job.status()
        status_str = status.name if hasattr(status, 'name') else str(status)

        print(f"Job: {job_id}")
        print(f"Status: {status_str}")

        if status_str != "DONE":
            print("Job not complete.")
            return None

        result = job.result()
        metadata = self._expected_metadata()

        # NOTE(review): reads the classical register named ``c`` on every
        # result -- confirm the benchmark circuits use that register name.
        counts_list = [pub_result.data.c.get_counts() for pub_result in result]

        analysis = self.mb.analyze_benchmark(counts_list, metadata)
        analysis["job_id"] = job_id

        return analysis

    def report(self, analysis: dict) -> None:
        """
        Print formatted benchmark report.

        Args:
            analysis: Analysis dictionary from check(). ``None`` (what
                check() returns for an unfinished job) prints a short notice
                instead of raising.
        """
        if analysis is None:
            print("No results to report (job not complete).")
            return

        print("\n" + "=" * 60)
        print("  SUBVURS MUTE BUTTON BENCHMARK RESULTS")
        print("=" * 60)

        print(f"\nJob ID: {analysis.get('job_id', 'N/A')}")
        print(f"Mode: {self.mode}")
        print(f"Overall Accuracy: {analysis['accuracy']:.1%}")

        print("\n" + "-" * 60)
        print(f"{'Depth':<10} {'Accuracy':<15} {'Details'}")
        print("-" * 60)

        by_depth = analysis["by_depth"]
        for depth in sorted(by_depth):
            depth_data = by_depth[depth]
            total = depth_data["total"]
            # An empty bucket has no meaningful accuracy; avoid dividing by 0.
            acc_text = f"{depth_data['correct'] / total:.0%}" if total else "N/A"

            details = []
            for r in depth_data["results"]:
                symbol = "✓" if r["correct"] else "✗"
                details.append(f"{r['name'].split('_')[1]}:{symbol}")

            # Pad the accuracy cell to the header's width so the Details
            # column lines up regardless of how many digits the percent has.
            print(f"{depth:<10} {acc_text:<15} {' '.join(details)}")

        print("-" * 60)

        # Verdict
        print("\nVERDICT:")
        if analysis["accuracy"] == 1.0:
            print("  PERFECT: 100% decoding accuracy at all depths")
            print("  → Mute Button encoding survives CNOT noise")
        elif analysis["accuracy"] >= 0.8:
            print(f"  GOOD: {analysis['accuracy']:.0%} accuracy")
            print("  → Encoding is noise-resilient but not perfect")
        else:
            print(f"  DEGRADED: {analysis['accuracy']:.0%} accuracy")
            print("  → Noise exceeds encoding resilience")

    def save_results(self, analysis: dict, filename: Optional[str] = None) -> None:
        """Save analysis results to JSON.

        Args:
            analysis: Analysis dictionary from check()
            filename: Destination path. When omitted, a name is derived from
                the first 8 characters of the job ID ("unknown" if the job ID
                is missing or empty).
        """
        if filename is None:
            # ``or`` also covers a job_id that is present but None/empty,
            # which could not be sliced.
            job_id = str(analysis.get('job_id') or 'unknown')[:8]
            filename = f"mute_button_results_{job_id}.json"

        with open(filename, 'w', encoding="utf-8") as f:
            json.dump(analysis, f, indent=2)

        print(f"Results saved: {filename}")


def run_benchmark(
    backend: str = "ibm_torino",
    mode: str = "ratio",
    depths: Optional[List[int]] = None
) -> str:
    """
    Quick function to run benchmark.

    Args:
        backend: IBM backend name
        mode: "ratio" or "differential"
        depths: CNOT noise depths to test; ``None`` selects
            [0, 5, 10, 15, 20]

    Returns job ID.
    """
    # Build a fresh list per call so no list object is shared between calls
    # (a list literal in the signature would be created once and reused).
    depths = [0, 5, 10, 15, 20] if depths is None else list(depths)
    benchmark = MuteButtonBenchmark(mode=mode, depths=depths)
    return benchmark.run(backend=backend)


def check_benchmark(
    job_id: str,
    mode: str = "ratio",
    depths: Optional[List[int]] = None
) -> Optional[dict]:
    """
    Quick function to check benchmark results.

    When the job has finished, the report is printed and the analysis is
    saved to a JSON file under its default name.

    Args:
        job_id: IBM job ID
        mode: "ratio" or "differential"; should match the submitted job
        depths: CNOT noise depths the job was submitted with; ``None``
            selects [0, 5, 10, 15, 20]

    Returns analysis dictionary, or None if the job is not complete.
    """
    # Build a fresh list per call so no list object is shared between calls
    # (a list literal in the signature would be created once and reused).
    depths = [0, 5, 10, 15, 20] if depths is None else list(depths)
    benchmark = MuteButtonBenchmark(mode=mode, depths=depths)
    analysis = benchmark.check(job_id)

    # check() signals "not finished yet" with None.
    if analysis is not None:
        benchmark.report(analysis)
        benchmark.save_results(analysis)

    return analysis
