#!/usr/bin/env python3
"""
RAG CLI – variant 2: automatic venv + pip installation.
When started outside a virtual environment, the script creates a venv
under ~/.ragindex/venv (unless it already exists), installs or upgrades
all dependencies with pip and then restarts itself inside that venv.

Usage: python rag_venv.py /path/to/directory [--reindex]
"""

import sys
import os
from pathlib import Path

# ──────────────────────────────────────────────
# CONFIGURATION
# ──────────────────────────────────────────────

# llama.cpp build directory and the server executable inside it.
LLAMA_BIN_DIR = Path("~/llama.cpp/bin/builds").expanduser()
LLAMA_SERVER_BIN = LLAMA_BIN_DIR.joinpath("llama-server")

# Model files handed to llama-server via its -m option.
CHAT_MODEL = Path("~/models/chat.gguf").expanduser()
EMBED_MODEL = Path("~/models/embed.gguf").expanduser()

# Value passed to llama-server's -ngl option.
GPU_LAYERS = 99

# One port and base URL per llama-server instance.
CHAT_PORT = 8080
CHAT_BASE_URL = f"http://localhost:{CHAT_PORT}"
EMBED_PORT = 8081
EMBED_BASE_URL = f"http://localhost:{EMBED_PORT}"

# Root directory for persisted indexes; the auto-created venv lives below it.
INDEX_ROOT = Path("~/.ragindex").expanduser()
VENV_DIR = INDEX_ROOT.joinpath("venv")

# Splitter settings used while indexing and number of chunks retrieved
# per question.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 64
TOP_K = 4

# Packages pip-installed into the venv by bootstrap_venv().
REQUIRED_PACKAGES = [
    "llama-index",
    "llama-index-llms-openai",
    "llama-index-embeddings-openai",
]
# ──────────────────────────────────────────────


def in_venv() -> bool:
    """Tell whether the running interpreter is part of a virtual environment.

    The check compares ``sys.prefix`` with ``sys.base_prefix``; the two
    differ when Python was started from a virtual environment. Any virtual
    environment counts, not only the one this script manages.
    """
    base_installation = sys.base_prefix
    active_prefix = sys.prefix
    return not (active_prefix == base_installation)


def bootstrap_venv():
    """Create the venv if needed, install the packages and re-run the script.

    Creates ``VENV_DIR`` (and ``INDEX_ROOT``) when the venv interpreter is
    missing, installs or upgrades ``REQUIRED_PACKAGES`` with the venv's pip
    and finally replaces the current process with the venv interpreter
    running this same script with the original command-line arguments.

    On success this function does not return, because the process image is
    replaced by ``os.execv``.

    Raises:
        subprocess.CalledProcessError: if creating the venv or the pip
            installation exits with a non-zero status.
    """
    import subprocess

    venv_python = VENV_DIR / "bin" / "python"

    # Test for the interpreter rather than for the directory: VENV_DIR can
    # exist without a usable Python in it (e.g. after an aborted setup), and
    # "python -m venv" can safely be run again on an existing directory.
    if not venv_python.exists():
        print(f"[*] Erstelle venv unter {VENV_DIR} ...", flush=True)
        INDEX_ROOT.mkdir(parents=True, exist_ok=True)
        subprocess.check_call([sys.executable, "-m", "venv", str(VENV_DIR)])

    # flush=True keeps our status lines ahead of the child process output
    # when stdout is block-buffered (pipe or file).
    print("[*] Installiere/aktualisiere Abhängigkeiten ...", flush=True)
    subprocess.check_call([
        str(venv_python), "-m", "pip", "install", "--quiet", "--upgrade",
        *REQUIRED_PACKAGES,
    ])

    print("[*] Starte Skript in venv neu ...\n", flush=True)
    # exec* replaces the process immediately and does not flush open file
    # objects, so anything still buffered would be lost without this.
    sys.stdout.flush()
    sys.stderr.flush()
    os.execv(str(venv_python), [str(venv_python), __file__] + sys.argv[1:])


# Bootstrap: when not running inside a virtual environment, set one up and
# restart the script in it. bootstrap_venv() ends in os.execv(), so on
# success the statements below are only reached by the restarted process.
# NOTE: this is a module-level statement, so it also runs when the file is
# imported, not only when it is executed as a script.
if not in_venv():
    bootstrap_venv()

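# From here on the interpreter runs inside a virtual environment
# (in_venv() is true). Note that this can be any active venv, not
# necessarily the one under VENV_DIR.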
import hashlib
import json
import subprocess
import time
import signal
import atexit

# Popen handles of the server processes launched by start_server();
# stop_servers() walks this list to shut them down.
_processes = []


def start_server(model: Path, port: int, embeddings: bool) -> subprocess.Popen:
    """Launch one llama-server process and register it for later shutdown.

    Args:
        model: Model file passed to the server with ``-m``.
        port: TCP port passed with ``--port``; the server is told to bind
            to 127.0.0.1.
        embeddings: When true, ``--embeddings`` is appended to the command
            line and the log label is "embed" instead of "chat".

    Returns:
        The ``Popen`` handle of the started process. The same handle is
        appended to the module-level ``_processes`` list.
    """
    label = "embed" if embeddings else "chat"

    argv = [str(LLAMA_SERVER_BIN)]
    argv += ["-m", str(model)]
    argv += ["--port", str(port)]
    argv += ["-ngl", str(GPU_LAYERS)]
    argv += ["--host", "127.0.0.1"]
    if embeddings:
        argv += ["--embeddings"]

    print(f"[*] Starte {label}-server auf Port {port} ...")
    # Server output is discarded on both streams.
    silenced = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
    server = subprocess.Popen(argv, **silenced)
    _processes.append(server)
    return server


def wait_for_server(base_url: str, timeout: int = 60) -> bool:
    """Poll ``{base_url}/health`` until a request succeeds or time runs out.

    Args:
        base_url: Base URL of the server, without a trailing slash.
        timeout: Maximum number of seconds to keep polling. Each single
            request additionally has its own 2 second timeout.

    Returns:
        True as soon as one request to the health URL succeeds, False when
        the deadline passes first (including ``timeout <= 0``, in which case
        no request is made).
    """
    import http.client
    import urllib.request

    health_url = f"{base_url}/health"
    # A monotonic clock keeps the deadline independent of wall-clock
    # adjustments (NTP steps, manual changes) while we are waiting.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # urlopen raises for connection failures and HTTP error
            # statuses; the context manager closes a successful response.
            with urllib.request.urlopen(health_url, timeout=2):
                return True
        except (OSError, http.client.HTTPException):
            # Not reachable or not ready yet: retry after a short pause,
            # but never sleep past the deadline.
            pause = min(1.0, deadline - time.monotonic())
            if pause > 0:
                time.sleep(pause)
    return False


def stop_servers():
    """Shut down every server process recorded in ``_processes``.

    All processes are asked to terminate first and are only then waited
    for, so their shutdowns overlap instead of adding up. A process that
    has not exited 5 seconds after the wait starts is killed. The list is
    emptied afterwards, so calling the function again does not signal the
    same processes a second time.
    """
    for proc in _processes:
        try:
            proc.terminate()
        except OSError:
            # Could not deliver the signal; the wait/kill pass below still
            # deals with this process.
            continue

    for proc in _processes:
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()

    _processes.clear()
    print("[*] Server gestoppt.")


def get_index_path(directory: Path) -> Path:
    """Return the folder below ``INDEX_ROOT`` that holds *directory*'s index.

    The folder name is the directory's own name followed by an underscore
    and the first 10 hex digits of the MD5 digest of its resolved path, so
    directories with the same name in different locations get separate
    index folders.
    """
    resolved_text = str(directory.resolve())
    digest = hashlib.md5(resolved_text.encode()).hexdigest()
    short_hash = digest[:10]
    folder_name = f"{directory.name}_{short_hash}"
    return INDEX_ROOT / folder_name


def build_index(directory: Path, index_path: Path):
    """Index the documents below *directory* and persist the result.

    Configures the global llama-index ``Settings`` (LLM, embedding model and
    sentence splitter), loads the documents, builds a ``VectorStoreIndex``
    from them and persists it to *index_path*. A ``meta.json`` holding the
    resolved source directory and the document count is written last.

    Args:
        directory: Directory whose files are read (``recursive=True``,
            ``exclude_hidden=True``).
        index_path: Folder the index is persisted to; created if missing.

    Returns:
        The freshly built index.
    """
    from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
    from llama_index.core.node_parser import SentenceSplitter
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI as LlamaOpenAI

    # Both OpenAI-style clients are pointed at the local servers' /v1 routes.
    # NOTE(review): confirm that the installed llama-index OpenAI wrappers
    # accept the placeholder model name "local".
    chat_llm = LlamaOpenAI(
        model="local", api_base=f"{CHAT_BASE_URL}/v1", api_key="none",
    )
    Settings.llm = chat_llm
    embedder = OpenAIEmbedding(
        model="local", api_base=f"{EMBED_BASE_URL}/v1", api_key="none",
    )
    Settings.embed_model = embedder
    splitter = SentenceSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP,
    )
    Settings.node_parser = splitter

    print(f"[*] Lade Dokumente aus {directory} ...")
    reader = SimpleDirectoryReader(
        str(directory), recursive=True, exclude_hidden=True
    )
    documents = reader.load_data()
    doc_count = len(documents)
    print(f"[*] {doc_count} Dokument(e) geladen, erstelle Embeddings ...")

    built = VectorStoreIndex.from_documents(documents, show_progress=True)
    index_path.mkdir(parents=True, exist_ok=True)
    built.storage_context.persist(persist_dir=str(index_path))

    # main() treats the presence of meta.json as "index exists", so it is
    # written only after the index itself has been persisted.
    summary = {"directory": str(directory.resolve()), "docs": doc_count}
    meta_file = index_path / "meta.json"
    meta_file.write_text(json.dumps(summary, indent=2))
    print(f"[*] Index gespeichert: {index_path}")
    return built


def load_index(index_path: Path):
    """Load a previously persisted index from *index_path*.

    Configures the global llama-index ``Settings`` with the LLM and the
    embedding model before loading; no node parser is set here.

    Args:
        index_path: Folder an index was persisted to earlier.

    Returns:
        The index returned by ``load_index_from_storage``.
    """
    from llama_index.core import Settings, StorageContext, load_index_from_storage
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI as LlamaOpenAI

    # Both OpenAI-style clients are pointed at the local servers' /v1 routes.
    # NOTE(review): confirm that the installed llama-index OpenAI wrappers
    # accept the placeholder model name "local".
    chat_llm = LlamaOpenAI(
        model="local", api_base=f"{CHAT_BASE_URL}/v1", api_key="none",
    )
    Settings.llm = chat_llm
    embedder = OpenAIEmbedding(
        model="local", api_base=f"{EMBED_BASE_URL}/v1", api_key="none",
    )
    Settings.embed_model = embedder

    print(f"[*] Lade Index aus {index_path} ...")
    stored = StorageContext.from_defaults(persist_dir=str(index_path))
    loaded = load_index_from_storage(stored)
    return loaded


def chat_loop(index):
    """Run the interactive question/answer loop on *index*.

    Reads questions from standard input and prints the query engine's
    answer for each one. Empty input is ignored. The loop ends on
    "exit", "quit" or "q" (case-insensitive), on end of input, or on
    Ctrl+C, whether it arrives at the prompt or while a query is running.
    A query that raises any other exception is reported and the loop
    continues with the next question.

    Args:
        index: Object providing ``as_query_engine(similarity_top_k=...)``,
            whose result provides ``query(question)``.
    """
    query_engine = index.as_query_engine(similarity_top_k=TOP_K)
    print("\nBereit. Fragen stellen (exit / Ctrl+C zum Beenden):\n")
    while True:
        try:
            question = input("Du: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nTschüss.")
            break
        if not question:
            continue
        if question.lower() in ("exit", "quit", "q"):
            print("Tschüss.")
            break

        try:
            response = query_engine.query(question)
        except KeyboardInterrupt:
            # The banner advertises Ctrl+C as the way to quit; honour it
            # during a running query too instead of dying with a traceback.
            print("\nTschüss.")
            break
        except Exception as exc:
            # Boundary of the interactive session: one failed request
            # should not end the whole session.
            print(f"\nFehler bei der Anfrage: {exc}\n")
            continue
        print(f"\nAssistent: {response}\n")


def main():
    """Command-line entry point: start the servers, obtain an index, chat.

    Steps, in order:
      1. Read the target directory and the optional ``--reindex`` flag from
         ``sys.argv``; the flag may appear before or after the directory.
      2. Check that the directory, the llama-server binary and both model
         files exist.
      3. Start the embedding and the chat server and wait until both answer
         their health check.
      4. Build a new index (no ``meta.json`` stored yet, or ``--reindex``
         given) or load the stored one.
      5. Enter the interactive chat loop.

    A failure in steps 1-3 prints a message and exits with status 1.
    """
    cli_args = sys.argv[1:]
    force_reindex = "--reindex" in cli_args
    # Use the first non-flag argument as the directory, so that
    # "--reindex <dir>" works as well as "<dir> --reindex".
    positional = [arg for arg in cli_args if arg != "--reindex"]
    if not positional:
        print("Verwendung: python rag_venv.py <verzeichnis> [--reindex]")
        sys.exit(1)

    directory = Path(positional[0]).expanduser().resolve()

    if not directory.is_dir():
        print(f"Fehler: '{directory}' ist kein Verzeichnis.")
        sys.exit(1)

    required_files = [
        (LLAMA_SERVER_BIN, "llama-server"),
        (CHAT_MODEL, "Chat-Modell"),
        (EMBED_MODEL, "Embedding-Modell"),
    ]
    for path, label in required_files:
        if not path.exists():
            print(f"Fehler: {label} nicht gefunden: {path}")
            sys.exit(1)

    # Registered before the first server starts, so every later exit path
    # (including the sys.exit calls below) shuts the servers down.
    atexit.register(stop_servers)
    # Turn SIGTERM into a regular interpreter exit so the atexit hook runs.
    signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))

    start_server(EMBED_MODEL, EMBED_PORT, embeddings=True)
    start_server(CHAT_MODEL, CHAT_PORT, embeddings=False)

    print("[*] Warte auf Server ...")
    for url, label in [(EMBED_BASE_URL, "Embed"), (CHAT_BASE_URL, "Chat")]:
        if not wait_for_server(url):
            print(f"Fehler: {label}-Server nicht erreichbar ({url})")
            sys.exit(1)
    print("[*] Beide Server bereit.")

    index_path = get_index_path(directory)
    meta_file = index_path / "meta.json"
    # meta.json is written by build_index() after persisting, so its
    # presence marks an existing index.
    already_indexed = meta_file.exists()

    if force_reindex or not already_indexed:
        index = build_index(directory, index_path)
    else:
        meta = json.loads(meta_file.read_text())
        print(f"[*] Bestehender Index: {meta['docs']} Docs  –  {meta['directory']}")
        print("    Mit --reindex neu aufbauen.")
        index = load_index(index_path)

    chat_loop(index)


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
