# app/main_helpers.py
"""Helper functions extracted from main.py for testability."""
import json
import logging
import os
import re
from typing import Any, Dict, List
from fastapi import HTTPException
from langchain_text_splitters import RecursiveCharacterTextSplitter

logger = logging.getLogger(__name__)


def is_safe_filename(name: str) -> bool:
    """Return True if *name* is acceptable as a bare file name.

    A name is accepted only when all of the following hold:

    * it is non-empty and made up solely of word characters, hyphens, dots,
      commas, parentheses and spaces;
    * it contains no ``..`` sequence and no ``/`` or ``\\`` separator;
    * it is not composed of dots and spaces alone.

    Args:
        name: Candidate file name (not a path).

    Returns:
        True when the name passes every check, False otherwise.
    """
    if not re.fullmatch(r"[\w\-.,() ]+", name):
        return False
    # The separators cannot get past the whitelist above; the explicit checks
    # are kept as defence in depth should the pattern ever be relaxed.
    if ".." in name or "/" in name or "\\" in name:
        return False
    # "." passes the whitelist but refers to the containing directory itself,
    # and a blank name is never a usable file name, so reject both.
    return bool(name.strip(". "))


def get_index_name(course_id: str) -> str:
    """Build the index name for a course: ``rag_index_`` followed by its ID."""
    return "rag_index_{}".format(course_id)


def get_registry_path(base_path: str, course_id: str) -> str:
    """Return the registry file path for one course.

    The file is named ``assets_index_<course_id>.json`` and is placed in the
    same directory as *base_path*.
    """
    return os.path.join(
        os.path.dirname(base_path),
        "assets_index_{}.json".format(course_id),
    )


def rel_source_for(file_id: str) -> str:
    """Return the relative source path of a file under ``./app/data``."""
    data_root = "./app/data"
    return f"{data_root}/{file_id}"


def load_registry(registry_path: str) -> Dict[str, Any]:
    """Load the asset registry stored as JSON at *registry_path*.

    Loading is best-effort: the function never raises for a bad registry and
    falls back to an empty registry instead.

    Args:
        registry_path: Path of the registry JSON file.

    Returns:
        The decoded JSON object, or ``{}`` when the file does not exist,
        cannot be read or parsed, or does not contain a JSON object at the
        top level.
    """
    try:
        with open(registry_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # No registry yet is the normal first-run case; nothing to report.
        return {}
    except Exception:
        # Still degrade to an empty registry, but leave a trace: a corrupt or
        # unreadable registry would otherwise look exactly like an empty one.
        logging.getLogger(__name__).warning(
            "Failed to load registry %s; treating it as empty",
            registry_path,
            exc_info=True,
        )
        return {}
    if not isinstance(data, dict):
        # The declared return type is a mapping; any other JSON value cannot
        # be used as a registry.
        logging.getLogger(__name__).warning(
            "Registry %s does not contain a JSON object; treating it as empty",
            registry_path,
        )
        return {}
    return data


def save_registry(reg: Dict[str, Any], registry_path: str) -> None:
    """Write the asset registry as JSON, replacing the target in one step.

    The registry is first serialised to ``<registry_path>.tmp`` and then moved
    over *registry_path* with ``os.replace``, so the target file is never
    left half-written. Values that are not JSON-serialisable are written via
    ``str()``.

    Args:
        reg: Registry mapping to persist.
        registry_path: Destination path of the registry JSON file.

    Raises:
        OSError: If the temporary file cannot be written or the replace fails.
        ValueError: If *reg* cannot be serialised (e.g. a circular reference).
    """
    tmp_path = registry_path + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(reg, f, ensure_ascii=False, indent=2, default=str)
        os.replace(tmp_path, registry_path)
    except BaseException:
        # Do not leave a partial temp file behind when the write or the
        # replace fails; the existing registry file is left untouched.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise


def chunk_text(text: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> List[str]:
    """Break *text* into chunks with a ``RecursiveCharacterTextSplitter``.

    ``chunk_size`` and ``chunk_overlap`` are forwarded unchanged to the
    splitter; the list it produces is returned as-is.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_text(text)


def read_text_file(path: str) -> str:
    """Return the full contents of the file at *path*, decoded as UTF-8.

    Args:
        path: Path of the text file to read.

    Returns:
        The decoded file contents.

    Raises:
        HTTPException: With status 415 when the file is not valid UTF-8; the
            underlying ``UnicodeDecodeError`` is attached as its cause.
        OSError: Propagated unchanged when the file cannot be opened or read.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError as exc:
        # Chain explicitly so the decode failure stays visible in tracebacks
        # as the cause of the HTTP error.
        raise HTTPException(
            status_code=415, detail="Unsupported file encoding. Expecting UTF-8 text."
        ) from exc


def mask_redis_url(url: str) -> str:
    """Return *url* with any password replaced by ``***`` for safe logging.

    Works for every ``scheme://`` URL (``redis://``, ``rediss://``,
    ``unix://`` ...) and for both the password-only form (``:password@``) and
    the ``username:password@`` form. The user-info section is taken to end at
    the last ``@`` so that a password containing ``@`` is masked in full.

    Args:
        url: Connection URL, possibly containing credentials.

    Returns:
        The URL with the password masked, or the input unchanged when it
        carries no password or is not a string.
    """
    if not isinstance(url, str):
        # Best-effort helper for log lines: hand back whatever was passed in
        # rather than raising.
        return url

    scheme, sep, rest = url.partition("://")
    if not sep or "@" not in rest:
        return url

    # Per URL syntax the authority (userinfo@host:port) ends at the first
    # "/", "?" or "#".
    boundary = re.search(r"[/?#]", rest)
    authority = rest[: boundary.start()] if boundary else rest

    if "@" in authority:
        userinfo, _, host = authority.rpartition("@")
        remainder = host + rest[len(authority):]
    elif rest.startswith(":"):
        # Malformed but still sensitive: a password-only URL whose password
        # contains an unescaped "/", "?" or "#". Mask up to the last "@".
        userinfo, _, remainder = rest.rpartition("@")
    else:
        # The "@" belongs to the path or query, not to credentials.
        return url

    username, colon, _ = userinfo.partition(":")
    if not colon:
        # Username only, no password to hide.
        return url
    return f"{scheme}://{username}:***@{remainder}"


def get_all_courses(base_registry_path: str) -> List[str]:
    """List the course IDs that have a registry file next to the base path.

    Scans the directory containing *base_registry_path* for files named
    ``assets_index_<course_id>.json`` and extracts ``<course_id>`` from each.

    Args:
        base_registry_path: Path whose directory holds the per-course
            registry files. A bare file name refers to the current directory.

    Returns:
        The course IDs in sorted order; an empty list when the directory does
        not exist or cannot be listed.
    """
    # dirname() is "" for a bare file name; per-course registry paths built
    # with os.path.join on that "" land in the current directory, so list it.
    dir_path = os.path.dirname(base_registry_path) or "."
    try:
        entries = os.listdir(dir_path)
    except FileNotFoundError:
        return []
    except OSError as e:
        logger.error("Failed to list courses: %s", e)
        return []

    prefix, suffix = "assets_index_", ".json"
    return sorted(
        entry[len(prefix):-len(suffix)]
        for entry in entries
        if entry.startswith(prefix) and entry.endswith(suffix)
    )
