# tests/test_file_extractor.py
"""Tests for FileExtractorService."""
import pytest
from fastapi import HTTPException
from app.services.file_extractor_service import FileExtractorService


class TestFileExtractorService:
    """Tests for FileExtractorService support detection and text extraction.

    Every test calls ``FileExtractorService.is_supported`` or
    ``FileExtractorService.extract_text`` directly on the class; no instance
    is created.
    """

    def test_is_supported_txt(self):
        """Accept ``.txt`` file names in lower and upper case."""
        assert FileExtractorService.is_supported("document.txt")
        assert FileExtractorService.is_supported("test.TXT")

    def test_is_supported_md(self):
        """Accept ``.md`` file names in lower and upper case."""
        assert FileExtractorService.is_supported("readme.md")
        assert FileExtractorService.is_supported("guide.MD")

    def test_is_supported_pdf(self):
        """Accept ``.pdf`` file names in lower and upper case."""
        assert FileExtractorService.is_supported("document.pdf")
        assert FileExtractorService.is_supported("report.PDF")

    def test_is_supported_youtube(self):
        """Accept the ``.youtube`` and ``.youtube_url`` pseudo-extensions."""
        assert FileExtractorService.is_supported("video.youtube")
        assert FileExtractorService.is_supported("lecture.youtube_url")

    def test_is_supported_unsupported_type(self):
        """Reject file names whose extension is not handled."""
        assert not FileExtractorService.is_supported("document.docx")
        assert not FileExtractorService.is_supported("image.png")
        assert not FileExtractorService.is_supported("data.csv")

    def test_extract_text_file(self):
        """Return the content of a ``.txt`` file unchanged, newline included."""
        content = b"Hello, world!\nThis is a test."
        result = FileExtractorService.extract_text("test.txt", content)
        assert result == "Hello, world!\nThis is a test."

    def test_extract_markdown_file(self):
        """Return the content of a ``.md`` file with its markup left as-is."""
        content = b"# Heading\n\nSome **markdown** text."
        result = FileExtractorService.extract_text("readme.md", content)
        assert result == "# Heading\n\nSome **markdown** text."

    def test_extract_text_file_utf8_encoded(self):
        """Preserve non-ASCII characters from UTF-8 encoded text."""
        content = "Café, München, 北京".encode("utf-8")
        result = FileExtractorService.extract_text("test.txt", content)
        assert "Café" in result
        assert "München" in result
        assert "北京" in result

    def test_extract_text_file_invalid_encoding(self):
        """Raise HTTP 415 when a text file is not valid UTF-8."""
        content = b"\xff\xfe"  # 0xFF can never start a UTF-8 sequence
        with pytest.raises(HTTPException) as exc_info:
            FileExtractorService.extract_text("test.txt", content)
        assert exc_info.value.status_code == 415

    def test_extract_unsupported_type(self):
        """Raise HTTP 415 when extracting from an unsupported file type."""
        with pytest.raises(HTTPException) as exc_info:
            FileExtractorService.extract_text("test.docx", b"content")
        assert exc_info.value.status_code == 415

    def test_extract_pdf_file_requires_pypdf2(self):
        """Check that a header-only PDF is rejected cleanly or yields text.

        The payload is just a PDF header plus filler, not a complete
        document. Two outcomes are accepted: an ``HTTPException`` with status
        503 or 400, or a successful call that returns a string. A 503 does
        NOT fail this test; it is treated as the expected response when the
        PDF library is unavailable.
        """
        pdf_content = b"%PDF-1.4\n..."  # Header only, not a parseable PDF
        try:
            result = FileExtractorService.extract_text("test.pdf", pdf_content)
        except HTTPException as exc:
            # Either 503 (PDF library not installed) or 400 (invalid PDF)
            assert exc.status_code in (503, 400)
        else:
            # Without this check the test would pass vacuously whenever
            # no exception is raised.
            assert isinstance(result, str)

    def test_extract_youtube_requires_library(self):
        """Check that YouTube extraction yields text or fails with 503/400.

        NOTE(review): with the transcript library installed this presumably
        performs a real lookup for the video, so the outcome may depend on
        network access - confirm, and consider isolating it from the
        default test run.
        """
        url = b"https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        try:
            result = FileExtractorService.extract_text("video.youtube", url)
        except HTTPException as exc:
            # Either 503 (not installed) or 400 (invalid URL/no transcript)
            assert exc.status_code in (503, 400)
        else:
            # If successful, result should be a non-empty string
            assert isinstance(result, str)
            assert len(result) > 0

    def test_extract_youtube_invalid_url(self):
        """Raise HTTP 400 when the payload is not a YouTube URL."""
        url = b"https://example.com"
        with pytest.raises(HTTPException) as exc_info:
            FileExtractorService.extract_text("video.youtube", url)
        assert exc_info.value.status_code == 400

    def test_extract_youtube_empty_url(self):
        """Raise HTTP 400 mentioning "empty" when the URL payload is empty."""
        url = b""
        with pytest.raises(HTTPException) as exc_info:
            FileExtractorService.extract_text("video.youtube", url)
        assert exc_info.value.status_code == 400
        # The error detail is matched case-insensitively.
        assert "empty" in exc_info.value.detail.lower()