GitHub Actions
Clean sync from GitHub - no large files in history
aca8ab4
"""
Unit tests for Pydantic schema validators.
Tests the field_validator decorators in utils/schemas.py.
"""
import pytest
from datetime import datetime
from utils.schemas import Analysis, ConsensusPoint, Contradiction, SynthesisResult
class TestAnalysisValidators:
    """Checks on how the Analysis schema cleans up its list-valued fields."""

    def test_citations_with_nested_empty_list(self):
        """Expect an empty list embedded in citations to be removed."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test methodology",
            "key_findings": ["Finding 1"],
            "conclusions": "Test conclusions",
            "limitations": ["Limit 1"],
            # A stray [] sits between the two real citations.
            "citations": ["Citation 1", [], "Citation 2"],
            "main_contributions": ["Contribution 1"],
            "confidence_score": 0.8,
        }

        model = Analysis(**payload)

        # Only the two string entries are expected to remain, in order.
        assert model.citations == ["Citation 1", "Citation 2"]

    def test_citations_with_deeply_nested_lists(self):
        """Expect values wrapped in several list levels to come out flat."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": [["Nested finding"]],
            "conclusions": "Test",
            "limitations": [[["Triple nested"]]],
            "citations": [[["Deep citation"]]],
            "main_contributions": [],
            "confidence_score": 0.5,
        }

        model = Analysis(**payload)

        assert model.key_findings == ["Nested finding"]
        assert model.limitations == ["Triple nested"]
        assert model.citations == ["Deep citation"]

    def test_mixed_types_are_normalized(self):
        """Expect None/"" entries to vanish and numbers to become strings."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": ["Finding", None, 123, ""],
            "conclusions": "Test",
            "limitations": [456, "Limit"],
            "citations": ["Citation", None, ""],
            "confidence_score": 0.7,
        }

        model = Analysis(**payload)

        # Integers are expected back as their string form; None and ""
        # are expected to be discarded.
        assert model.key_findings == ["Finding", "123"]
        assert model.limitations == ["456", "Limit"]
        assert model.citations == ["Citation"]

    def test_string_converted_to_list(self):
        """Expect a bare string given for a list field to be wrapped."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            # Plain strings supplied where the schema declares lists.
            "key_findings": "Single finding",
            "conclusions": "Test",
            "limitations": "Single limitation",
            "citations": [],
            "confidence_score": 0.6,
        }

        model = Analysis(**payload)

        assert model.key_findings == ["Single finding"]
        assert model.limitations == ["Single limitation"]
class TestConsensusPointValidators:
    """Checks on how ConsensusPoint cleans up its list-valued fields."""

    def test_supporting_papers_with_nested_lists(self):
        """Expect nested lists in supporting_papers/citations to be flat."""
        payload = {
            "statement": "Test consensus",
            "supporting_papers": ["paper1", [], ["paper2"]],
            "citations": ["Citation 1", [["Nested citation"]]],
            "confidence": 0.9,
        }

        point = ConsensusPoint(**payload)

        assert point.supporting_papers == ["paper1", "paper2"]
        assert point.citations == ["Citation 1", "Nested citation"]

    def test_empty_and_none_values_filtered(self):
        """Expect None and "" entries to be dropped from both lists."""
        payload = {
            "statement": "Test",
            "supporting_papers": ["paper1", None, "", "paper2"],
            "citations": ["Citation", None],
            "confidence": 0.8,
        }

        point = ConsensusPoint(**payload)

        assert point.supporting_papers == ["paper1", "paper2"]
        assert point.citations == ["Citation"]
class TestContradictionValidators:
    """Checks on how Contradiction cleans up its list-valued fields."""

    def test_papers_lists_with_nested_values(self):
        """Expect nesting in papers_a, papers_b and citations to be flat."""
        payload = {
            "topic": "Test topic",
            "viewpoint_a": "View A",
            "papers_a": ["paper1", [], "paper2"],
            "viewpoint_b": "View B",
            "papers_b": [["paper3"], "paper4"],
            "citations": ["Citation 1", [["Nested"]]],
            "confidence": 0.7,
        }

        conflict = Contradiction(**payload)

        assert conflict.papers_a == ["paper1", "paper2"]
        assert conflict.papers_b == ["paper3", "paper4"]
        assert conflict.citations == ["Citation 1", "Nested"]

    def test_mixed_types_normalized(self):
        """Expect ints in the papers lists as strings and None removed."""
        payload = {
            "topic": "Test",
            "viewpoint_a": "A",
            "papers_a": ["paper1", 123, None],
            "viewpoint_b": "B",
            "papers_b": [456, "paper2"],
            "citations": ["Citation"],
            "confidence": 0.6,
        }

        conflict = Contradiction(**payload)

        assert conflict.papers_a == ["paper1", "123"]
        assert conflict.papers_b == ["456", "paper2"]
class TestSynthesisResultValidators:
    """Checks on how SynthesisResult cleans up its list-valued fields."""

    def test_research_gaps_with_nested_lists(self):
        """Expect nested/None entries in research_gaps to be cleaned up."""
        payload = {
            "consensus_points": [],
            "contradictions": [],
            "research_gaps": ["Gap 1", [["Nested gap"]], None],
            "summary": "Test summary",
            "confidence_score": 0.8,
            "papers_analyzed": ["paper1", [], "paper2"],
        }

        outcome = SynthesisResult(**payload)

        assert outcome.research_gaps == ["Gap 1", "Nested gap"]
        assert outcome.papers_analyzed == ["paper1", "paper2"]

    def test_string_converted_to_list(self):
        """Expect bare strings given for list fields to be wrapped."""
        payload = {
            "consensus_points": [],
            "contradictions": [],
            # Plain strings supplied where the schema declares lists.
            "research_gaps": "Single gap",
            "summary": "Test",
            "confidence_score": 0.7,
            "papers_analyzed": "paper1",
        }

        outcome = SynthesisResult(**payload)

        assert outcome.research_gaps == ["Single gap"]
        assert outcome.papers_analyzed == ["paper1"]
class TestValidatorsWithRealWorldData:
    """Scenarios modelled on malformed payloads an LLM might return."""

    def test_llm_returns_empty_arrays_within_citations(self):
        """Reproduce the reported bug: empty lists mixed into list fields."""
        # Shape of the reported input: ["citation 1", [], "citation 2"]
        payload = {
            "paper_id": "2303.08710v1",
            "methodology": "Deep learning approach",
            "key_findings": ["95% accuracy", [], "Outperforms baselines"],
            "conclusions": "Novel method works well",
            "limitations": ["Limited dataset", []],
            "citations": ["Methodology section", [], "Results section"],
            "main_contributions": ["Novel architecture"],
            "confidence_score": 0.85,
        }

        model = Analysis(**payload)

        # Construction itself must succeed, i.e. no validation error raised.
        assert isinstance(model, Analysis)
        assert model.citations == ["Methodology section", "Results section"]
        assert model.key_findings == ["95% accuracy", "Outperforms baselines"]
        assert model.limitations == ["Limited dataset"]

    def test_llm_returns_mixed_malformed_data(self):
        """Combine nesting, None, blank strings and ints in one payload."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": [[], "Finding", None, [["Nested"]], "", " ", 123],
            "conclusions": "Test",
            "limitations": [[["Deep"]], None, "Limit", []],
            "citations": ["Citation", [[], []], None, ""],
            "main_contributions": [None, [], "Contribution", [["Deep contrib"]]],
            "confidence_score": 0.5,
        }

        model = Analysis(**payload)

        # Every field is expected to end up as a flat list of non-blank
        # strings.
        assert model.key_findings == ["Finding", "Nested", "123"]
        assert model.limitations == ["Deep", "Limit"]
        assert model.citations == ["Citation"]
        assert model.main_contributions == ["Contribution", "Deep contrib"]
if __name__ == "__main__":
    # Allow running this file directly. pytest.main() returns the session's
    # exit code; propagate it so the process exits non-zero when a test
    # fails instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))