|
|
""" |
|
|
Unit tests for Pydantic schema validators. |
|
|
Tests the field_validator decorators in utils/schemas.py. |
|
|
""" |
|
|
import pytest |
|
|
from datetime import datetime |
|
|
|
|
|
from utils.schemas import Analysis, ConsensusPoint, Contradiction, SynthesisResult |
|
|
|
|
|
|
|
|
class TestAnalysisValidators:
    """Exercise the list-field validators on the Analysis schema."""

    def test_citations_with_nested_empty_list(self):
        """An empty list nested inside citations should not survive validation."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test methodology",
            "key_findings": ["Finding 1"],
            "conclusions": "Test conclusions",
            "limitations": ["Limit 1"],
            "citations": ["Citation 1", [], "Citation 2"],
            "main_contributions": ["Contribution 1"],
            "confidence_score": 0.8,
        }

        result = Analysis(**payload)

        assert result.citations == ["Citation 1", "Citation 2"]

    def test_citations_with_deeply_nested_lists(self):
        """Items wrapped in several list levels should come out as a flat list."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": [["Nested finding"]],
            "conclusions": "Test",
            "limitations": [[["Triple nested"]]],
            "citations": [[["Deep citation"]]],
            "main_contributions": [],
            "confidence_score": 0.5,
        }

        result = Analysis(**payload)

        assert result.key_findings == ["Nested finding"]
        assert result.limitations == ["Triple nested"]
        assert result.citations == ["Deep citation"]

    def test_mixed_types_are_normalized(self):
        """Ints become strings; None and empty strings are expected to be dropped."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": ["Finding", None, 123, ""],
            "conclusions": "Test",
            "limitations": [456, "Limit"],
            "citations": ["Citation", None, ""],
            "confidence_score": 0.7,
        }

        result = Analysis(**payload)

        assert result.key_findings == ["Finding", "123"]
        assert result.limitations == ["456", "Limit"]
        assert result.citations == ["Citation"]

    def test_string_converted_to_list(self):
        """A bare string given for a list field should be wrapped in a list."""
        payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": "Single finding",
            "conclusions": "Test",
            "limitations": "Single limitation",
            "citations": [],
            "confidence_score": 0.6,
        }

        result = Analysis(**payload)

        assert result.key_findings == ["Single finding"]
        assert result.limitations == ["Single limitation"]
|
|
|
|
|
|
|
|
class TestConsensusPointValidators:
    """Exercise the list-field validators on the ConsensusPoint schema."""

    def test_supporting_papers_with_nested_lists(self):
        """Nested and empty lists in supporting_papers/citations should flatten."""
        fields = {
            "statement": "Test consensus",
            "supporting_papers": ["paper1", [], ["paper2"]],
            "citations": ["Citation 1", [["Nested citation"]]],
            "confidence": 0.9,
        }

        point = ConsensusPoint(**fields)

        assert point.supporting_papers == ["paper1", "paper2"]
        assert point.citations == ["Citation 1", "Nested citation"]

    def test_empty_and_none_values_filtered(self):
        """None entries and empty strings should be absent from the result."""
        fields = {
            "statement": "Test",
            "supporting_papers": ["paper1", None, "", "paper2"],
            "citations": ["Citation", None],
            "confidence": 0.8,
        }

        point = ConsensusPoint(**fields)

        assert point.supporting_papers == ["paper1", "paper2"]
        assert point.citations == ["Citation"]
|
|
|
|
|
|
|
|
class TestContradictionValidators:
    """Exercise the list-field validators on the Contradiction schema."""

    def test_papers_lists_with_nested_values(self):
        """papers_a, papers_b and citations should flatten nested list input."""
        fields = {
            "topic": "Test topic",
            "viewpoint_a": "View A",
            "papers_a": ["paper1", [], "paper2"],
            "viewpoint_b": "View B",
            "papers_b": [["paper3"], "paper4"],
            "citations": ["Citation 1", [["Nested"]]],
            "confidence": 0.7,
        }

        contradiction = Contradiction(**fields)

        assert contradiction.papers_a == ["paper1", "paper2"]
        assert contradiction.papers_b == ["paper3", "paper4"]
        assert contradiction.citations == ["Citation 1", "Nested"]

    def test_mixed_types_normalized(self):
        """Ints in the papers lists become strings; None is expected to vanish."""
        fields = {
            "topic": "Test",
            "viewpoint_a": "A",
            "papers_a": ["paper1", 123, None],
            "viewpoint_b": "B",
            "papers_b": [456, "paper2"],
            "citations": ["Citation"],
            "confidence": 0.6,
        }

        contradiction = Contradiction(**fields)

        assert contradiction.papers_a == ["paper1", "123"]
        assert contradiction.papers_b == ["456", "paper2"]
|
|
|
|
|
|
|
|
class TestSynthesisResultValidators:
    """Exercise the list-field validators on the SynthesisResult schema."""

    def test_research_gaps_with_nested_lists(self):
        """research_gaps and papers_analyzed should flatten nested list input."""
        fields = {
            "consensus_points": [],
            "contradictions": [],
            "research_gaps": ["Gap 1", [["Nested gap"]], None],
            "summary": "Test summary",
            "confidence_score": 0.8,
            "papers_analyzed": ["paper1", [], "paper2"],
        }

        result = SynthesisResult(**fields)

        assert result.research_gaps == ["Gap 1", "Nested gap"]
        assert result.papers_analyzed == ["paper1", "paper2"]

    def test_string_converted_to_list(self):
        """A bare string given for a list field should be wrapped in a list."""
        fields = {
            "consensus_points": [],
            "contradictions": [],
            "research_gaps": "Single gap",
            "summary": "Test",
            "confidence_score": 0.7,
            "papers_analyzed": "paper1",
        }

        result = SynthesisResult(**fields)

        assert result.research_gaps == ["Single gap"]
        assert result.papers_analyzed == ["paper1"]
|
|
|
|
|
|
|
|
class TestValidatorsWithRealWorldData:
    """Feed the schemas inputs shaped like malformed LLM responses."""

    def test_llm_returns_empty_arrays_within_citations(self):
        """Reproduce the reported bug: empty lists embedded in list fields."""
        llm_payload = {
            "paper_id": "2303.08710v1",
            "methodology": "Deep learning approach",
            "key_findings": ["95% accuracy", [], "Outperforms baselines"],
            "conclusions": "Novel method works well",
            "limitations": ["Limited dataset", []],
            "citations": ["Methodology section", [], "Results section"],
            "main_contributions": ["Novel architecture"],
            "confidence_score": 0.85,
        }

        result = Analysis(**llm_payload)

        # Construction must succeed and every embedded empty list must be gone.
        assert isinstance(result, Analysis)
        assert result.citations == ["Methodology section", "Results section"]
        assert result.key_findings == ["95% accuracy", "Outperforms baselines"]
        assert result.limitations == ["Limited dataset"]

    def test_llm_returns_mixed_malformed_data(self):
        """Combine nesting, None, blank strings and ints in a single payload."""
        llm_payload = {
            "paper_id": "test_id",
            "methodology": "Test",
            "key_findings": [[], "Finding", None, [["Nested"]], "", "  ", 123],
            "conclusions": "Test",
            "limitations": [[["Deep"]], None, "Limit", []],
            "citations": ["Citation", [[], []], None, ""],
            "main_contributions": [None, [], "Contribution", [["Deep contrib"]]],
            "confidence_score": 0.5,
        }

        result = Analysis(**llm_payload)

        assert result.key_findings == ["Finding", "Nested", "123"]
        assert result.limitations == ["Deep", "Limit"]
        assert result.citations == ["Citation"]
        assert result.main_contributions == ["Contribution", "Deep contrib"]
|
|
|
|
|
|
|
|
# Allow running this module directly; hands this file to pytest in verbose mode.
if __name__ == "__main__":
    pytest.main(args=[__file__, "-v"])
|
|
|