# GitHub Actions
# Clean sync from GitHub - no large files in history
# aca8ab4
"""
Unit tests for MCP arXiv Client.
"""
import os
import pytest
from datetime import datetime
from pathlib import Path
from unittest.mock import Mock, MagicMock, patch, AsyncMock
from typing import Dict, Any
import json
from utils.mcp_arxiv_client import MCPArxivClient
from utils.schemas import Paper
from mcp.types import CallToolResult, TextContent
@pytest.fixture
def mock_mcp_session():
    """Provide an async mock standing in for an MCP session.

    The returned mock carries an explicitly assigned, awaitable
    ``call_tool`` attribute.
    """
    # Constructor keyword arguments are set as attributes on the new mock.
    return AsyncMock(call_tool=AsyncMock())
@pytest.fixture
def mcp_client(tmp_path):
    """Build an MCPArxivClient whose storage path is pytest's ``tmp_path``."""
    storage_dir = str(tmp_path)
    return MCPArxivClient(storage_path=storage_dir)
@pytest.fixture
def sample_mcp_paper_data():
    """Return one paper record in the dict shape the tests feed to the client.

    The abstract text is stored under the ``summary`` key.
    """
    paper_record = {
        "id": "2401.00001",
        "title": "Deep Learning for Image Classification",
        "authors": ["John Doe", "Jane Smith"],
        "summary": "This paper presents a novel approach to image classification.",
        "pdf_url": "https://arxiv.org/pdf/2401.00001.pdf",
        "published": "2024-01-01T00:00:00Z",
        "categories": ["cs.CV", "cs.AI"],
    }
    return paper_record
@pytest.fixture
def sample_paper():
    """Return a Paper instance mirroring the sample MCP paper record."""
    paper_fields = {
        "arxiv_id": "2401.00001",
        "title": "Deep Learning for Image Classification",
        "authors": ["John Doe", "Jane Smith"],
        "abstract": "This paper presents a novel approach to image classification.",
        "pdf_url": "https://arxiv.org/pdf/2401.00001.pdf",
        "published": datetime(2024, 1, 1),
        "categories": ["cs.CV", "cs.AI"],
    }
    return Paper(**paper_fields)
class TestMCPArxivClient:
    """Test suite for MCPArxivClient.

    Tests that exercise tool calls patch ``_call_tool`` or ``_get_session``
    on the client under test, so canned responses are returned instead of
    whatever those methods would normally produce.
    """

    @staticmethod
    def _make_call_tool_result(payload):
        """Wrap *payload* in a CallToolResult holding one JSON TextContent."""
        return CallToolResult(
            content=[TextContent(type="text", text=json.dumps(payload))]
        )

    def test_init(self, tmp_path):
        """Test client initialization."""
        client = MCPArxivClient(storage_path=str(tmp_path))

        assert client.storage_path == tmp_path
        assert tmp_path.exists()

    def test_init_default_path(self, tmp_path, monkeypatch):
        """Test initialization with default storage path."""
        # The configured path is relative. Run from tmp_path so that, should
        # the client touch the filesystem on construction (not visible from
        # this file), nothing lands in the directory pytest was started from.
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("MCP_ARXIV_STORAGE_PATH", "data/test_mcp")

        client = MCPArxivClient()

        assert client.storage_path == Path("data/test_mcp")

    def test_parse_mcp_paper_success(self, mcp_client, sample_mcp_paper_data):
        """Test parsing MCP paper data into Paper object."""
        paper = mcp_client._parse_mcp_paper(sample_mcp_paper_data)

        assert isinstance(paper, Paper)
        assert paper.arxiv_id == "2401.00001"
        assert paper.title == "Deep Learning for Image Classification"
        assert paper.authors == ["John Doe", "Jane Smith"]
        assert paper.abstract == "This paper presents a novel approach to image classification."
        assert paper.pdf_url == "https://arxiv.org/pdf/2401.00001.pdf"
        assert paper.categories == ["cs.CV", "cs.AI"]

    def test_parse_mcp_paper_with_abstract_field(self, mcp_client):
        """Test parsing when MCP returns 'abstract' instead of 'summary'."""
        paper_data = {
            "id": "2401.00002",
            "title": "Test Paper",
            "authors": ["Author A"],
            "abstract": "Abstract text here",
            "published": "2024-01-01T00:00:00Z",
            "categories": ["cs.AI"],
        }

        paper = mcp_client._parse_mcp_paper(paper_data)

        assert paper.abstract == "Abstract text here"

    def test_parse_mcp_paper_missing_pdf_url(self, mcp_client):
        """Test parsing generates PDF URL if missing."""
        paper_data = {
            "id": "2401.00003",
            "title": "Test",
            "authors": ["Author"],
            "summary": "Summary",
            "published": "2024-01-01T00:00:00Z",
            "categories": ["cs.AI"],
        }

        paper = mcp_client._parse_mcp_paper(paper_data)

        assert paper.pdf_url == "https://arxiv.org/pdf/2401.00003.pdf"

    @pytest.mark.asyncio
    async def test_search_papers_async_success(self, mcp_client, sample_mcp_paper_data):
        """Test successful async paper search."""
        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = {
                "papers": [sample_mcp_paper_data]
            }
            papers = await mcp_client.search_papers_async("deep learning", max_results=5)

        assert len(papers) == 1
        assert papers[0].arxiv_id == "2401.00001"
        assert papers[0].title == "Deep Learning for Image Classification"

        # Verify tool was called correctly
        mock_call_tool.assert_called_once()
        call_args = mock_call_tool.call_args[0]
        assert call_args[0] == "search_papers"
        assert call_args[1]["query"] == "deep learning"
        assert call_args[1]["max_results"] == 5

    @pytest.mark.asyncio
    async def test_search_papers_async_with_category(self, mcp_client, sample_mcp_paper_data):
        """Test search with category filter."""
        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = {"papers": [sample_mcp_paper_data]}
            # Only the arguments forwarded to the tool are checked here, so
            # the returned papers are deliberately not bound to a name.
            await mcp_client.search_papers_async(
                "machine learning",
                max_results=3,
                category="cs.AI"
            )

        call_args = mock_call_tool.call_args[0]
        assert call_args[1]["category"] == "cs.AI"
        assert call_args[1]["max_results"] == 3

    @pytest.mark.asyncio
    async def test_search_papers_async_list_response(self, mcp_client, sample_mcp_paper_data):
        """Test handling MCP response as list instead of dict."""
        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = [sample_mcp_paper_data]
            papers = await mcp_client.search_papers_async("test")

        assert len(papers) == 1
        assert papers[0].arxiv_id == "2401.00001"

    @pytest.mark.asyncio
    async def test_search_papers_async_no_results(self, mcp_client):
        """Test search with no results."""
        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = {"papers": []}
            papers = await mcp_client.search_papers_async("nonexistent query")

        assert len(papers) == 0

    def test_search_papers_sync(self, mcp_client, sample_mcp_paper_data):
        """Test synchronous search_papers wrapper."""
        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = {"papers": [sample_mcp_paper_data]}
            papers = mcp_client.search_papers("test query")

        assert len(papers) == 1
        assert papers[0].arxiv_id == "2401.00001"

    @pytest.mark.asyncio
    async def test_download_paper_async_success(self, mcp_client, sample_paper, tmp_path):
        """Test successful paper download."""
        pdf_path = tmp_path / "2401.00001.pdf"

        def create_pdf(*args, **kwargs):
            # Create the PDF as part of the tool call (simulating MCP server
            # behavior) and hand back the tool's response.
            pdf_path.write_text("mock pdf content")
            return {"status": "success"}

        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            # side_effect alone determines the mocked result; a separate
            # return_value would be ignored because create_pdf returns one.
            mock_call_tool.side_effect = create_pdf
            result = await mcp_client.download_paper_async(sample_paper)

        assert result == pdf_path
        assert pdf_path.exists()

        # Verify tool was called
        mock_call_tool.assert_called_once()
        call_args = mock_call_tool.call_args[0]
        assert call_args[0] == "download_paper"
        assert call_args[1]["paper_id"] == "2401.00001"

    @pytest.mark.asyncio
    async def test_download_paper_async_already_cached(self, mcp_client, sample_paper, tmp_path):
        """Test downloading already cached paper."""
        # Create existing PDF
        pdf_path = tmp_path / "2401.00001.pdf"
        pdf_path.write_text("existing pdf")

        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            result = await mcp_client.download_paper_async(sample_paper)

        assert result == pdf_path
        # Should not call MCP tool if already cached
        mock_call_tool.assert_not_called()

    @pytest.mark.asyncio
    async def test_download_paper_async_failure(self, mcp_client, sample_paper):
        """Test download failure handling."""
        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.side_effect = Exception("Download failed")
            result = await mcp_client.download_paper_async(sample_paper)

        assert result is None

    def test_download_paper_sync(self, mcp_client, sample_paper, tmp_path):
        """Test synchronous download_paper wrapper."""
        pdf_path = tmp_path / "2401.00001.pdf"
        pdf_path.write_text("mock pdf")

        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = {"status": "success"}
            result = mcp_client.download_paper(sample_paper)

        assert result == pdf_path

    def test_download_papers_multiple(self, mcp_client, tmp_path):
        """Test downloading multiple papers."""
        papers = [
            Paper(
                arxiv_id=f"2401.0000{i}",
                title=f"Paper {i}",
                authors=["Author"],
                abstract="Abstract",
                pdf_url=f"https://arxiv.org/pdf/2401.0000{i}.pdf",
                published=datetime(2024, 1, 1),
                categories=["cs.AI"]
            )
            for i in range(1, 4)
        ]

        # Create mock PDFs
        for paper in papers:
            pdf_path = tmp_path / f"{paper.arxiv_id}.pdf"
            pdf_path.write_text("mock content")

        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = {"status": "success"}
            paths = mcp_client.download_papers(papers)

        assert len(paths) == 3
        assert all(isinstance(p, Path) for p in paths)

    @pytest.mark.asyncio
    async def test_get_cached_papers_async_success(self, mcp_client, tmp_path):
        """Test listing cached papers via MCP."""
        # Create mock cached PDFs
        pdf1 = tmp_path / "2401.00001.pdf"
        pdf2 = tmp_path / "2401.00002.pdf"
        pdf1.write_text("pdf1")
        pdf2.write_text("pdf2")

        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = {
                "papers": ["2401.00001", "2401.00002"]
            }
            paths = await mcp_client.get_cached_papers_async()

        assert len(paths) == 2
        assert pdf1 in paths
        assert pdf2 in paths

    @pytest.mark.asyncio
    async def test_get_cached_papers_async_fallback(self, mcp_client, tmp_path):
        """Test fallback to filesystem listing on MCP error."""
        # Create mock PDFs
        pdf1 = tmp_path / "2401.00001.pdf"
        pdf1.write_text("pdf1")

        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.side_effect = Exception("MCP error")
            paths = await mcp_client.get_cached_papers_async()

        # Should fall back to filesystem glob
        assert len(paths) == 1
        assert pdf1 in paths

    def test_get_cached_papers_sync(self, mcp_client, tmp_path):
        """Test synchronous get_cached_papers wrapper."""
        pdf1 = tmp_path / "2401.00001.pdf"
        pdf1.write_text("pdf")

        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
            mock_call_tool.return_value = {"papers": ["2401.00001"]}
            paths = mcp_client.get_cached_papers()

        assert len(paths) == 1
        assert pdf1 in paths

    @pytest.mark.asyncio
    async def test_call_tool_error_handling(self, mcp_client):
        """Test error handling in _call_tool."""
        with patch.object(mcp_client, '_get_session') as mock_get_session:
            mock_session = AsyncMock()
            mock_session.call_tool.side_effect = Exception("Tool call failed")
            mock_get_session.return_value = mock_session

            with pytest.raises(Exception, match="Tool call failed"):
                await mcp_client._call_tool("test_tool", {})

    @pytest.mark.asyncio
    async def test_close_session(self, mcp_client):
        """Test closing MCP session."""
        mcp_client._session = AsyncMock()

        await mcp_client.close()

        assert mcp_client._session is None

    @pytest.mark.asyncio
    async def test_call_tool_with_calltoolresult(self, mcp_client, sample_mcp_paper_data):
        """Test _call_tool properly extracts data from CallToolResult."""
        with patch.object(mcp_client, '_get_session') as mock_get_session:
            # Mock session to return a CallToolResult carrying the JSON payload
            mock_session = AsyncMock()
            mock_session.call_tool.return_value = self._make_call_tool_result(
                {"papers": [sample_mcp_paper_data]}
            )
            mock_get_session.return_value = mock_session

            # Call the tool
            result = await mcp_client._call_tool("search_papers", {"query": "test"})

        # Verify result was properly extracted and parsed
        assert isinstance(result, dict)
        assert "papers" in result
        assert len(result["papers"]) == 1
        assert result["papers"][0]["id"] == "2401.00001"

    @pytest.mark.asyncio
    async def test_search_papers_with_calltools_result(self, mcp_client, sample_mcp_paper_data):
        """Test search_papers_async works when the session returns a CallToolResult."""
        with patch.object(mcp_client, '_get_session') as mock_get_session:
            # Mock session to return a CallToolResult carrying the JSON payload
            mock_session = AsyncMock()
            mock_session.call_tool.return_value = self._make_call_tool_result(
                {"papers": [sample_mcp_paper_data]}
            )
            mock_get_session.return_value = mock_session

            # Search for papers
            papers = await mcp_client.search_papers_async("deep learning")

        # Verify papers were properly parsed
        assert len(papers) == 1
        assert papers[0].arxiv_id == "2401.00001"
        assert papers[0].title == "Deep Learning for Image Classification"