# Hugging Face Space: samir72 / Multi-Agent-Research-Paper-Analysis-System (status: Running)
# File: Multi-Agent-Research-Paper-Analysis-System/tests/test_mcp_arxiv_client.py
# Last commit (GitHub Actions): "Clean sync from GitHub - no large files in history"
# Commit aca8ab4, about 2 months ago; file size 15 kB

	"""
	Unit tests for MCP arXiv Client.
	"""
	import os
	import pytest
	from datetime import datetime
	from pathlib import Path
	from unittest.mock import Mock, MagicMock, patch, AsyncMock
	from typing import Dict, Any
	import json

	from utils.mcp_arxiv_client import MCPArxivClient
	from utils.schemas import Paper
	from mcp.types import CallToolResult, TextContent


	@pytest.fixture
	def mock_mcp_session() -> AsyncMock:
	    """Create a mock MCP session.

	    Returns:
	        An ``AsyncMock`` whose ``call_tool`` attribute is explicitly set to
	        its own ``AsyncMock`` so tests can configure and inspect it.
	    """
	    session = AsyncMock()
	    session.call_tool = AsyncMock()
	    return session


	@pytest.fixture
	def mcp_client(tmp_path) -> MCPArxivClient:
	    """Create MCPArxivClient with temporary storage.

	    Args:
	        tmp_path: pytest's built-in per-test temporary directory.

	    Returns:
	        A client constructed with ``storage_path`` set to ``str(tmp_path)``.
	    """
	    return MCPArxivClient(storage_path=str(tmp_path))


	@pytest.fixture
	def sample_mcp_paper_data() -> Dict[str, Any]:
	    """Sample paper data as returned by MCP tools.

	    Returns:
	        A fresh dict per test describing one paper; it uses the ``summary``
	        key (not ``abstract``) for the paper's abstract text.
	    """
	    return {
	        "id": "2401.00001",
	        "title": "Deep Learning for Image Classification",
	        "authors": ["John Doe", "Jane Smith"],
	        "summary": "This paper presents a novel approach to image classification.",
	        "pdf_url": "https://arxiv.org/pdf/2401.00001.pdf",
	        "published": "2024-01-01T00:00:00Z",
	        "categories": ["cs.CV", "cs.AI"],
	    }


	@pytest.fixture
	def sample_paper() -> Paper:
	    """Create a sample Paper object.

	    Returns:
	        A ``Paper`` whose field values mirror the ``sample_mcp_paper_data``
	        fixture (same id, title, authors, abstract text, URL and categories).
	    """
	    return Paper(
	        arxiv_id="2401.00001",
	        title="Deep Learning for Image Classification",
	        authors=["John Doe", "Jane Smith"],
	        abstract="This paper presents a novel approach to image classification.",
	        pdf_url="https://arxiv.org/pdf/2401.00001.pdf",
	        published=datetime(2024, 1, 1),
	        categories=["cs.CV", "cs.AI"],
	    )


	class TestMCPArxivClient:
	    """Test suite for MCPArxivClient.

	    Most tests patch ``_call_tool`` (or ``_get_session``) on the client under
	    test, so each test controls exactly what the client receives from the
	    tool layer.
	    """

	    @staticmethod
	    def _session_returning(payload):
	        """Build a mock session whose ``call_tool`` returns a CallToolResult.

	        Args:
	            payload: JSON-serialisable object placed, as JSON text, in the
	                single ``TextContent`` item of the result.

	        Returns:
	            An ``AsyncMock`` session with ``call_tool.return_value`` set.
	        """
	        text_content = TextContent(type="text", text=json.dumps(payload))
	        session = AsyncMock()
	        session.call_tool.return_value = CallToolResult(content=[text_content])
	        return session

	    def test_init(self, tmp_path):
	        """Test client initialization."""
	        client = MCPArxivClient(storage_path=str(tmp_path))
	        assert client.storage_path == tmp_path
	        assert tmp_path.exists()

	    def test_init_default_path(self):
	        """Test initialization with default storage path."""
	        # patch.dict restores os.environ when the block exits.
	        with patch.dict(os.environ, {"MCP_ARXIV_STORAGE_PATH": "data/test_mcp"}):
	            client = MCPArxivClient()
	            assert client.storage_path == Path("data/test_mcp")

	    def test_parse_mcp_paper_success(self, mcp_client, sample_mcp_paper_data):
	        """Test parsing MCP paper data into Paper object."""
	        paper = mcp_client._parse_mcp_paper(sample_mcp_paper_data)

	        assert isinstance(paper, Paper)
	        assert paper.arxiv_id == "2401.00001"
	        assert paper.title == "Deep Learning for Image Classification"
	        assert paper.authors == ["John Doe", "Jane Smith"]
	        assert paper.abstract == "This paper presents a novel approach to image classification."
	        assert paper.pdf_url == "https://arxiv.org/pdf/2401.00001.pdf"
	        assert paper.categories == ["cs.CV", "cs.AI"]

	    def test_parse_mcp_paper_with_abstract_field(self, mcp_client):
	        """Test parsing when MCP returns 'abstract' instead of 'summary'."""
	        paper_data = {
	            "id": "2401.00002",
	            "title": "Test Paper",
	            "authors": ["Author A"],
	            "abstract": "Abstract text here",
	            "published": "2024-01-01T00:00:00Z",
	            "categories": ["cs.AI"],
	        }
	        paper = mcp_client._parse_mcp_paper(paper_data)
	        assert paper.abstract == "Abstract text here"

	    def test_parse_mcp_paper_missing_pdf_url(self, mcp_client):
	        """Test parsing generates PDF URL if missing."""
	        paper_data = {
	            "id": "2401.00003",
	            "title": "Test",
	            "authors": ["Author"],
	            "summary": "Summary",
	            "published": "2024-01-01T00:00:00Z",
	            "categories": ["cs.AI"],
	        }
	        paper = mcp_client._parse_mcp_paper(paper_data)
	        assert paper.pdf_url == "https://arxiv.org/pdf/2401.00003.pdf"

	    @pytest.mark.asyncio
	    async def test_search_papers_async_success(self, mcp_client, sample_mcp_paper_data):
	        """Test successful async paper search."""
	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = {
	                "papers": [sample_mcp_paper_data]
	            }

	            papers = await mcp_client.search_papers_async("deep learning", max_results=5)

	            assert len(papers) == 1
	            assert papers[0].arxiv_id == "2401.00001"
	            assert papers[0].title == "Deep Learning for Image Classification"

	            # Verify tool was called correctly
	            mock_call_tool.assert_called_once()
	            call_args = mock_call_tool.call_args[0]
	            assert call_args[0] == "search_papers"
	            assert call_args[1]["query"] == "deep learning"
	            assert call_args[1]["max_results"] == 5

	    @pytest.mark.asyncio
	    async def test_search_papers_async_with_category(self, mcp_client, sample_mcp_paper_data):
	        """Test search with category filter."""
	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = {"papers": [sample_mcp_paper_data]}

	            # Only the arguments forwarded to the tool are checked here.
	            await mcp_client.search_papers_async(
	                "machine learning",
	                max_results=3,
	                category="cs.AI"
	            )

	            call_args = mock_call_tool.call_args[0]
	            assert call_args[1]["category"] == "cs.AI"
	            assert call_args[1]["max_results"] == 3

	    @pytest.mark.asyncio
	    async def test_search_papers_async_list_response(self, mcp_client, sample_mcp_paper_data):
	        """Test handling MCP response as list instead of dict."""
	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = [sample_mcp_paper_data]

	            papers = await mcp_client.search_papers_async("test")

	            assert len(papers) == 1
	            assert papers[0].arxiv_id == "2401.00001"

	    @pytest.mark.asyncio
	    async def test_search_papers_async_no_results(self, mcp_client):
	        """Test search with no results."""
	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = {"papers": []}

	            papers = await mcp_client.search_papers_async("nonexistent query")

	            assert len(papers) == 0

	    def test_search_papers_sync(self, mcp_client, sample_mcp_paper_data):
	        """Test synchronous search_papers wrapper."""
	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = {"papers": [sample_mcp_paper_data]}

	            papers = mcp_client.search_papers("test query")

	            assert len(papers) == 1
	            assert papers[0].arxiv_id == "2401.00001"

	    @pytest.mark.asyncio
	    async def test_download_paper_async_success(self, mcp_client, sample_paper, tmp_path):
	        """Test successful paper download."""
	        pdf_path = tmp_path / "2401.00001.pdf"

	        # Create mock PDF during the tool call (simulating MCP server behavior).
	        # The stub accepts any positional/keyword arguments so it does not
	        # depend on how the client passes the tool name and parameters.
	        def create_pdf(*args, **kwargs):
	            pdf_path.write_text("mock pdf content")
	            return {"status": "success"}

	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            # side_effect supplies the return value, so no return_value is set.
	            mock_call_tool.side_effect = create_pdf

	            result = await mcp_client.download_paper_async(sample_paper)

	            assert result == pdf_path
	            assert pdf_path.exists()

	            # Verify tool was called
	            mock_call_tool.assert_called_once()
	            call_args = mock_call_tool.call_args[0]
	            assert call_args[0] == "download_paper"
	            assert call_args[1]["paper_id"] == "2401.00001"

	    @pytest.mark.asyncio
	    async def test_download_paper_async_already_cached(self, mcp_client, sample_paper, tmp_path):
	        """Test downloading already cached paper."""
	        # Create existing PDF
	        pdf_path = tmp_path / "2401.00001.pdf"
	        pdf_path.write_text("existing pdf")

	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            result = await mcp_client.download_paper_async(sample_paper)

	            assert result == pdf_path
	            # Should not call MCP tool if already cached
	            mock_call_tool.assert_not_called()

	    @pytest.mark.asyncio
	    async def test_download_paper_async_failure(self, mcp_client, sample_paper):
	        """Test download failure handling."""
	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.side_effect = Exception("Download failed")

	            result = await mcp_client.download_paper_async(sample_paper)

	            assert result is None

	    def test_download_paper_sync(self, mcp_client, sample_paper, tmp_path):
	        """Test synchronous download_paper wrapper."""
	        # The PDF exists before the call is made.
	        pdf_path = tmp_path / "2401.00001.pdf"
	        pdf_path.write_text("mock pdf")

	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = {"status": "success"}

	            result = mcp_client.download_paper(sample_paper)

	            assert result == pdf_path

	    def test_download_papers_multiple(self, mcp_client, tmp_path):
	        """Test downloading multiple papers."""
	        papers = [
	            Paper(
	                arxiv_id=f"2401.0000{i}",
	                title=f"Paper {i}",
	                authors=["Author"],
	                abstract="Abstract",
	                pdf_url=f"https://arxiv.org/pdf/2401.0000{i}.pdf",
	                published=datetime(2024, 1, 1),
	                categories=["cs.AI"]
	            )
	            for i in range(1, 4)
	        ]

	        # Create mock PDFs
	        for paper in papers:
	            pdf_path = tmp_path / f"{paper.arxiv_id}.pdf"
	            pdf_path.write_text("mock content")

	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = {"status": "success"}

	            paths = mcp_client.download_papers(papers)

	            assert len(paths) == 3
	            assert all(isinstance(p, Path) for p in paths)

	    @pytest.mark.asyncio
	    async def test_get_cached_papers_async_success(self, mcp_client, tmp_path):
	        """Test listing cached papers via MCP."""
	        # Create mock cached PDFs
	        pdf1 = tmp_path / "2401.00001.pdf"
	        pdf2 = tmp_path / "2401.00002.pdf"
	        pdf1.write_text("pdf1")
	        pdf2.write_text("pdf2")

	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = {
	                "papers": ["2401.00001", "2401.00002"]
	            }

	            paths = await mcp_client.get_cached_papers_async()

	            assert len(paths) == 2
	            assert pdf1 in paths
	            assert pdf2 in paths

	    @pytest.mark.asyncio
	    async def test_get_cached_papers_async_fallback(self, mcp_client, tmp_path):
	        """Test fallback to filesystem listing on MCP error."""
	        # Create mock PDFs
	        pdf1 = tmp_path / "2401.00001.pdf"
	        pdf1.write_text("pdf1")

	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.side_effect = Exception("MCP error")

	            paths = await mcp_client.get_cached_papers_async()

	            # Should fall back to filesystem glob
	            assert len(paths) == 1
	            assert pdf1 in paths

	    def test_get_cached_papers_sync(self, mcp_client, tmp_path):
	        """Test synchronous get_cached_papers wrapper."""
	        pdf1 = tmp_path / "2401.00001.pdf"
	        pdf1.write_text("pdf")

	        with patch.object(mcp_client, '_call_tool') as mock_call_tool:
	            mock_call_tool.return_value = {"papers": ["2401.00001"]}

	            paths = mcp_client.get_cached_papers()

	            assert len(paths) == 1
	            assert pdf1 in paths

	    @pytest.mark.asyncio
	    async def test_call_tool_error_handling(self, mcp_client):
	        """Test error handling in _call_tool."""
	        with patch.object(mcp_client, '_get_session') as mock_get_session:
	            mock_session = AsyncMock()
	            mock_session.call_tool.side_effect = Exception("Tool call failed")
	            mock_get_session.return_value = mock_session

	            with pytest.raises(Exception, match="Tool call failed"):
	                await mcp_client._call_tool("test_tool", {})

	    @pytest.mark.asyncio
	    async def test_close_session(self, mcp_client):
	        """Test closing MCP session."""
	        mcp_client._session = AsyncMock()
	        await mcp_client.close()
	        assert mcp_client._session is None

	    @pytest.mark.asyncio
	    async def test_call_tool_with_calltoolresult(self, mcp_client, sample_mcp_paper_data):
	        """Test _call_tool properly extracts data from CallToolResult."""
	        with patch.object(mcp_client, '_get_session') as mock_get_session:
	            # Mock session returns a CallToolResult wrapping JSON text
	            mock_get_session.return_value = self._session_returning(
	                {"papers": [sample_mcp_paper_data]}
	            )

	            # Call the tool
	            result = await mcp_client._call_tool("search_papers", {"query": "test"})

	            # Verify result was properly extracted and parsed
	            assert isinstance(result, dict)
	            assert "papers" in result
	            assert len(result["papers"]) == 1
	            assert result["papers"][0]["id"] == "2401.00001"

	    @pytest.mark.asyncio
	    async def test_search_papers_with_calltools_result(self, mcp_client, sample_mcp_paper_data):
	        """Test search_papers_async works with a CallToolResult response."""
	        with patch.object(mcp_client, '_get_session') as mock_get_session:
	            # Mock session returns a CallToolResult wrapping JSON text
	            mock_get_session.return_value = self._session_returning(
	                {"papers": [sample_mcp_paper_data]}
	            )

	            # Search for papers
	            papers = await mcp_client.search_papers_async("deep learning")

	            # Verify papers were properly parsed
	            assert len(papers) == 1
	            assert papers[0].arxiv_id == "2401.00001"
	            assert papers[0].title == "Deep Learning for Image Classification"