Spaces:

stefanches
/

OpenBIDSifier

Sleeping

App Files Files Community

OpenBIDSifier / demo_agent.py

earrieta

Integrated GUI

15b5ec4 about 2 months ago

raw

history blame contribute delete

18.3 kB

	#!/usr/bin/env python
	"""
	Gradio demo UI for the BIDSifierAgent.

	This wraps the existing CLI-style step-wise logic (prompts.py + agent.py)
	into an interactive Gradio interface.

	Requirements
	------------
	pip install gradio bids_validator python-dotenv dspy-ai
	"""

	from __future__ import annotations

	import time
	from bids_validator import BIDSValidator
	import subprocess
	from pathlib import Path
	from typing import Any, Dict, List, Optional, Tuple

	import gradio as gr

	from agent import BIDSifierAgent # your existing agent
	from cli import parse_commands_from_markdown # reuse the CLI helper if available

	# Step mapping: UI label -> agent step id

	BIDSIFIER_STEPS: Dict[str, str] = {
	"1. Summarize dataset": "summary",
	"2. Propose metadata commands": "create_metadata",
	"3. Propose structure commands": "create_structure",
	"4. Propose rename/move commands": "rename_move",
	}

	STEP_LABELS = list(BIDSIFIER_STEPS.keys())
	NUM_STEPS = len(STEP_LABELS)


	# Helpers

	def split_shell_commands(text: str) -> List[str]:
	"""
	Split a multi-line shell script into individual commands.

	Each non-empty line is treated as a separate command, except when a line
	ends with a backslash (\\), in which case it is joined with the following
	line(s) to form a single logical command.

	Parameters
	----------
	text : str
	Multi-line string containing shell commands.

	Returns
	-------
	list of str
	The list of shell commands to execute.
	"""
	commands: List[str] = []
	current: str = ""

	for raw_line in text.splitlines():
	line = raw_line.strip()
	if not line:
	continue

	if current:
	# Continue an ongoing command
	if line.endswith("\\"):
	current += " " + line[:-1].rstrip()
	else:
	current += " " + line
	commands.append(current)
	current = ""
	else:
	# Start a new command
	if line.endswith("\\"):
	current = line[:-1].rstrip()
	else:
	commands.append(line)

	if current:
	commands.append(current)

	return commands


	def build_context(
	dataset_xml: str,
	readme_text: str,
	publication_text: str,
	output_root: str,
	) -> Dict[str, Any]:
	"""
	Build the context dictionary expected by BIDSifierAgent.

	Parameters
	----------
	dataset_xml : str
	Dataset XML content (or empty string).
	readme_text : str
	README text content (or empty string).
	publication_text : str
	Publication/notes content (or empty string).
	output_root : str
	Target BIDS root directory.

	Returns
	-------
	dict
	Context dictionary.
	"""
	return {
	"dataset_xml": dataset_xml or None,
	"readme_text": readme_text or None,
	"publication_text": publication_text or None,
	"output_root": output_root or "./bids_output",
	"user_feedback": "",
	}


	# Core callbacks

	def call_bidsifier_step(
	dataset_xml: str,
	readme_text: str,
	publication_text: str,
	output_root: str,
	provider: str,
	model: str,
	step_label: str,
	manual_prompt: str,
	) -> Tuple[str, str, Dict[str, Any], int]:
	"""
	Call BIDSifierAgent for a given step and return raw output + parsed commands.

	Parameters
	----------
	dataset_xml : str
	Dataset XML content.
	readme_text : str
	README content.
	publication_text : str
	Publication/notes content.
	output_root : str
	Target BIDS root directory.
	provider : str
	LLM provider (e.g. "openai").
	model : str
	LLM model name (e.g. "gpt-5" or "gpt-4o-mini").
	step_label : str
	UI label of the selected step.
	manual_prompt : str
	Optional free-form user override; if non-empty we call `run_query`
	instead of the structured `run_step`.

	Returns
	-------
	llm_output : str
	Raw text returned by the LLM.
	commands_str : str
	Commands extracted from the first fenced bash/sh code block.
	state : dict
	State capturing last call inputs, for potential reuse (e.g. retry).
	step_index : int
	Index of the current step (for progress updates).
	"""
	if not output_root.strip():
	return (
	"⚠️ Please provide an output root before calling BIDSifier.",
	"",
	{},
	0,
	)

	if step_label not in BIDSIFIER_STEPS:
	return (
	"⚠️ Please select a valid BIDSifier step.",
	"",
	{},
	0,
	)

	step_id = BIDSIFIER_STEPS[step_label]
	context = build_context(dataset_xml, readme_text, publication_text, output_root)

	agent = BIDSifierAgent(provider=provider, model=model)

	# Decide whether to use the structured step prompt or a free-form query:
	if manual_prompt.strip():
	llm_output = agent.run_query(manual_prompt)
	else:
	llm_output = agent.run_step(step_id, context)

	# Extract bash commands from fenced block
	commands = parse_commands_from_markdown(llm_output)
	commands_str = "\n".join(commands) if commands else ""

	# Step index for progress bar
	try:
	step_index = STEP_LABELS.index(step_label) + 1
	except ValueError:
	step_index = 0

	state = {
	"dataset_xml": dataset_xml,
	"readme_text": readme_text,
	"publication_text": publication_text,
	"output_root": output_root,
	"provider": provider,
	"model": model,
	"step_label": step_label,
	"step_id": step_id,
	"llm_output": llm_output,
	"commands": commands,
	}

	return llm_output, commands_str, state, step_index


	def confirm_commands(
	last_state: Optional[Dict[str, Any]],
	progress_value: int,
	) -> Tuple[str, str, Dict[str, Any], int, str, str]:
	"""Advance to the next BIDSifier step and call the agent for it.

	Parameters
	----------
	last_state : dict or None
	State from the previous `call_bidsifier_step`.
	progress_value : int
	Current progress value.

	Returns
	-------
	llm_output : str
	Raw output from the agent for the next step.
	commands_str : str
	Parsed commands from that output.
	new_state : dict
	Updated state reflecting the new step.
	new_progress : int
	Updated progress value (1-based index of new step).
	new_step_label : str
	UI label of the advanced step (or unchanged if already at last step).
	status_msg : str
	Short status / info message.
	"""
	if not last_state:
	return (
	"⚠️ No previous BIDSifier step to advance from.",
	"",
	{},
	progress_value,
	STEP_LABELS[0],
	"No state available to confirm.",
	)

	current_label = last_state.get("step_label")
	try:
	idx = STEP_LABELS.index(current_label)
	except (ValueError, TypeError):
	idx = 0

	# If already at last step, do not advance further.
	if idx >= len(STEP_LABELS) - 1:
	return (
	"⚠️ Already at final step; cannot advance.",
	"",
	last_state,
	progress_value,
	current_label,
	"Final step reached.",
	)

	next_label = STEP_LABELS[idx + 1]
	next_id = BIDSIFIER_STEPS[next_label]

	# Rebuild context from last_state.
	context = build_context(
	last_state.get("dataset_xml", "") or "",
	last_state.get("readme_text", "") or "",
	last_state.get("publication_text", "") or "",
	last_state.get("output_root", "") or "",
	)

	agent = BIDSifierAgent(
	provider=last_state.get("provider", "openai"),
	model=last_state.get("model", "gpt-4o-mini"),
	)

	llm_output = agent.run_step(next_id, context)
	commands = parse_commands_from_markdown(llm_output)
	commands_str = "\n".join(commands) if commands else ""

	new_state = dict(last_state)
	new_state.update(
	{
	"step_label": next_label,
	"step_id": next_id,
	"llm_output": llm_output,
	"commands": commands,
	}
	)

	new_progress = max(progress_value, idx + 2) # idx is 0-based; progress is 1-based
	status_msg = f"Advanced to step '{next_label}'. Parsed {len(commands)} command(s)."

	return llm_output, commands_str, new_state, new_progress, next_label, status_msg


	def run_commands(
	last_state: Optional[Dict[str, Any]],
	progress_value: int,
	) -> Tuple[str, int, str]:
	"""Execute parsed shell commands for the current step, then advance step pointer.

	Parameters
	----------
	last_state : dict or None
	State containing commands to execute.
	progress_value : int
	Current progress value.

	Returns
	-------
	execution_log : str
	Markdown log of command execution results.
	new_progress : int
	Updated progress value after execution.
	new_step_label : str
	Updated dropdown label pointing to next step (or unchanged if final).
	"""
	if not last_state:
	return "⚠️ No previous BIDSifier step to run.", progress_value, STEP_LABELS[0]

	output_root = last_state.get("output_root", "").strip()
	commands: List[str] = last_state.get("commands", [])
	step_label = last_state.get("step_label")

	if not output_root:
	return "⚠️ Output root is empty; cannot execute commands.", progress_value, step_label or STEP_LABELS[0]
	if not commands:
	return "⚠️ No commands detected to execute.", progress_value, step_label or STEP_LABELS[0]

	root = Path(output_root)
	root.mkdir(parents=True, exist_ok=True)

	all_details: List[str] = []
	for raw_cmd in commands:
	for cmd in split_shell_commands(raw_cmd):
	proc = subprocess.run(
	cmd,
	shell=True,
	cwd=str(root),
	capture_output=True,
	text=True,
	)
	all_details.append(
	f"Executed: {cmd}\n"
	f"Exit code: {proc.returncode}\n"
	f"Stdout:\n{proc.stdout}\n"
	f"Stderr:\n{proc.stderr}\n" + "-" * 40
	)

	status = "### Command execution log\n\n" + "\n\n".join(all_details)

	try:
	idx = STEP_LABELS.index(step_label)
	except (ValueError, TypeError):
	idx = 0

	# Advance pointer (without auto-calling agent) if not at final step.
	if idx < len(STEP_LABELS) - 1:
	new_step_label = STEP_LABELS[idx + 1]
	new_progress = max(progress_value, idx + 2)
	else:
	new_step_label = STEP_LABELS[idx]
	new_progress = progress_value

	return status, new_progress, new_step_label


	def run_bids_validation(output_root: str) -> Tuple[str, str]:
	"""
	Run the BIDS filename validator on all files under `output_root`.

	Parameters
	----------
	output_root : str
	Root directory of the BIDS dataset.

	Returns
	-------
	report : str
	A Markdown report summarizing which files are BIDS-like and which are not.
	status_token : str
	"pass:<timestamp>" if all files are BIDS-compliant (at least one file),
	otherwise "fail:<timestamp>". The timestamp ensures Gradio's .change
	event fires every time.
	"""
	if not output_root.strip():
	return (
	"⚠️ Please provide an output root before running the BIDS validator.",
	f"fail:{time.time()}",
	)

	root = Path(output_root)
	if not root.exists():
	return (
	f"⚠️ Output root `{output_root}` does not exist. Nothing to validate.",
	f"fail:{time.time()}",
	)

	validator = BIDSValidator()

	lines = []
	valid_count = 0
	invalid_count = 0

	for path in sorted(root.rglob("*")):
	if not path.is_file():
	continue
	rel = path.relative_to(root)
	rel_str = "/" + rel.as_posix()
	is_valid = validator.is_bids(rel_str)
	if is_valid:
	valid_count += 1
	status = "OK"
	else:
	invalid_count += 1
	status = "NOT BIDS"
	lines.append(f"{rel_str}: {status}")

	if not lines:
	return (
	f"Note: No files found under `{output_root}` to validate.",
	f"fail:{time.time()}",
	)

	summary = (
	f"Validated {valid_count + invalid_count} files: "
	f"{valid_count} OK, {invalid_count} NOT BIDS."
	)
	bullet_lines = "\n".join(f"- `{line}`" for line in lines)

	report = f"### BIDS Validator report\n\n{bullet_lines}\n\nSummary: {summary}"

	status_flag = "pass" if invalid_count == 0 and valid_count > 0 else "fail"
	status_token = f"{status_flag}:{time.time()}"
	return report, status_token


	# Gradio UI

	with gr.Blocks(
	title="BIDSifier Agent Interface",
	theme=gr.themes.Citrus(),
	head="""
	<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/confetti.browser.min.js"></script>
	""",
	) as demo:
	gr.Image(
	value="images/bh_logo.png",
	show_label=False,
	height=80,
	elem_id="bh_logo",
	)

	gr.Markdown(
	"""
	# BIDSifier Agent Demo

	Interactive UI wrapping the BIDSifierAgent (CLI logic) to propose
	shell commands for BIDS conversion, step by step.
	Commands are extracted from fenced ```bash```/```sh``` blocks.
	"""
	)

	with gr.Row():
	# File uploader + editable textbox for dataset XML content.
	dataset_xml_file = gr.File(
	label="Upload dataset_structure.xml (optional)",
	file_types=[".xml", ".txt"],
	type="filepath",
	)
	dataset_xml_input = gr.Textbox(
	label="Dataset XML (editable)",
	placeholder="Paste or upload dataset_structure.xml content here",
	lines=8,
	)
	readme_input = gr.Textbox(
	label="README",
	placeholder="Paste README.md content here (optional)",
	lines=8,
	)

	publication_input = gr.Textbox(
	label="Publication / Notes",
	placeholder="Paste relevant publication snippets or notes here (optional)",
	lines=6,
	)

	with gr.Accordion("LLM settings (advanced)", open=False):
	provider_input = gr.Dropdown(
	label="Provider",
	choices=["openai"],
	value="openai",
	)
	model_input = gr.Textbox(
	label="Model",
	value="gpt-4o-mini",
	placeholder="e.g., gpt-4o-mini, gpt-5",
	)

	output_root_input = gr.Textbox(
	label="Output root",
	placeholder="brainmets-bids",
	lines=1,
	)

	step_dropdown = gr.Dropdown(
	label="BIDSifier step",
	choices=STEP_LABELS,
	value=STEP_LABELS[0],
	info="Select the current logical step in the BIDSifier workflow.",
	)

	progress_bar = gr.Slider(
	label="Progress through BIDSifier steps",
	minimum=0,
	maximum=NUM_STEPS,
	step=1,
	value=0,
	interactive=False,
	)

	manual_prompt_input = gr.Textbox(
	label="Override prompt / free-form query (optional)",
	placeholder=(
	"If non-empty, this free-form query will be sent to the agent instead "
	"of the structured step prompt."
	),
	lines=3,
	)

	call_button = gr.Button("Call BIDSifier", variant="primary")

	llm_output_box = gr.Textbox(
	label="Raw BIDSifier output",
	lines=10,
	interactive=True,
	)

	commands_box = gr.Textbox(
	label="Parsed shell commands (from fenced bash block)",
	lines=10,
	interactive=True,
	)

	confirm_button = gr.Button("Confirm (advance & call next step)", variant="primary")
	run_commands_button = gr.Button("Run Commands", variant="secondary")
	bids_validator_button = gr.Button("Run BIDS Validator", variant="primary")

	status_msg = gr.Markdown(label="Status / execution log")
	validation_status = gr.Textbox(visible=False)

	# State to store last agent call for Confirm
	last_state = gr.State(value=None)

	# Wiring

	call_button.click(
	fn=call_bidsifier_step,
	inputs=[
	dataset_xml_input,
	readme_input,
	publication_input,
	output_root_input,
	provider_input,
	model_input,
	step_dropdown,
	manual_prompt_input,
	],
	outputs=[llm_output_box, commands_box, last_state, progress_bar],
	)

	# Callback to load uploaded file content into the textbox.
	def _load_dataset_xml(file_path: Optional[str]) -> str:
	if not file_path:
	return ""
	try:
	return Path(file_path).read_text(encoding="utf-8", errors="ignore")
	except Exception as e:
	return f"⚠️ Failed to read file: {e}"

	dataset_xml_file.change(
	fn=_load_dataset_xml,
	inputs=[dataset_xml_file],
	outputs=[dataset_xml_input],
	)

	confirm_button.click(
	fn=confirm_commands,
	inputs=[last_state, progress_bar],
	outputs=[llm_output_box, commands_box, last_state, progress_bar, step_dropdown, status_msg],
	)

	run_commands_button.click(
	fn=run_commands,
	inputs=[last_state, progress_bar],
	outputs=[status_msg, progress_bar, step_dropdown],
	)

	bids_validator_button.click(
	fn=run_bids_validation,
	inputs=[output_root_input],
	outputs=[status_msg, validation_status],
	)

	validation_status.change(
	fn=None,
	inputs=[validation_status],
	outputs=[],
	js="""
	(value) => {
	if (value && value.startsWith("pass") && window.confetti) {
	window.confetti({
	particleCount: 240,
	spread: 70,
	origin: { y: 0.6 }
	});
	}
	return [];
	}
	""",
	)


	if __name__ == "__main__":
	demo.launch()