BioMCP Research Workflows¶
Quick, practical workflows for common biomedical research tasks.
1. Literature Review Workflow¶
Quick Start¶
# Find key papers on BRAF V600E melanoma therapy
biomcp article search --gene BRAF --disease melanoma \
--keyword "V600E|therapy|treatment" --limit 50 \
--format json > braf_papers.json
Full Workflow Script¶
import asyncio
from biomcp import BioMCPClient
async def literature_review(gene, disease, focus_terms):
async with BioMCPClient() as client:
# 1. Get gene context
gene_info = await client.genes.get(gene)
# 2. Search by topic
results = {}
for term in focus_terms:
articles = await client.articles.search(
genes=[gene],
diseases=[disease],
keywords=[term],
limit=30
)
results[term] = articles.articles
# 3. Generate summary
print(f"\n{gene} in {disease}: Found {sum(len(v) for v in results.values())} articles")
for topic, articles in results.items():
print(f"\n{topic}: {len(articles)} articles")
for a in articles[:3]:
print(f" - {a.title[:80]}... ({a.year})")
return results
# Run it
asyncio.run(literature_review(
"BRAF",
"melanoma",
["resistance", "combination therapy", "immunotherapy"]
))
Key Points¶
- Start broad, then narrow by topic
- Use OR syntax for variant notations
- Export results for citation management
- Set up weekly searches for updates
2. Clinical Trial Matching Workflow¶
Quick Start¶
# Find trials for EGFR-mutant lung cancer near Boston
biomcp trial search --condition "lung cancer" \
--term "EGFR mutation" --status RECRUITING \
--latitude 42.3601 --longitude -71.0589 --distance 100
Patient Matching Script¶
async def match_patient_to_trials(patient_profile):
async with BioMCPClient() as client:
# 1. Search trials with location
trials = await client.trials.search(
conditions=[patient_profile['diagnosis']],
other_terms=patient_profile['mutations'],
lat=patient_profile['lat'],
long=patient_profile['long'],
distance=patient_profile['max_distance'],
status="RECRUITING"
)
# 2. Score trials
scored = []
for trial in trials.trials[:20]:
score = 0
# Location score
if trial.distance < 50:
score += 25
# Phase score
if trial.phase == "PHASE3":
score += 20
elif trial.phase == "PHASE2":
score += 15
# Mutation match
if any(mut in str(trial.eligibility) for mut in patient_profile['mutations']):
score += 30
scored.append((score, trial))
# 3. Return top matches
scored.sort(reverse=True, key=lambda x: x[0])
return [(s, t) for s, t in scored[:5]]
# Example patient
patient = {
'diagnosis': 'non-small cell lung cancer',
'mutations': ['EGFR L858R'],
'lat': 42.3601,
'long': -71.0589,
'max_distance': 100
}
matches = asyncio.run(match_patient_to_trials(patient))
Key Points¶
- Always use coordinates for location search
- Check both ClinicalTrials.gov and NCI sources
- Contact trial sites directly for pre-screening
- Consider travel burden in recommendations
3. Variant Interpretation Workflow¶
Quick Start¶
# Get variant annotations
biomcp variant get rs121913529 # By rsID
biomcp variant get "NM_007294.4:c.5266dupC" # By HGVS
# Search pathogenic variants
biomcp variant search --gene BRCA1 --significance pathogenic
Variant Analysis Script¶
async def interpret_variant(gene, variant_notation, cancer_type):
async with BioMCPClient() as client:
# 1. Get variant details
try:
variant = await client.variants.get(variant_notation)
significance = variant.clinical_significance
frequency = variant.frequencies.gnomad if hasattr(variant, 'frequencies') else None
except:
significance = "Not found"
frequency = None
# 2. Search literature
articles = await client.articles.search(
genes=[gene],
variants=[variant_notation],
diseases=[cancer_type],
limit=10
)
# 3. Find trials
trials = await client.trials.search(
conditions=[cancer_type],
other_terms=[f"{gene} mutation"],
status="RECRUITING",
limit=5
)
# 4. Generate interpretation
print(f"\nVariant: {gene} {variant_notation}")
print(f"Significance: {significance}")
print(f"Population Frequency: {frequency or 'Unknown'}")
print(f"Literature: {len(articles.articles)} relevant papers")
print(f"Clinical Trials: {len(trials.trials)} active trials")
# Actionability assessment
if significance in ["Pathogenic", "Likely pathogenic"]:
if trials.trials:
print("✓ ACTIONABLE - Clinical trials available")
else:
print("⚠ Pathogenic but no targeted trials")
return {
'significance': significance,
'frequency': frequency,
'articles': len(articles.articles),
'trials': len(trials.trials)
}
# Run it
asyncio.run(interpret_variant("BRAF", "p.V600E", "melanoma"))
Key Points¶
- Check multiple databases (MyVariant, ClinVar via articles)
- Consider cancer type for interpretation
- Look for FDA-approved therapies
- Document tier classification
4. Quick Integration Patterns¶
Batch Processing¶
# Process multiple queries efficiently
async def batch_analysis(items):
async with BioMCPClient() as client:
tasks = []
for item in items:
if item['type'] == 'gene':
tasks.append(client.genes.get(item['id']))
elif item['type'] == 'variant':
tasks.append(client.variants.get(item['id']))
results = await asyncio.gather(*tasks, return_exceptions=True)
return results
Error Handling¶
from biomcp.exceptions import NotFoundError, RateLimitError
import time
async def robust_search(search_func, **params):
retries = 3
for attempt in range(retries):
try:
return await search_func(**params)
except RateLimitError as e:
if attempt < retries - 1:
time.sleep(2 ** attempt) # Exponential backoff
else:
raise
except NotFoundError:
return None
Caching Results¶
from functools import lru_cache
import json
# Simple file-based cache
def cache_results(filename):
def decorator(func):
async def wrapper(*args, **kwargs):
# Check cache
try:
with open(filename, 'r') as f:
return json.load(f)
except FileNotFoundError:
pass
# Fetch and cache
result = await func(*args, **kwargs)
with open(filename, 'w') as f:
json.dump(result, f)
return result
return wrapper
return decorator
@cache_results('gene_cache.json')
async def get_gene_info(gene):
async with BioMCPClient() as client:
return await client.genes.get(gene)
Complete Example: Precision Medicine Report¶
async def generate_precision_medicine_report(patient):
"""Generate comprehensive report for molecular tumor board."""
async with BioMCPClient() as client:
report = {
'patient_id': patient['id'],
'date': datetime.now().isoformat(),
'variants': [],
'trials': [],
'therapies': []
}
# Analyze each variant
for variant in patient['variants']:
# Get annotations
var_info = await robust_search(
client.variants.search,
gene=variant['gene'],
hgvs=variant['hgvs']
)
# Search literature
articles = await client.articles.search(
genes=[variant['gene']],
diseases=[patient['cancer_type']],
keywords=['therapy', 'treatment'],
limit=5
)
# Find trials
trials = await client.trials.search(
conditions=[patient['cancer_type']],
other_terms=[f"{variant['gene']} mutation"],
status="RECRUITING",
limit=3
)
report['variants'].append({
'variant': variant,
'annotation': var_info,
'relevant_articles': len(articles.articles),
'available_trials': len(trials.trials)
})
report['trials'].extend(trials.trials)
# Generate summary
print(f"\nPrecision Medicine Report - {patient['id']}")
print(f"Cancer Type: {patient['cancer_type']}")
print(f"Variants Analyzed: {len(report['variants'])}")
print(f"Clinical Trials Found: {len(report['trials'])}")
# Prioritize actionable findings
actionable = [v for v in report['variants']
if v['available_trials'] > 0]
if actionable:
print(f"\n✓ {len(actionable)} ACTIONABLE variants with trial options")
return report
# Example usage
patient = {
'id': 'PT001',
'cancer_type': 'lung adenocarcinoma',
'variants': [
{'gene': 'EGFR', 'hgvs': 'p.L858R'},
{'gene': 'TP53', 'hgvs': 'p.R273H'}
]
}
report = asyncio.run(generate_precision_medicine_report(patient))
Tips for All Workflows¶
- Always start with the think tool (for AI assistants)
- Use official gene symbols - check genenames.org
- Batch API calls when possible
- Handle errors gracefully - APIs can be unavailable
- Cache frequently accessed data - respect rate limits
- Document your process - for reproducibility