bmrb_api Module
The bmrb_api module provides programmatic access to experimental NMR data from the Biological Magnetic Resonance Data Bank (BMRB) and to geometric validation metrics for Protein Data Bank (PDB) entries, served by the PDBe validation API.
Overview
A critical part of validating synthetic protein models is comparing them to real-world experimental data. This module allows users to fetch ground-truth restraints, chemical shifts, and geometric quality reports to ensure their synthetic structures are biologically realistic.
Key Features
- Experimental Restraints: Fetch distance constraints (NOEs) that define the 3D fold of a protein.
- Chemical Shifts: Download peer-reviewed chemical shift assignments for structural validation.
- PDBe Validation: Retrieve summary reports and outlier lists (Ramachandran, bond lengths, etc.) for established PDB entries.
- Automated Downloads: Easily download PDB files from RCSB for benchmarking.
API Reference
bmrb_api
Classes
BMRBAPI
Interface to Biological Magnetic Resonance Data Bank (BMRB) API.
This provides empirical validation data for NMR structures by fetching
peer-reviewed experimental restraints and chemical shifts.
Source code in synth_pdb/bmrb_api.py
class BMRBAPI:
    """Interface to the Biological Magnetic Resonance Data Bank (BMRB) API.

    This provides empirical validation data for NMR structures by fetching
    peer-reviewed experimental restraints and chemical shifts. Every method
    is a static method; the class acts purely as a namespace.
    """

    BASE_URL = "https://api.bmrb.io/v2"

    @staticmethod
    def get_entry_metadata(bmrb_id: str) -> dict[str, Any]:
        """Fetch metadata for a BMRB entry.

        Args:
            bmrb_id: BMRB ID (e.g., '6457' for Ubiquitin).

        Returns:
            The object stored under ``bmrb_id`` in the JSON response, or an
            empty dict when the response carries no such key.

        Raises:
            requests.HTTPError: If the server answers with an error status
                (raised by ``response.raise_for_status()``).
        """
        # NOTE(review): no timeout is passed to requests.get here or in the
        # other methods of this class; consider adding one.
        url = f"{BMRBAPI.BASE_URL}/entry/{bmrb_id}"
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        return cast(dict[str, Any], data.get(bmrb_id, {}))

    @staticmethod
    def search_entries_with_restraints(search_term: str = "ubiquitin") -> list[str]:
        """Search for BMRB entries that likely have restraint data.

        Args:
            search_term: Free-text query. It is percent-encoded before being
                placed in the URL, so spaces, ``&`` or ``#`` cannot corrupt
                the query string.

        Returns:
            The ``entry_id`` of every item in the response's ``results``
            list. An empty list is returned when the request fails, the
            status is not 200, or the payload does not have that shape.
        """
        from urllib.parse import quote

        url = (
            f"{BMRBAPI.BASE_URL}/search/entry?q={quote(search_term, safe='')}"
            "&field=nmr_star_loop_category&value=_Gen_dist_constraint"
        )
        try:
            response = requests.get(url)
            if response.status_code == 200:
                return [item["entry_id"] for item in response.json().get("results", [])]
            logger.debug(f"BMRB search for {search_term!r} returned HTTP {response.status_code}")
        except Exception as e:
            # Best-effort lookup: report the cause instead of hiding it.
            logger.debug(f"BMRB search for {search_term!r} failed: {e}")
        return []

    @staticmethod
    def fetch_restraints(bmrb_id: str) -> list[dict[str, Any]]:
        """Fetch distance restraints from BMRB.

        SCIENTIFIC BASIS:
        Restraints in the BMRB are the ground truth for structural modeling.
        Comparing synthetic models against these ensures biological realism.

        Args:
            bmrb_id: BMRB entry ID.

        Returns:
            One dict per parsed row with the keys ``id``, ``index_1``,
            ``atom_name_1``, ``index_2``, ``atom_name_2`` and ``upper_limit``.
            Rows whose residue or atom columns cannot be read are skipped.
            ``upper_limit`` falls back to 5.0 when the upper-bound column is
            absent, empty or not numeric. An empty list is returned when no
            category yields a restraint.
        """
        default_upper_limit = 5.0

        def first_index(tag_to_idx, *names):
            # Column of the first tag name that is present, else None.
            # Compared against None explicitly because column 0 is a valid hit.
            for name in names:
                idx = tag_to_idx.get(name)
                if idx is not None:
                    return idx
            return None

        # We try both modern _Gen_dist_constraint and legacy names
        for cat in ("_Gen_dist_constraint", "Gen_dist_constraint"):
            url = f"{BMRBAPI.BASE_URL}/entry/{bmrb_id}/loop/{cat}"
            try:
                response = requests.get(url)
                if response.status_code != 200:
                    continue
                data = response.json()
                # Parse the BMRB API v2 response format
                # Expected format: { "cat_name": { "tags": [...], "data": [[...]] } }
                loop = data[next(iter(data))]
                rows = loop["data"]
                # Map lower-cased tag names to column indices
                tag_to_idx = {tag.lower(): i for i, tag in enumerate(loop["tags"])}

                idx_id = first_index(tag_to_idx, "id")
                if idx_id is None:
                    idx_id = 0
                idx_res1 = first_index(tag_to_idx, "auth_seq_id_1", "res_id_1")
                idx_atom1 = first_index(tag_to_idx, "atom_id_1", "atom_name_1")
                idx_res2 = first_index(tag_to_idx, "auth_seq_id_2", "res_id_2")
                idx_atom2 = first_index(tag_to_idx, "atom_id_2", "atom_name_2")
                idx_upper = tag_to_idx.get("distance_upper_bound_val")

                # Without all four residue/atom columns no row can be parsed.
                if any(i is None for i in (idx_res1, idx_atom1, idx_res2, idx_atom2)):
                    logger.debug(f"{cat} for {bmrb_id} lacks required residue/atom tags")
                    continue

                restraints = []
                skipped = 0
                for row in rows:
                    try:
                        index_1 = int(row[idx_res1])
                        index_2 = int(row[idx_res2])
                        atom_name_1 = row[idx_atom1]
                        atom_name_2 = row[idx_atom2]
                    except (IndexError, TypeError, ValueError):
                        # A single unreadable row must not discard the
                        # restraints parsed from the other rows.
                        skipped += 1
                        continue

                    upper_limit = default_upper_limit
                    # `is not None`: an upper-bound column at index 0 is valid.
                    if idx_upper is not None and idx_upper < len(row) and row[idx_upper]:
                        try:
                            upper_limit = float(row[idx_upper])
                        except (TypeError, ValueError):
                            # Non-numeric placeholder: keep the default bound.
                            upper_limit = default_upper_limit

                    restraints.append(
                        {
                            "id": row[idx_id] if idx_id < len(row) else None,
                            "index_1": index_1,
                            "atom_name_1": atom_name_1,
                            "index_2": index_2,
                            "atom_name_2": atom_name_2,
                            "upper_limit": upper_limit,
                        }
                    )

                if skipped:
                    logger.debug(f"Skipped {skipped} unparsable {cat} rows for {bmrb_id}")
                if restraints:
                    return restraints
            except Exception as e:
                logger.debug(f"Failed to fetch {cat} for {bmrb_id}: {e}")
                continue
        return []

    @staticmethod
    def fetch_chemical_shifts(bmrb_id: str) -> dict[int, dict[str, float]]:
        """Fetch chemical shifts from BMRB using pynmrstar.

        Only the first ``_Atom_chem_shift`` loop of the entry is read. The
        residue column is ``comp_index_id`` (falling back to ``seq_id``), the
        atom column is ``atom_id`` and the value column is ``val``. An atom
        named "H" is stored under the key "HN".

        Returns:
            Dict[int, Dict[str, float]]: Mapping of res_id -> {atom: value}.
            Empty when pynmrstar is not installed, the entry has no such
            loop, a required column is missing, or fetching fails.
        """
        if pynmrstar is None:
            logger.error("pynmrstar not installed. Cannot fetch shifts.")
            return {}
        try:
            entry = pynmrstar.Entry.from_database(bmrb_id)
            loops = entry.get_loops_by_category("_Atom_chem_shift")
            if not loops:
                return {}
            loop = loops[0]
            tag_to_idx = {tag.lower(): i for i, tag in enumerate(loop.tags)}
            idx_res = tag_to_idx.get("comp_index_id")
            if idx_res is None:
                idx_res = tag_to_idx.get("seq_id")
            idx_atom = tag_to_idx.get("atom_id")
            idx_val = tag_to_idx.get("val")

            shifts: dict[int, dict[str, float]] = {}
            rows = loop.data
            # The column check does not depend on the row, so do it once.
            if idx_res is None or idx_atom is None or idx_val is None:
                return shifts
            for row in rows:
                try:
                    res_id = int(row[idx_res])
                    atom_name = row[idx_atom]
                    val = float(row[idx_val])
                    if atom_name == "H":
                        atom_name = "HN"
                    shifts.setdefault(res_id, {})[atom_name] = val
                except (ValueError, TypeError):
                    # Skip rows whose residue ID or value is not numeric.
                    continue
            return shifts
        except Exception as e:
            logger.error(f"Failed to fetch shifts for BMRB {bmrb_id}: {e}")
            return {}

    @staticmethod
    def download_pdb(pdb_id: str, output_path: str) -> bool:
        """Download a PDB file from RCSB.

        Args:
            pdb_id: 4-character PDB ID.
            output_path: Destination file path.

        Returns:
            True when the server answered with status 200 and the file was
            written; False on any other status or on any exception.
        """
        url = f"https://files.rcsb.org/download/{pdb_id.upper()}.pdb"
        try:
            response = requests.get(url)
            if response.status_code == 200:
                # Write the raw bytes so the file matches the download exactly,
                # independent of the platform's default text encoding and
                # newline translation.
                with open(output_path, "wb") as f:
                    f.write(response.content)
                return True
            logger.error(f"Failed to download PDB {pdb_id}: HTTP {response.status_code}")
        except Exception as e:
            logger.error(f"Failed to download PDB {pdb_id}: {e}")
        return False
|
Functions
get_entry_metadata(bmrb_id)
staticmethod
Fetch metadata for a BMRB entry.
Parameters:
| Name |
Type |
Description |
Default |
bmrb_id
|
str
|
BMRB ID (e.g., '6457' for Ubiquitin).
|
required
|
Source code in synth_pdb/bmrb_api.py
@staticmethod
def get_entry_metadata(bmrb_id: str) -> dict[str, Any]:
    """Fetch metadata for a BMRB entry.

    Args:
        bmrb_id: BMRB ID (e.g., '6457' for Ubiquitin).

    Returns:
        The object stored under ``bmrb_id`` in the JSON response, or an
        empty dict when the response carries no such key.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status()``).
    """
    reply = requests.get(f"{BMRBAPI.BASE_URL}/entry/{bmrb_id}")
    reply.raise_for_status()
    payload = reply.json()
    # The response is keyed by the entry ID that was requested.
    entry = payload.get(bmrb_id, {})
    return cast(dict[str, Any], entry)
|
search_entries_with_restraints(search_term='ubiquitin')
staticmethod
Search for BMRB entries that likely have restraint data.
Source code in synth_pdb/bmrb_api.py
@staticmethod
def search_entries_with_restraints(search_term: str = "ubiquitin") -> list[str]:
    """Search for BMRB entries that likely have restraint data.

    Args:
        search_term: Free-text query. It is percent-encoded before being
            placed in the URL, so spaces, ``&`` or ``#`` cannot corrupt the
            query string.

    Returns:
        The ``entry_id`` of every item in the response's ``results`` list.
        An empty list is returned when the request fails, the status is not
        200, or the payload does not have that shape.
    """
    from urllib.parse import quote

    url = (
        f"{BMRBAPI.BASE_URL}/search/entry?q={quote(search_term, safe='')}"
        "&field=nmr_star_loop_category&value=_Gen_dist_constraint"
    )
    try:
        response = requests.get(url)
        if response.status_code == 200:
            return [item["entry_id"] for item in response.json().get("results", [])]
        logger.debug(f"BMRB search for {search_term!r} returned HTTP {response.status_code}")
    except Exception as e:
        # Best-effort lookup: report the cause instead of hiding it.
        logger.debug(f"BMRB search for {search_term!r} failed: {e}")
    return []
|
fetch_restraints(bmrb_id)
staticmethod
Fetch distance restraints from BMRB.
SCIENTIFIC BASIS:
Restraints in the BMRB are the ground truth for structural modeling.
Comparing synthetic models against these ensures biological realism.
Source code in synth_pdb/bmrb_api.py
@staticmethod
def fetch_restraints(bmrb_id: str) -> list[dict[str, Any]]:
    """Fetch distance restraints from BMRB.

    SCIENTIFIC BASIS:
    Restraints in the BMRB are the ground truth for structural modeling.
    Comparing synthetic models against these ensures biological realism.

    Args:
        bmrb_id: BMRB entry ID.

    Returns:
        One dict per parsed row with the keys ``id``, ``index_1``,
        ``atom_name_1``, ``index_2``, ``atom_name_2`` and ``upper_limit``.
        Rows whose residue or atom columns cannot be read are skipped.
        ``upper_limit`` falls back to 5.0 when the upper-bound column is
        absent, empty or not numeric. An empty list is returned when no
        category yields a restraint.
    """
    default_upper_limit = 5.0

    def first_index(tag_to_idx, *names):
        # Column of the first tag name that is present, else None.
        # Compared against None explicitly because column 0 is a valid hit.
        for name in names:
            idx = tag_to_idx.get(name)
            if idx is not None:
                return idx
        return None

    # We try both modern _Gen_dist_constraint and legacy names
    for cat in ("_Gen_dist_constraint", "Gen_dist_constraint"):
        url = f"{BMRBAPI.BASE_URL}/entry/{bmrb_id}/loop/{cat}"
        try:
            response = requests.get(url)
            if response.status_code != 200:
                continue
            data = response.json()
            # Parse the BMRB API v2 response format
            # Expected format: { "cat_name": { "tags": [...], "data": [[...]] } }
            loop = data[next(iter(data))]
            rows = loop["data"]
            # Map lower-cased tag names to column indices
            tag_to_idx = {tag.lower(): i for i, tag in enumerate(loop["tags"])}

            idx_id = first_index(tag_to_idx, "id")
            if idx_id is None:
                idx_id = 0
            idx_res1 = first_index(tag_to_idx, "auth_seq_id_1", "res_id_1")
            idx_atom1 = first_index(tag_to_idx, "atom_id_1", "atom_name_1")
            idx_res2 = first_index(tag_to_idx, "auth_seq_id_2", "res_id_2")
            idx_atom2 = first_index(tag_to_idx, "atom_id_2", "atom_name_2")
            idx_upper = tag_to_idx.get("distance_upper_bound_val")

            # Without all four residue/atom columns no row can be parsed.
            if any(i is None for i in (idx_res1, idx_atom1, idx_res2, idx_atom2)):
                logger.debug(f"{cat} for {bmrb_id} lacks required residue/atom tags")
                continue

            restraints = []
            skipped = 0
            for row in rows:
                try:
                    index_1 = int(row[idx_res1])
                    index_2 = int(row[idx_res2])
                    atom_name_1 = row[idx_atom1]
                    atom_name_2 = row[idx_atom2]
                except (IndexError, TypeError, ValueError):
                    # A single unreadable row must not discard the
                    # restraints parsed from the other rows.
                    skipped += 1
                    continue

                upper_limit = default_upper_limit
                # `is not None`: an upper-bound column at index 0 is valid.
                if idx_upper is not None and idx_upper < len(row) and row[idx_upper]:
                    try:
                        upper_limit = float(row[idx_upper])
                    except (TypeError, ValueError):
                        # Non-numeric placeholder: keep the default bound.
                        upper_limit = default_upper_limit

                restraints.append(
                    {
                        "id": row[idx_id] if idx_id < len(row) else None,
                        "index_1": index_1,
                        "atom_name_1": atom_name_1,
                        "index_2": index_2,
                        "atom_name_2": atom_name_2,
                        "upper_limit": upper_limit,
                    }
                )

            if skipped:
                logger.debug(f"Skipped {skipped} unparsable {cat} rows for {bmrb_id}")
            if restraints:
                return restraints
        except Exception as e:
            logger.debug(f"Failed to fetch {cat} for {bmrb_id}: {e}")
            continue
    return []
|
fetch_chemical_shifts(bmrb_id)
staticmethod
Fetch chemical shifts from BMRB using pynmrstar.
Returns:
| Type |
Description |
dict[int, dict[str, float]]
|
Dict[int, Dict[str, float]]: Mapping of res_id -> {atom: value}.
|
Source code in synth_pdb/bmrb_api.py
@staticmethod
def fetch_chemical_shifts(bmrb_id: str) -> dict[int, dict[str, float]]:
    """Fetch chemical shifts from BMRB using pynmrstar.

    Only the first ``_Atom_chem_shift`` loop of the entry is read. The
    residue column is ``comp_index_id`` (falling back to ``seq_id``), the
    atom column is ``atom_id`` and the value column is ``val``. An atom named
    "H" is stored under the key "HN".

    Returns:
        Dict[int, Dict[str, float]]: Mapping of res_id -> {atom: value}.
        Empty when pynmrstar is not installed, the entry has no such loop,
        a required column is missing, or fetching fails.
    """
    if pynmrstar is None:
        logger.error("pynmrstar not installed. Cannot fetch shifts.")
        return {}

    try:
        shift_loops = pynmrstar.Entry.from_database(bmrb_id).get_loops_by_category(
            "_Atom_chem_shift"
        )
        if not shift_loops:
            return {}
        first_loop = shift_loops[0]

        # Lower-cased tag name -> column position
        columns = {tag.lower(): pos for pos, tag in enumerate(first_loop.tags)}
        res_col = columns.get("comp_index_id")
        if res_col is None:
            res_col = columns.get("seq_id")
        atom_col = columns.get("atom_id")
        val_col = columns.get("val")
        columns_complete = not (res_col is None or atom_col is None or val_col is None)

        by_residue: dict[int, dict[str, float]] = {}
        for record in first_loop.data:
            try:
                if not columns_complete:
                    continue
                residue = int(record[res_col])
                atom = record[atom_col]
                shift = float(record[val_col])
                if atom == "H":
                    atom = "HN"
                if residue not in by_residue:
                    by_residue[residue] = {}
                by_residue[residue][atom] = shift
            except (ValueError, TypeError):
                # Rows with a non-numeric residue ID or value are skipped.
                continue
        return by_residue
    except Exception as e:
        logger.error(f"Failed to fetch shifts for BMRB {bmrb_id}: {e}")
        return {}
|
download_pdb(pdb_id, output_path)
staticmethod
Download a PDB file from RCSB.
Parameters:
| Name |
Type |
Description |
Default |
pdb_id
|
str
|
4-character PDB ID.
|
required
|
output_path
|
str
|
Destination file path.
|
required
|
Source code in synth_pdb/bmrb_api.py
@staticmethod
def download_pdb(pdb_id: str, output_path: str) -> bool:
    """Download a PDB file from RCSB.

    Args:
        pdb_id: 4-character PDB ID.
        output_path: Destination file path.

    Returns:
        True when the server answered with status 200 and the file was
        written; False on any other status or on any exception.
    """
    url = f"https://files.rcsb.org/download/{pdb_id.upper()}.pdb"
    try:
        response = requests.get(url)
        if response.status_code == 200:
            # Write the raw bytes so the file matches the download exactly,
            # independent of the platform's default text encoding and
            # newline translation.
            with open(output_path, "wb") as f:
                f.write(response.content)
            return True
        logger.error(f"Failed to download PDB {pdb_id}: HTTP {response.status_code}")
    except Exception as e:
        logger.error(f"Failed to download PDB {pdb_id}: {e}")
    return False
|
PDBValidationAPI
Interface to PDBe Validation API for geometric assessment.
Provides peer-reviewed geometric quality metrics compared to the entire PDB.
Source code in synth_pdb/bmrb_api.py
class PDBValidationAPI:
    """Interface to PDBe Validation API for geometric assessment.

    Provides peer-reviewed geometric quality metrics compared to the entire PDB.
    """

    # PDBe has unified their API to v2
    BASE_URL = "https://www.ebi.ac.uk/pdbe/api/v2/validation"

    @staticmethod
    def get_validation_summary(pdb_id: str) -> list[dict[str, Any]]:
        """Fetch validation summary for an existing PDB entry.

        COMPATIBILITY NOTE:
        The PDBe v2 API has changed the response structure. This method
        provides a shim to return keys expected by legacy synth-pdb code.

        Args:
            pdb_id: PDB ID; it is lower-cased before being used in the URL
                and as the lookup key in the response.

        Returns:
            list[dict[str, Any]]: A list containing a single dict of shimmed
                percentile keys, matching the legacy synth-pdb response
                contract. Returns an empty list on error so callers can
                always iterate safely.
        """
        entry_key = pdb_id.lower()
        endpoint = f"{PDBValidationAPI.BASE_URL}/global-percentiles/entry/{entry_key}"
        # (legacy key used in tutorials, metric name in the v2 response)
        legacy_to_v2 = (
            ("absolute_percentile_clashscore", "clashscore"),
            ("absolute_percentile_ramachandran", "percent-rama-outliers"),
            ("absolute_percentile_sidechain_outliers", "percent-rota-outliers"),
        )
        try:
            reply = requests.get(endpoint)
            reply.raise_for_status()
            metrics = reply.json().get(entry_key, {})
            legacy_view: dict[str, Any] = {
                legacy: metrics.get(v2_name, {}).get("absolute")
                for legacy, v2_name in legacy_to_v2
            }
            # Wrapped in a list to match the legacy API contract
            # (the old PDBe v1 endpoint returned a list of analysis entries).
            return [legacy_view]
        except Exception as e:
            logger.error(f"Failed to fetch PDBe summary for {pdb_id}: {e}")
            return []

    @staticmethod
    def get_validation_outliers(pdb_id: str) -> dict[str, Any]:
        """Fetch detailed geometric outliers (Ramachandran, etc.) for a PDB entry.

        Args:
            pdb_id: PDB ID; it is lower-cased before being used in the URL
                and as the lookup key in the response.

        Returns:
            The object stored under the lower-cased ID in the JSON response,
            or an empty dict when the key is absent or the request fails.
        """
        entry_key = pdb_id.lower()
        endpoint = f"{PDBValidationAPI.BASE_URL}/residuewise_outlier_summary/entry/{entry_key}"
        try:
            reply = requests.get(endpoint)
            reply.raise_for_status()
            payload = reply.json()
            return cast(dict[str, Any], payload.get(entry_key, {}))
        except Exception as e:
            logger.error(f"Failed to fetch PDBe outliers for {pdb_id}: {e}")
            return {}
|
Functions
get_validation_summary(pdb_id)
staticmethod
Fetch validation summary for an existing PDB entry.
Returns:
| Type |
Description |
list[dict[str, Any]]
|
A list containing a single dict of shimmed percentile keys, matching the legacy synth-pdb response contract. Returns an empty list on error so callers can always iterate safely.
|
COMPATIBILITY NOTE
The PDBe v2 API has changed the response structure. This method
provides a shim to return keys expected by legacy synth-pdb code.
Source code in synth_pdb/bmrb_api.py
@staticmethod
def get_validation_summary(pdb_id: str) -> list[dict[str, Any]]:
    """Fetch validation summary for an existing PDB entry.

    COMPATIBILITY NOTE:
    The PDBe v2 API has changed the response structure. This method
    provides a shim to return keys expected by legacy synth-pdb code.

    Args:
        pdb_id: PDB ID; it is lower-cased before being used in the URL and
            as the lookup key in the response.

    Returns:
        list[dict[str, Any]]: A list containing a single dict of shimmed
            percentile keys, matching the legacy synth-pdb response
            contract. Returns an empty list on error so callers can always
            iterate safely.
    """
    entry_key = pdb_id.lower()
    endpoint = f"{PDBValidationAPI.BASE_URL}/global-percentiles/entry/{entry_key}"
    # (legacy key used in tutorials, metric name in the v2 response)
    legacy_to_v2 = (
        ("absolute_percentile_clashscore", "clashscore"),
        ("absolute_percentile_ramachandran", "percent-rama-outliers"),
        ("absolute_percentile_sidechain_outliers", "percent-rota-outliers"),
    )
    try:
        reply = requests.get(endpoint)
        reply.raise_for_status()
        metrics = reply.json().get(entry_key, {})
        legacy_view: dict[str, Any] = {
            legacy: metrics.get(v2_name, {}).get("absolute")
            for legacy, v2_name in legacy_to_v2
        }
        # Wrapped in a list to match the legacy API contract
        # (the old PDBe v1 endpoint returned a list of analysis entries).
        return [legacy_view]
    except Exception as e:
        logger.error(f"Failed to fetch PDBe summary for {pdb_id}: {e}")
        return []
|
get_validation_outliers(pdb_id)
staticmethod
Fetch detailed geometric outliers (Ramachandran, etc.) for a PDB entry.
Source code in synth_pdb/bmrb_api.py
@staticmethod
def get_validation_outliers(pdb_id: str) -> dict[str, Any]:
    """Fetch detailed geometric outliers (Ramachandran, etc.) for a PDB entry.

    Args:
        pdb_id: PDB ID; it is lower-cased before being used in the URL and
            as the lookup key in the response.

    Returns:
        The object stored under the lower-cased ID in the JSON response, or
        an empty dict when the key is absent or the request fails.
    """
    entry_key = pdb_id.lower()
    endpoint = f"{PDBValidationAPI.BASE_URL}/residuewise_outlier_summary/entry/{entry_key}"
    try:
        reply = requests.get(endpoint)
        reply.raise_for_status()
        payload = reply.json()
        return cast(dict[str, Any], payload.get(entry_key, {}))
    except Exception as e:
        logger.error(f"Failed to fetch PDBe outliers for {pdb_id}: {e}")
        return {}
|
Scientific Basis
BMRB Restraints
In NMR structure calculation, "restraints" are the experimental observations (usually from NOESY experiments) that specify the upper bounds of distances between specific pairs of atoms.
By fetching these for a known protein (like Ubiquitin, BMRB 6457), you can test if synth-pdb's generator or energy minimizer produces structures that satisfy the same constraints as the experimental ensemble.
Validation Percentiles
The PDBValidationAPI provides "percentile scores." A score of 95 means the structure is better than 95% of all structures in the PDB for a given metric (e.g., Ramachandran outliers).
Usage Example
from synth_pdb.bmrb_api import BMRBAPI, PDBValidationAPI
# 1. Fetch metadata for Human Ubiquitin (BMRB 6457)
# get_entry_metadata raises on an HTTP error status instead of returning {}.
metadata = BMRBAPI.get_entry_metadata("6457")
print(f"Title: {metadata.get('title')}")

# 2. Fetch experimental distance restraints (empty list on failure)
restraints = BMRBAPI.fetch_restraints("6457")
print(f"Found {len(restraints)} distance restraints.")

# 3. Fetch experimental chemical shifts (empty dict on failure)
shifts = BMRBAPI.fetch_chemical_shifts("6457")
if 1 in shifts:
    # The amide proton "H" is stored under the key "HN".
    print(f"Residue 1 NH shift: {shifts[1].get('HN')} ppm")

# 4. Fetch PDBe validation summary for a related PDB (e.g., 1D3Z)
summary = PDBValidationAPI.get_validation_summary("1D3Z")
# get_validation_summary returns an empty list on error, so check before
# indexing to avoid an IndexError.
if summary:
    percentile = summary[0].get("absolute_percentile_clashscore")
    print(f"1D3Z Clashscore Percentile: {percentile}")
else:
    print("No PDBe validation summary available for 1D3Z.")