Skip to content

bmrb_api Module

The bmrb_api module provides programmatic access to experimental NMR data from the Biological Magnetic Resonance Data Bank (BMRB) and to geometric validation metrics for Protein Data Bank (PDB) entries, served by the PDBe validation API.

Overview

A critical part of validating synthetic protein models is comparing them to real-world experimental data. This module allows users to fetch ground-truth restraints, chemical shifts, and geometric quality reports to ensure their synthetic structures are biologically realistic.

Key Features

  • Experimental Restraints: Fetch distance constraints (NOEs) that define the 3D fold of a protein.
  • Chemical Shifts: Download peer-reviewed chemical shift assignments for structural validation.
  • PDBe Validation: Retrieve summary reports and outlier lists (Ramachandran, bond lengths, etc.) for established PDB entries.
  • Automated Downloads: Easily download PDB files from RCSB for benchmarking.

API Reference

bmrb_api

Classes

BMRBAPI

Interface to Biological Magnetic Resonance Data Bank (BMRB) API.

This provides empirical validation data for NMR structures by fetching peer-reviewed experimental restraints and chemical shifts.

Source code in synth_pdb/bmrb_api.py
class BMRBAPI:
    """Interface to Biological Magnetic Resonance Data Bank (BMRB) API.

    This provides empirical validation data for NMR structures by fetching
    peer-reviewed experimental restraints and chemical shifts.

    All methods are static. Methods that call ``requests.get`` accept a
    keyword-only ``timeout`` (seconds) so an unresponsive server cannot block
    the caller indefinitely.
    """

    BASE_URL = "https://api.bmrb.io/v2"

    @staticmethod
    def get_entry_metadata(bmrb_id: str, *, timeout: float = 30.0) -> dict[str, Any]:
        """Fetch metadata for a BMRB entry.

        Args:
            bmrb_id: BMRB ID (e.g., '6457' for Ubiquitin).
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The value stored under ``bmrb_id`` in the JSON response, or an
            empty dict when the response has no such key.

        Raises:
            requests.HTTPError: If the server answers with an error status
                (raised by ``raise_for_status``). Connection errors and
                timeouts raised by ``requests.get`` propagate as well.
        """
        url = f"{BMRBAPI.BASE_URL}/entry/{bmrb_id}"
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        return cast(dict[str, Any], data.get(bmrb_id, {}))

    @staticmethod
    def search_entries_with_restraints(
        search_term: str = "ubiquitin", *, timeout: float = 30.0
    ) -> list[str]:
        """Search for BMRB entries that likely have restraint data.

        Args:
            search_term: Free-text query sent as the ``q`` parameter.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The ``entry_id`` of every item under ``results`` in the response.
            Best-effort: returns an empty list on a non-200 status or on any
            failure (the failure is logged at debug level).
        """
        url = f"{BMRBAPI.BASE_URL}/search/entry"
        # Passing the query via `params` lets requests percent-encode the
        # search term, so spaces, '&' or '#' cannot corrupt the query string.
        params = {
            "q": search_term,
            "field": "nmr_star_loop_category",
            "value": "_Gen_dist_constraint",
        }
        try:
            response = requests.get(url, params=params, timeout=timeout)
            if response.status_code == 200:
                return [item["entry_id"] for item in response.json().get("results", [])]
        except Exception as e:
            logger.debug(f"BMRB search for {search_term!r} failed: {e}")
        return []

    @staticmethod
    def fetch_restraints(bmrb_id: str, *, timeout: float = 30.0) -> list[dict[str, Any]]:
        """Fetch distance restraints from BMRB.

        SCIENTIFIC BASIS:
        Restraints in the BMRB are the ground truth for structural modeling.
        Comparing synthetic models against these ensures biological realism.

        Args:
            bmrb_id: BMRB entry ID.
            timeout: Seconds to wait for each request before giving up.

        Returns:
            One dict per parsed row with keys ``id``, ``index_1``,
            ``atom_name_1``, ``index_2``, ``atom_name_2`` and ``upper_limit``.
            ``upper_limit`` is 5.0 when the upper-bound column is absent or
            the cell is empty. Rows that cannot be parsed are skipped.
            Returns an empty list when no category yields any restraint.
        """
        # We try both modern _Gen_dist_constraint and legacy names
        categories = ["_Gen_dist_constraint", "Gen_dist_constraint"]

        for cat in categories:
            url = f"{BMRBAPI.BASE_URL}/entry/{bmrb_id}/loop/{cat}"
            try:
                response = requests.get(url, timeout=timeout)
                if response.status_code != 200:
                    continue
                data = response.json()
                # Parse the BMRB API v2 response format
                # Expected format: { "cat_name": { "tags": [...], "data": [[...]] } }
                loop_key = next(iter(data))
                tags = data[loop_key]["tags"]
                rows = data[loop_key]["data"]

                # Map tags to indices
                tag_to_idx = {tag.lower(): i for i, tag in enumerate(tags)}

                def pick(*names):
                    """Return the column index of the first tag name present, else None."""
                    return next((tag_to_idx[n] for n in names if n in tag_to_idx), None)

                # Every lookup is compared against None explicitly: column
                # index 0 is a valid position and must never count as missing.
                idx_id = pick("id")
                if idx_id is None:
                    idx_id = 0
                idx_res1 = pick("auth_seq_id_1", "res_id_1")
                idx_atom1 = pick("atom_id_1", "atom_name_1")
                idx_res2 = pick("auth_seq_id_2", "res_id_2")
                idx_atom2 = pick("atom_id_2", "atom_name_2")
                idx_upper = pick("distance_upper_bound_val")

                # The required columns do not depend on the row, so check once.
                if idx_res1 is None or idx_atom1 is None or idx_res2 is None or idx_atom2 is None:
                    continue

                restraints = []
                for row in rows:
                    try:
                        upper_raw = row[idx_upper] if idx_upper is not None else None
                        restraints.append(
                            {
                                "id": row[idx_id] if idx_id < len(row) else None,
                                "index_1": int(row[idx_res1]),
                                "atom_name_1": row[idx_atom1],
                                "index_2": int(row[idx_res2]),
                                "atom_name_2": row[idx_atom2],
                                "upper_limit": float(upper_raw) if upper_raw else 5.0,
                            }
                        )
                    except (ValueError, TypeError, IndexError):
                        # Skip a malformed row instead of discarding every
                        # restraint already parsed from this loop.
                        continue
                if restraints:
                    return restraints
            except Exception as e:
                logger.debug(f"Failed to fetch {cat} for {bmrb_id}: {e}")
                continue

        return []

    @staticmethod
    def fetch_chemical_shifts(bmrb_id: str) -> dict[int, dict[str, float]]:
        """Fetch chemical shifts from BMRB using pynmrstar.

        Only the first ``_Atom_chem_shift`` loop of the entry is read. Rows
        whose residue number or shift value cannot be converted are skipped,
        and the atom name "H" is stored as "HN".

        Returns:
            Dict[int, Dict[str, float]]: Mapping of res_id -> {atom: value}.
            Empty when pynmrstar is not installed, the entry has no shift
            loop, a required column is missing, or fetching fails.
        """
        if pynmrstar is None:
            logger.error("pynmrstar not installed. Cannot fetch shifts.")
            return {}
        try:
            entry = pynmrstar.Entry.from_database(bmrb_id)
            loops = entry.get_loops_by_category("_Atom_chem_shift")
            if not loops:
                return {}

            loop = loops[0]
            tag_to_idx = {tag.lower(): i for i, tag in enumerate(loop.tags)}

            # Residue column: prefer comp_index_id, fall back to seq_id.
            idx_res = tag_to_idx.get("comp_index_id")
            if idx_res is None:
                idx_res = tag_to_idx.get("seq_id")
            idx_atom = tag_to_idx.get("atom_id")
            idx_val = tag_to_idx.get("val")

            shifts: dict[int, dict[str, float]] = {}
            rows = loop.data
            # Without all three columns no row can be parsed.
            if idx_res is None or idx_atom is None or idx_val is None:
                return shifts

            for row in rows:
                try:
                    res_id = int(row[idx_res])
                    atom_name = row[idx_atom]
                    val = float(row[idx_val])

                    if atom_name == "H":
                        atom_name = "HN"

                    shifts.setdefault(res_id, {})[atom_name] = val
                except (ValueError, TypeError):
                    continue
            return shifts
        except Exception as e:
            logger.error(f"Failed to fetch shifts for BMRB {bmrb_id}: {e}")
            return {}

    @staticmethod
    def download_pdb(pdb_id: str, output_path: str, *, timeout: float = 30.0) -> bool:
        """Download a PDB file from RCSB.

        Args:
            pdb_id: 4-character PDB ID.
            output_path: Destination file path.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            True when the file was downloaded and written; False on a
            non-200 status or on any error (both are logged).
        """
        url = f"https://files.rcsb.org/download/{pdb_id.upper()}.pdb"
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code == 200:
                # Explicit encoding: the platform default varies between
                # machines and may be unable to encode the downloaded text.
                with open(output_path, "w", encoding="utf-8") as f:
                    f.write(response.text)
                return True
            logger.error(f"Failed to download PDB {pdb_id}: HTTP {response.status_code}")
        except Exception as e:
            logger.error(f"Failed to download PDB {pdb_id}: {e}")
        return False
Functions
get_entry_metadata(bmrb_id) staticmethod

Fetch metadata for a BMRB entry.

Parameters:

Name Type Description Default
bmrb_id str

BMRB ID (e.g., '6457' for Ubiquitin).

required
Source code in synth_pdb/bmrb_api.py
@staticmethod
def get_entry_metadata(bmrb_id: str, *, timeout: float = 30.0) -> dict[str, Any]:
    """Fetch metadata for a BMRB entry.

    Args:
        bmrb_id: BMRB ID (e.g., '6457' for Ubiquitin).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``bmrb_id`` in the JSON response, or an
        empty dict when the response has no such key.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``). Connection errors and
            timeouts raised by ``requests.get`` propagate as well.
    """
    url = f"{BMRBAPI.BASE_URL}/entry/{bmrb_id}"
    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    return cast(dict[str, Any], data.get(bmrb_id, {}))
search_entries_with_restraints(search_term='ubiquitin') staticmethod

Search for BMRB entries that likely have restraint data.

Source code in synth_pdb/bmrb_api.py
@staticmethod
def search_entries_with_restraints(
    search_term: str = "ubiquitin", *, timeout: float = 30.0
) -> list[str]:
    """Search for BMRB entries that likely have restraint data.

    Args:
        search_term: Free-text query sent as the ``q`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``entry_id`` of every item under ``results`` in the response.
        Best-effort: returns an empty list on a non-200 status or on any
        failure (the failure is logged at debug level).
    """
    url = f"{BMRBAPI.BASE_URL}/search/entry"
    # Passing the query via `params` lets requests percent-encode the search
    # term, so spaces, '&' or '#' cannot corrupt the query string.
    params = {
        "q": search_term,
        "field": "nmr_star_loop_category",
        "value": "_Gen_dist_constraint",
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code == 200:
            return [item["entry_id"] for item in response.json().get("results", [])]
    except Exception as e:
        logger.debug(f"BMRB search for {search_term!r} failed: {e}")
    return []
fetch_restraints(bmrb_id) staticmethod

Fetch distance restraints from BMRB.

SCIENTIFIC BASIS: Restraints in the BMRB are the ground truth for structural modeling. Comparing synthetic models against these ensures biological realism.

Source code in synth_pdb/bmrb_api.py
@staticmethod
def fetch_restraints(bmrb_id: str, *, timeout: float = 30.0) -> list[dict[str, Any]]:
    """Fetch distance restraints from BMRB.

    SCIENTIFIC BASIS:
    Restraints in the BMRB are the ground truth for structural modeling.
    Comparing synthetic models against these ensures biological realism.

    Args:
        bmrb_id: BMRB entry ID.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        One dict per parsed row with keys ``id``, ``index_1``,
        ``atom_name_1``, ``index_2``, ``atom_name_2`` and ``upper_limit``.
        ``upper_limit`` is 5.0 when the upper-bound column is absent or the
        cell is empty. Rows that cannot be parsed are skipped. Returns an
        empty list when no category yields any restraint.
    """
    # We try both modern _Gen_dist_constraint and legacy names
    categories = ["_Gen_dist_constraint", "Gen_dist_constraint"]

    for cat in categories:
        url = f"{BMRBAPI.BASE_URL}/entry/{bmrb_id}/loop/{cat}"
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                continue
            data = response.json()
            # Parse the BMRB API v2 response format
            # Expected format: { "cat_name": { "tags": [...], "data": [[...]] } }
            loop_key = next(iter(data))
            tags = data[loop_key]["tags"]
            rows = data[loop_key]["data"]

            # Map tags to indices
            tag_to_idx = {tag.lower(): i for i, tag in enumerate(tags)}

            def pick(*names):
                """Return the column index of the first tag name present, else None."""
                return next((tag_to_idx[n] for n in names if n in tag_to_idx), None)

            # Every lookup is compared against None explicitly: column index 0
            # is a valid position and must never count as missing.
            idx_id = pick("id")
            if idx_id is None:
                idx_id = 0
            idx_res1 = pick("auth_seq_id_1", "res_id_1")
            idx_atom1 = pick("atom_id_1", "atom_name_1")
            idx_res2 = pick("auth_seq_id_2", "res_id_2")
            idx_atom2 = pick("atom_id_2", "atom_name_2")
            idx_upper = pick("distance_upper_bound_val")

            # The required columns do not depend on the row, so check once.
            if idx_res1 is None or idx_atom1 is None or idx_res2 is None or idx_atom2 is None:
                continue

            restraints = []
            for row in rows:
                try:
                    upper_raw = row[idx_upper] if idx_upper is not None else None
                    restraints.append(
                        {
                            "id": row[idx_id] if idx_id < len(row) else None,
                            "index_1": int(row[idx_res1]),
                            "atom_name_1": row[idx_atom1],
                            "index_2": int(row[idx_res2]),
                            "atom_name_2": row[idx_atom2],
                            "upper_limit": float(upper_raw) if upper_raw else 5.0,
                        }
                    )
                except (ValueError, TypeError, IndexError):
                    # Skip a malformed row instead of discarding every
                    # restraint already parsed from this loop.
                    continue
            if restraints:
                return restraints
        except Exception as e:
            logger.debug(f"Failed to fetch {cat} for {bmrb_id}: {e}")
            continue

    return []
fetch_chemical_shifts(bmrb_id) staticmethod

Fetch chemical shifts from BMRB using pynmrstar.

Returns:

Type Description
dict[int, dict[str, float]]

Dict[int, Dict[str, float]]: Mapping of res_id -> {atom: value}. Atoms named "H" in the BMRB data are returned under the key "HN".

Source code in synth_pdb/bmrb_api.py
@staticmethod
def fetch_chemical_shifts(bmrb_id: str) -> dict[int, dict[str, float]]:
    """Retrieve chemical shift assignments for a BMRB entry via pynmrstar.

    Only the first ``_Atom_chem_shift`` loop of the entry is read. Rows whose
    residue number or shift value cannot be converted are skipped, and the
    atom name "H" is stored as "HN".

    Returns:
        Dict[int, Dict[str, float]]: Mapping of res_id -> {atom: value}.
        Empty when pynmrstar is not installed, the entry has no shift loop,
        a required column is missing, or fetching fails.
    """
    if pynmrstar is None:
        logger.error("pynmrstar not installed. Cannot fetch shifts.")
        return {}
    try:
        entry = pynmrstar.Entry.from_database(bmrb_id)
        shift_loops = entry.get_loops_by_category("_Atom_chem_shift")
        if not shift_loops:
            return {}

        first_loop = shift_loops[0]
        column_of = {name.lower(): pos for pos, name in enumerate(first_loop.tags)}

        # Residue column: prefer comp_index_id, fall back to seq_id.
        res_col = column_of.get("comp_index_id")
        if res_col is None:
            res_col = column_of.get("seq_id")
        atom_col = column_of.get("atom_id")
        val_col = column_of.get("val")

        result: dict[int, dict[str, float]] = {}
        records = first_loop.data
        # Without all three columns no row can be parsed.
        if res_col is None or atom_col is None or val_col is None:
            return result

        for record in records:
            try:
                residue = int(record[res_col])
                atom = record[atom_col]
                shift = float(record[val_col])
                key = "HN" if atom == "H" else atom
                result.setdefault(residue, {})[key] = shift
            except (ValueError, TypeError):
                continue
        return result
    except Exception as e:
        logger.error(f"Failed to fetch shifts for BMRB {bmrb_id}: {e}")
        return {}
download_pdb(pdb_id, output_path) staticmethod

Download a PDB file from RCSB.

Parameters:

Name Type Description Default
pdb_id str

4-character PDB ID.

required
output_path str

Destination file path.

required
Source code in synth_pdb/bmrb_api.py
@staticmethod
def download_pdb(pdb_id: str, output_path: str, *, timeout: float = 30.0) -> bool:
    """Download a PDB file from RCSB.

    Args:
        pdb_id: 4-character PDB ID.
        output_path: Destination file path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True when the file was downloaded and written; False on a non-200
        status or on any error (both are logged).
    """
    url = f"https://files.rcsb.org/download/{pdb_id.upper()}.pdb"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            # Explicit encoding: the platform default varies between machines
            # and may be unable to encode the downloaded text.
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(response.text)
            return True
        logger.error(f"Failed to download PDB {pdb_id}: HTTP {response.status_code}")
    except Exception as e:
        logger.error(f"Failed to download PDB {pdb_id}: {e}")
    return False

PDBValidationAPI

Interface to PDBe Validation API for geometric assessment.

Provides peer-reviewed geometric quality metrics compared to the entire PDB.

Source code in synth_pdb/bmrb_api.py
class PDBValidationAPI:
    """Interface to PDBe Validation API for geometric assessment.

    Provides peer-reviewed geometric quality metrics compared to the entire PDB.

    Both methods are static, lowercase the PDB ID before querying, and accept
    a keyword-only ``timeout`` (seconds) so an unresponsive server cannot
    block the caller indefinitely.
    """

    # PDBe has unified their API to v2
    BASE_URL = "https://www.ebi.ac.uk/pdbe/api/v2/validation"

    @staticmethod
    def get_validation_summary(pdb_id: str, *, timeout: float = 30.0) -> list[dict[str, Any]]:
        """Fetch validation summary for an existing PDB entry.

        Args:
            pdb_id: PDB ID; matched case-insensitively.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            A list containing a single dict of shimmed percentile keys, matching
            the legacy synth-pdb response contract.  A metric that is absent or
            null in the response maps to ``None``.  Returns an empty list on
            error so callers can always iterate safely.

        COMPATIBILITY NOTE:
            The PDBe v2 API has changed the response structure. This method
            provides a shim to return keys expected by legacy synth-pdb code.
        """
        pid = pdb_id.lower()
        url = f"{PDBValidationAPI.BASE_URL}/global-percentiles/entry/{pid}"
        # Legacy key used in tutorials -> metric name in the v2 response.
        legacy_to_v2 = {
            "absolute_percentile_clashscore": "clashscore",
            "absolute_percentile_ramachandran": "percent-rama-outliers",
            "absolute_percentile_sidechain_outliers": "percent-rota-outliers",
        }
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            data = response.json().get(pid, {})

            # `or {}` also covers a metric present with a null value, which
            # would otherwise raise and discard the whole summary.
            shim: dict[str, Any] = {
                legacy: (data.get(metric) or {}).get("absolute")
                for legacy, metric in legacy_to_v2.items()
            }
            # Return as a list containing a dict to match the legacy API contract
            # (the old PDBe v1 endpoint returned a list of analysis entries).
            return [shim]
        except Exception as e:
            logger.error(f"Failed to fetch PDBe summary for {pdb_id}: {e}")
            return []

    @staticmethod
    def get_validation_outliers(pdb_id: str, *, timeout: float = 30.0) -> dict[str, Any]:
        """Fetch detailed geometric outliers (Ramachandran, etc.) for a PDB entry.

        Args:
            pdb_id: PDB ID; matched case-insensitively.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The value stored under the lowercased ID in the JSON response, or
            an empty dict when the key is missing or the request fails (the
            failure is logged).
        """
        pid = pdb_id.lower()
        url = f"{PDBValidationAPI.BASE_URL}/residuewise_outlier_summary/entry/{pid}"
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return cast(dict[str, Any], response.json().get(pid, {}))
        except Exception as e:
            logger.error(f"Failed to fetch PDBe outliers for {pdb_id}: {e}")
            return {}
Functions
get_validation_summary(pdb_id) staticmethod

Fetch validation summary for an existing PDB entry.

Returns:

Type Description
list[dict[str, Any]]

A list containing a single dict of shimmed percentile keys, matching the legacy synth-pdb response contract. Returns an empty list on error so callers can always iterate safely.

COMPATIBILITY NOTE

The PDBe v2 API has changed the response structure. This method provides a shim to return keys expected by legacy synth-pdb code.

Source code in synth_pdb/bmrb_api.py
@staticmethod
def get_validation_summary(pdb_id: str, *, timeout: float = 30.0) -> list[dict[str, Any]]:
    """Fetch validation summary for an existing PDB entry.

    Args:
        pdb_id: PDB ID; matched case-insensitively.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list containing a single dict of shimmed percentile keys, matching
        the legacy synth-pdb response contract.  A metric that is absent or
        null in the response maps to ``None``.  Returns an empty list on
        error so callers can always iterate safely.

    COMPATIBILITY NOTE:
        The PDBe v2 API has changed the response structure. This method
        provides a shim to return keys expected by legacy synth-pdb code.
    """
    pid = pdb_id.lower()
    url = f"{PDBValidationAPI.BASE_URL}/global-percentiles/entry/{pid}"
    # Legacy key used in tutorials -> metric name in the v2 response.
    legacy_to_v2 = {
        "absolute_percentile_clashscore": "clashscore",
        "absolute_percentile_ramachandran": "percent-rama-outliers",
        "absolute_percentile_sidechain_outliers": "percent-rota-outliers",
    }
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json().get(pid, {})

        # `or {}` also covers a metric present with a null value, which would
        # otherwise raise and discard the whole summary.
        shim: dict[str, Any] = {
            legacy: (data.get(metric) or {}).get("absolute")
            for legacy, metric in legacy_to_v2.items()
        }
        # Return as a list containing a dict to match the legacy API contract
        # (the old PDBe v1 endpoint returned a list of analysis entries).
        return [shim]
    except Exception as e:
        logger.error(f"Failed to fetch PDBe summary for {pdb_id}: {e}")
        return []
get_validation_outliers(pdb_id) staticmethod

Fetch detailed geometric outliers (Ramachandran, etc.) for a PDB entry.

Source code in synth_pdb/bmrb_api.py
@staticmethod
def get_validation_outliers(pdb_id: str, *, timeout: float = 30.0) -> dict[str, Any]:
    """Fetch detailed geometric outliers (Ramachandran, etc.) for a PDB entry.

    Args:
        pdb_id: PDB ID; matched case-insensitively.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under the lowercased ID in the JSON response, or an
        empty dict when the key is missing or the request fails (the failure
        is logged).
    """
    pid = pdb_id.lower()
    url = f"{PDBValidationAPI.BASE_URL}/residuewise_outlier_summary/entry/{pid}"
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return cast(dict[str, Any], response.json().get(pid, {}))
    except Exception as e:
        logger.error(f"Failed to fetch PDBe outliers for {pdb_id}: {e}")
        return {}

Scientific Basis

BMRB Restraints

In NMR structure calculation, "restraints" are the experimental observations (usually from NOESY experiments) that specify the upper bounds of distances between specific pairs of atoms. By fetching these for a known protein (like Ubiquitin, BMRB 6457), you can test if synth-pdb's generator or energy minimizer produces structures that satisfy the same constraints as the experimental ensemble.

Validation Percentiles

The PDBValidationAPI provides "percentile scores." A score of 95 means the structure is better than 95% of all structures in the PDB for a given metric (e.g., Ramachandran outliers).

Usage Example

from synth_pdb.bmrb_api import BMRBAPI, PDBValidationAPI

# 1. Fetch metadata for Human Ubiquitin (BMRB 6457)
metadata = BMRBAPI.get_entry_metadata("6457")
print(f"Title: {metadata.get('title')}")

# 2. Fetch experimental distance restraints
restraints = BMRBAPI.fetch_restraints("6457")
print(f"Found {len(restraints)} distance restraints.")

# 3. Fetch experimental chemical shifts
shifts = BMRBAPI.fetch_chemical_shifts("6457")
if 1 in shifts:
    print(f"Residue 1 NH shift: {shifts[1].get('HN')} ppm")

# 4. Fetch PDBe validation summary for a related PDB (e.g., 1D3Z)
summary = PDBValidationAPI.get_validation_summary("1D3Z")
# get_validation_summary returns an empty list on error, so check before
# indexing to avoid an IndexError when the request fails.
if summary:
    percentile = summary[0].get("absolute_percentile_clashscore")
    print(f"1D3Z Clashscore Percentile: {percentile}")
else:
    print("1D3Z validation summary unavailable.")