Skip to content

Commit

Permalink
process pdb before loading it (#17)
Browse files Browse the repository at this point in the history
* add `process_pdb`

* update test data with haddock-related headers

* update dependencies
  • Loading branch information
rvhonorato authored Aug 22, 2024
1 parent 00e70fd commit 988002a
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 30 deletions.
79 changes: 54 additions & 25 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ kd-tree = "0.6.0"
clap = { version = "4.5", features = ["derive"] }
nalgebra = "0.33.0"
itertools = "0.13.0"
pdb-handler = "0.1.0"
pdb-handler = "0.1"
tempfile = "3.12"
68 changes: 64 additions & 4 deletions src/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ use rand::rngs::StdRng;
use rand::seq::SliceRandom;
use rand::SeedableRng;
use std::fs::File;
use std::io::Write;
use std::io::BufReader;
use std::io::Cursor;
use std::io::{Read, Write};

#[derive(Clone)]
pub struct Bead {
Expand Down Expand Up @@ -564,11 +566,10 @@ pub fn get_atoms_from_resnumbers(pdb: &PDB, selection: &[isize]) -> Vec<Atom> {
/// - Any warnings generated during parsing are discarded. If you need to handle warnings,
/// consider modifying the function to return them along with the PDB structure.
pub fn load_pdb(input_pdb: &str) -> Result<PDB, Vec<PDBError>> {
// Pad lines before reading the PDB file
let padded_reader = pdb_handler::pad_lines(input_pdb);
let processed_buf = process_pdb(input_pdb);

let pdb = pdbtbx::open_pdb_raw(
padded_reader,
processed_buf,
pdbtbx::Context::None,
pdbtbx::StrictnessLevel::Loose,
);
Expand All @@ -579,10 +580,53 @@ pub fn load_pdb(input_pdb: &str) -> Result<PDB, Vec<PDBError>> {
}
}

/// Strips `REMARK` records from a PDB file and pads its lines, returning the result in memory.
///
/// The work is delegated to `pdb_handler` in two passes:
/// 1. `pdb_handler::remove_remark` drops all lines that start with "REMARK".
/// 2. `pdb_handler::pad_lines` pads lines starting with "ATOM" to a length of 80 characters.
///
/// `pad_lines` takes a file path rather than a reader, so the output of the first pass is
/// written to a temporary file that only needs to live until `pad_lines` returns.
///
/// # Arguments
///
/// * `input_pdb` - A string slice that holds the path to the input PDB file.
///
/// # Returns
///
/// A `BufReader<Cursor<Vec<u8>>>` containing the processed PDB content. The reader is backed
/// by an in-memory buffer, so it does not depend on the temporary file.
///
/// # Panics
///
/// This function panics if:
/// - The REMARK-stripped content cannot be read as UTF-8.
/// - A temporary file cannot be created, written to, or flushed.
/// - The temporary file path is not valid UTF-8.
/// - The `remove_remark` or `pad_lines` functions panic (e.g. the input file cannot be read).
///
pub fn process_pdb(input_pdb: &str) -> BufReader<Cursor<Vec<u8>>> {
    // Remove remarks from the PDB file
    let mut no_remarks = pdb_handler::remove_remark(input_pdb);

    // Read through the reader itself instead of unwrapping it with `into_inner()`:
    // unwrapping a `BufReader` discards whatever it has already buffered.
    let mut content = String::new();
    no_remarks
        .read_to_string(&mut content)
        .expect("failed to read the REMARK-stripped PDB content as UTF-8");

    let mut temp_file = tempfile::NamedTempFile::new()
        .expect("failed to create a temporary file for the REMARK-stripped PDB");

    // Write the content to the temporary file and make sure it reaches the file
    // before `pad_lines` reopens it by path.
    temp_file
        .write_all(content.as_bytes())
        .expect("failed to write the REMARK-stripped PDB to the temporary file");
    temp_file
        .flush()
        .expect("failed to flush the temporary PDB file");

    let temp_path = temp_file
        .path()
        .to_str()
        .expect("temporary file path is not valid UTF-8");

    // Pad lines before reading the PDB file. `temp_file` stays alive until this call
    // returns, so the path is still valid while `pad_lines` reads from it.
    pdb_handler::pad_lines(temp_path)
}

#[cfg(test)]
mod tests {

use std::env;
use std::io::BufRead;

use super::*;

Expand Down Expand Up @@ -625,4 +669,20 @@ mod tests {
let pdb = load_pdb(pdb_path.to_str().unwrap()).unwrap();
assert_eq!(pdb.atoms().count(), 8);
}

#[test]
fn test_process_pdb() {
    // Fixture mixes HADDOCK-style REMARK headers with ATOM records shorter than 80 columns.
    let processed_buf = process_pdb("tests/data/remark_short_lines.pdb");

    let lines: Vec<String> = processed_buf
        .lines()
        .map(|l| l.expect("processed PDB should be readable line by line"))
        .collect();

    // Check that there are no REMARK lines
    assert!(
        lines.iter().all(|line| !line.starts_with("REMARK")),
        "REMARK lines should have been removed"
    );

    let atom_lines: Vec<&String> = lines
        .iter()
        .filter(|line| line.starts_with("ATOM"))
        .collect();

    // Without this guard the length check below would pass vacuously if processing
    // dropped every ATOM record.
    assert!(
        !atom_lines.is_empty(),
        "processed output should still contain ATOM lines"
    );

    // Check that all ATOM lines are 80 characters long
    assert!(
        atom_lines.iter().all(|line| line.len() == 80),
        "every ATOM line should be padded to 80 characters"
    );
}
}
45 changes: 45 additions & 0 deletions tests/data/complex.pdb
Original file line number Diff line number Diff line change
@@ -1,3 +1,48 @@
REMARK FILENAME="emscoring_1.pdb"
REMARK ===============================================================
REMARK HADDOCK stats for emscoring_1.pdb
REMARK ===============================================================
REMARK HADDOCK score: -54.1511
REMARK ===============================================================
REMARK initial structure 1 - ../0_topoaa/complexyrrk72_y_haddock.pdb
REMARK ===============================================================
REMARK total,bonds,angles,improper,dihe,vdw,elec,air,cdih,coup,rdcs,vean,dani,xpcs,rg
REMARK energies: -106.18, 0, 0, 0, 0, -30.4344, -75.7457, 0, 0, 0, 0, 0, 0, 0, 0
REMARK ===============================================================
REMARK bonds,angles,impropers,dihe,air,cdih,coup,rdcs,vean,dani,xpcs
REMARK rms-dev.: 0,0,0,0,0,0,0, 0, 0, 0, 0
REMARK ===============================================================
REMARK air,cdih,coup,rdcs,vean,dani,xpcs
REMARK >0.3,>5,>1,>0,>5,>0.2,>0.2
REMARK violations.: 0, 0, 0, 0, 0, 0, 0
REMARK ===============================================================
REMARK CVpartition#,violations,rms
REMARK AIRs cross-validation: 0, 0, 0
REMARK ===============================================================
REMARK NCS energy: 0
REMARK ===============================================================
REMARK Symmetry energy: 0
REMARK ===============================================================
REMARK Membrane restraining energy: 0
REMARK ===============================================================
REMARK Local cross-correlation: 0.0000
REMARK ===============================================================
REMARK Desolvation energy: -8.56748
REMARK Internal energy free molecules: -4515.85
REMARK Internal energy complex: -4364.86
REMARK Binding energy: 36.2429
REMARK ===============================================================
REMARK buried surface area: 802.679
REMARK ===============================================================
REMARK Total HADDOCK score without restraints: -54.1511
REMARK ===============================================================
REMARK water - chain-A: 0 0 0
REMARK water - chain-B: 0 0 0
REMARK ===============================================================
REMARK water - water: 0 0 0
REMARK ===============================================================
REMARK DATE:22-Aug-2024 15:23:46 created by user: unknown
REMARK VERSION:1.3U
ATOM 1 N LEU A 929 26.117 -5.693 15.769 1.00 29.81 N
ATOM 2 CA LEU A 929 27.153 -4.857 15.061 1.00 29.93 C
ATOM 3 C LEU A 929 26.486 -3.835 14.143 1.00 29.74 C
Expand Down
53 changes: 53 additions & 0 deletions tests/data/remark_short_lines.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
REMARK FILENAME="emscoring_1.pdb"
REMARK ===============================================================
REMARK HADDOCK stats for emscoring_1.pdb
REMARK ===============================================================
REMARK HADDOCK score: -54.1511
REMARK ===============================================================
REMARK initial structure 1 - ../0_topoaa/complexyrrk72_y_haddock.pdb
REMARK ===============================================================
REMARK total,bonds,angles,improper,dihe,vdw,elec,air,cdih,coup,rdcs,vean,dani,xpcs,rg
REMARK energies: -106.18, 0, 0, 0, 0, -30.4344, -75.7457, 0, 0, 0, 0, 0, 0, 0, 0
REMARK ===============================================================
REMARK bonds,angles,impropers,dihe,air,cdih,coup,rdcs,vean,dani,xpcs
REMARK rms-dev.: 0,0,0,0,0,0,0, 0, 0, 0, 0
REMARK ===============================================================
REMARK air,cdih,coup,rdcs,vean,dani,xpcs
REMARK >0.3,>5,>1,>0,>5,>0.2,>0.2
REMARK violations.: 0, 0, 0, 0, 0, 0, 0
REMARK ===============================================================
REMARK CVpartition#,violations,rms
REMARK AIRs cross-validation: 0, 0, 0
REMARK ===============================================================
REMARK NCS energy: 0
REMARK ===============================================================
REMARK Symmetry energy: 0
REMARK ===============================================================
REMARK Membrane restraining energy: 0
REMARK ===============================================================
REMARK Local cross-correlation: 0.0000
REMARK ===============================================================
REMARK Desolvation energy: -8.56748
REMARK Internal energy free molecules: -4515.85
REMARK Internal energy complex: -4364.86
REMARK Binding energy: 36.2429
REMARK ===============================================================
REMARK buried surface area: 802.679
REMARK ===============================================================
REMARK Total HADDOCK score without restraints: -54.1511
REMARK ===============================================================
REMARK water - chain-A: 0 0 0
REMARK water - chain-B: 0 0 0
REMARK ===============================================================
REMARK water - water: 0 0 0
REMARK ===============================================================
REMARK DATE:22-Aug-2024 15:23:46 created by user: unknown
REMARK VERSION:1.3U
ATOM 1 N LEU A 929 26.117 -5.693 15.769 1.00 29.81 A
ATOM 2 CA LEU A 929 27.153 -4.857 15.061 1.00 29.93 A
ATOM 3 C LEU A 929 26.486 -3.835 14.143 1.00 29.74 A
ATOM 4 O LEU A 929 26.790 -2.625 14.181 1.00 29.50 A
ATOM 5 CB LEU A 929 28.096 -5.742 14.248 1.00 30.37 A
ATOM 6 CG LEU A 929 29.617 -5.471 14.154 1.00 28.52 A
ATOM 7 CD1 LEU A 929 30.109 -5.719 12.711 1.00 27.72 A
ATOM 8 CD2 LEU A 929 30.111 -4.134 14.702 1.00 25.22 A

0 comments on commit 988002a

Please sign in to comment.