Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of CMSParser #4816

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions package/MDAnalysis/topology/CMSParser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import re
import warnings

import numpy as np

from ..lib.util import openany
from ..core.topologyattrs import (
    Atomnames,
    Atomids,
    Resids,
    Resnames,
    Resnums,
    Segids,
    Resindices,
)
from ..core.topology import Topology
from .base import TopologyReaderBase, change_squash


class CMSParser(TopologyReaderBase):
    """Parser for CMS file format.

    Reads the first ``m_atom[N]`` block of the file and creates the
    following attributes:

    - Atomnames
    - Atomids
    - Resids
    - Resnums (copy of the resids)
    - Resnames
    - Segids

    Records are read positionally; the column numbers below are the ones
    this parser has always used.
    NOTE(review): the positions are hard-coded rather than derived from the
    field names listed in the ``m_atom`` header -- confirm they hold for all
    CMS files that need to be supported.
    """
    format = 'CMS'

    # Header announcing the atom block and its size, e.g. ``m_atom[100] {``.
    _ATOM_HEADER = re.compile(r"m_atom\[\s*(\d+)\s*\]")
    # Splits a record on whitespace while keeping quoted strings together.
    _TOKEN = re.compile(r'".*?"|\S+')
    # Line separating the field declarations from the records (and closing
    # the records again).
    _BLOCK_MARKER = ":::"
    # Records with fewer tokens than this are ignored without a warning.
    _MIN_FIELDS = 7

    # Positions of the values of interest within a tokenised record.
    _COL_ATOM_ID = 0
    _COL_RESID = 5
    _COL_SEGID = 7
    _COL_RESNAME = 11
    _COL_ATOMNAME = 12
    # Used for the atom name when the column above is blank.
    _COL_ATOMNAME_FALLBACK = 14

    def __init__(self, filename):
        super().__init__(filename=filename)

    def parse(self, **kwargs):
        """Parse the CMS file into a Topology.

        Returns
        -------
        MDAnalysis Topology object

        Raises
        ------
        ValueError
            If no ``m_atom[N]`` header with a non-zero atom count is found.

        Warns
        -----
        UserWarning
            For every record that cannot be interpreted, and when the number
            of records read differs from the count declared in the header.
        """
        # openany is the opener shared by the MDAnalysis topology parsers.
        with openany(self.filename) as inf:
            lines = [line.strip() for line in inf]

        n_atoms, header_idx = self._find_atom_header(lines)
        if n_atoms == 0:
            raise ValueError(
                "Number of atoms (n_atoms) could not be found in the file.")

        atom_ids, atomnames, resids, resnames, segids = \
            self._read_atom_records(lines[header_idx + 1:], n_atoms)

        # The arrays above hold one value per atom.  Residue and segment
        # level attributes need one value per residue/segment plus an index
        # telling which residue (segment) each atom (residue) belongs to.
        # A new residue starts whenever resid, resname or segid changes.
        residx, (res_resids, res_resnames, res_segids) = change_squash(
            (resids, resnames, segids), (resids, resnames, segids))
        # A new segment starts whenever the segid changes between residues.
        segidx, (seg_segids,) = change_squash((res_segids,), (res_segids,))

        attrs = [
            Atomnames(atomnames),
            Atomids(atom_ids),
            Resids(res_resids),
            Resnums(res_resids.copy()),
            Resnames(res_resnames),
            Segids(seg_segids),
        ]

        return Topology(n_atoms=n_atoms,
                        n_res=len(res_resids),
                        n_seg=len(seg_segids),
                        attrs=attrs,
                        atom_resindex=residx,
                        residue_segindex=segidx)

    def _find_atom_header(self, lines):
        """Locate the first ``m_atom[N]`` header among stripped *lines*.

        Returns ``(N, index_of_header_line)`` or ``(0, -1)`` if there is none.
        """
        for idx, line in enumerate(lines):
            match = self._ATOM_HEADER.match(line)
            if match:
                return int(match.group(1)), idx
        return 0, -1

    def _read_atom_records(self, lines, n_atoms):
        """Read per-atom values from the stripped *lines* after the header.

        Returns the per-atom arrays
        ``(atom_ids, atomnames, resids, resnames, segids)``, each of length
        *n_atoms*.  Entries for which no record was read keep their zero
        initial value.
        """
        resids = np.zeros(n_atoms, dtype=np.int32)
        resnames = np.zeros(n_atoms, dtype=object)
        segids = np.zeros(n_atoms, dtype=object)
        atomnames = np.zeros(n_atoms, dtype=object)
        atom_ids = np.zeros(n_atoms, dtype=np.int32)

        n_read = 0
        in_records = False
        for line in lines:
            if line.startswith(self._BLOCK_MARKER):
                if in_records:
                    break  # closing marker: all records consumed
                in_records = True  # opening marker: records start next line
                continue
            if not in_records:
                continue  # still inside the field declarations
            if line.startswith(("}", "{")):
                break  # left the atom block

            fields = [tok.strip('"') for tok in self._TOKEN.findall(line)]
            if len(fields) < self._MIN_FIELDS:
                continue

            try:
                atom_id = int(fields[self._COL_ATOM_ID])
                resid = int(fields[self._COL_RESID])
                segid = fields[self._COL_SEGID].strip()
                resname = fields[self._COL_RESNAME].strip()
                atomname = fields[self._COL_ATOMNAME].strip()
                if atomname == '':
                    atomname = fields[self._COL_ATOMNAME_FALLBACK].strip()
            except (ValueError, IndexError):
                # Best effort: a malformed record is reported and skipped.
                warnings.warn(f"Skipping invalid line: {line}")
                continue

            if n_read >= n_atoms:
                warnings.warn(
                    f"More atom records found than the {n_atoms} declared "
                    "in the m_atom[] header; ignoring the remaining records.")
                break

            atom_ids[n_read] = atom_id
            resids[n_read] = resid
            segids[n_read] = segid
            resnames[n_read] = resname
            atomnames[n_read] = atomname
            n_read += 1

        if n_read < n_atoms:
            warnings.warn(
                f"Only {n_read} atom records could be read but the m_atom[] "
                f"header declares {n_atoms}; the remaining entries are "
                "left at their zero default.")

        return atom_ids, atomnames, resids, resnames, segids

Check warning on line 120 in package/MDAnalysis/topology/CMSParser.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/topology/CMSParser.py#L120

Added line #L120 was not covered by tests
3 changes: 2 additions & 1 deletion package/MDAnalysis/topology/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@
__all__ = ['core', 'PSFParser', 'PDBParser', 'PQRParser', 'GROParser',
'CRDParser', 'TOPParser', 'PDBQTParser', 'TPRParser',
'LAMMPSParser', 'XYZParser', 'GMSParser', 'DLPolyParser',
'HoomdXMLParser','GSDParser', 'ITPParser']
'HoomdXMLParser','GSDParser', 'ITPParser', 'CMSParser']

from . import core
from . import PSFParser
Expand All @@ -332,3 +332,4 @@
from . import MinimalParser
from . import ITPParser
from . import FHIAIMSParser
from . import CMSParser
Loading