diff --git a/eppic-cli/pom.xml b/eppic-cli/pom.xml index 4dc59d64c..c33920c18 100644 --- a/eppic-cli/pom.xml +++ b/eppic-cli/pom.xml @@ -8,7 +8,7 @@ eppic-cli - 4.2.10 + 5.0.0 @@ -261,16 +261,7 @@ maven-surefire-plugin - 2.19.1 - - - - - **/TestLargeStructures.java - - - + 2.21.0 diff --git a/eppic-cli/src/main/java/eppic/ChainEvolContext.java b/eppic-cli/src/main/java/eppic/ChainEvolContext.java index 19e859d5b..d89e1df06 100644 --- a/eppic-cli/src/main/java/eppic/ChainEvolContext.java +++ b/eppic-cli/src/main/java/eppic/ChainEvolContext.java @@ -14,9 +14,8 @@ import org.biojava.nbio.core.exceptions.CompoundNotFoundException; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Compound; +import org.biojava.nbio.structure.EntityInfo; import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.StructureTools; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; @@ -105,7 +104,7 @@ public String toString() { private boolean isProtein; - private Compound compound; + private EntityInfo entity; /** @@ -122,7 +121,7 @@ public ChainEvolContext(ChainEvolContextList parent, String sequence, String seq this.searchWithFullUniprot = true; this.queryWarnings = new ArrayList(); this.isProtein = true; - this.compound = null; + this.entity = null; } /** @@ -130,18 +129,18 @@ public ChainEvolContext(ChainEvolContextList parent, String sequence, String seq * @param parent * @param compound */ - public ChainEvolContext(ChainEvolContextList parent, Compound compound) { + public ChainEvolContext(ChainEvolContextList parent, EntityInfo compound) { this.parent = parent; Chain chain = compound.getRepresentative(); - this.sequenceId = chain.getChainID(); + this.sequenceId = chain.getName(); this.hasQueryMatch = false; this.searchWithFullUniprot = true; this.queryWarnings = new ArrayList(); - this.isProtein = StructureTools.isProtein(chain); + this.isProtein = chain.isProtein(); - this.compound = compound; + this.entity = compound; this.pdbToUniProtMapper = new PdbToUniProtMapper(compound); @@ -156,8 +155,8 @@ public boolean isProtein() { return isProtein; } - public Compound getCompound() { - return compound; + public EntityInfo getCompound() { + return entity; } /** @@ -526,13 +525,13 @@ public void blastForHomologs(EppicParams params) if (searchMode==HomologsSearchMode.GLOBAL) { LOGGER.info("Using full UniProt sequence {} {}-{} for blast search (entity {})", - query.getUniId(), queryInterv.beg, queryInterv.end, compound.getMolId()); + query.getUniId(), queryInterv.beg, queryInterv.end, entity.getMolId()); searchWithFullUniprot = true; } else if (searchMode==HomologsSearchMode.LOCAL) { LOGGER.info("Using UniProt {} subsequence {}-{} for blast search (entity {})", - query.getUniId(), queryInterv.beg, queryInterv.end, compound.getMolId()); + query.getUniId(), queryInterv.beg, queryInterv.end, entity.getMolId()); searchWithFullUniprot = false; } diff --git a/eppic-cli/src/main/java/eppic/ChainEvolContextList.java b/eppic-cli/src/main/java/eppic/ChainEvolContextList.java index f57d0e04b..de2266f9e 100644 --- a/eppic-cli/src/main/java/eppic/ChainEvolContextList.java +++ b/eppic-cli/src/main/java/eppic/ChainEvolContextList.java @@ -7,9 +7,9 @@ import java.util.List; import java.util.TreeMap; -import org.biojava.nbio.structure.Compound; +import org.biojava.nbio.structure.EntityInfo; +import org.biojava.nbio.structure.EntityType; import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,14 +66,17 @@ public ChainEvolContextList(Structure pdb, EppicParams params) throws SQLExcepti this.useLocalUniprot = false; } - for (Compound chainCluster:pdb.getCompounds()) { - - // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 - if (chainCluster.getChains().isEmpty()) continue; - - ChainEvolContext cec = new ChainEvolContext(this, chainCluster); - - cecs.put(cec.getSequenceId(), cec); + for (EntityInfo chainCluster:pdb.getEntityInfos()) { + + if (chainCluster.getType() == EntityType.POLYMER) { + + // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 + if (chainCluster.getChains().isEmpty()) continue; + + ChainEvolContext cec = new ChainEvolContext(this, chainCluster); + + cecs.put(cec.getSequenceId(), cec); + } } } @@ -115,13 +118,8 @@ public void addChainEvolContext(String representativeChain, ChainEvolContext cec * @return */ public ChainEvolContext getChainEvolContext(String pdbChainCode) { - try { - Compound compound = pdb.getChainByPDB(pdbChainCode).getCompound(); - return cecs.get( compound.getRepresentative().getChainID() ); - } catch (StructureException e) { - LOGGER.error("Unexpected exception",e); - return null; - } + EntityInfo compound = pdb.getPolyChainByPDB(pdbChainCode).getEntityInfo(); + return cecs.get( compound.getRepresentative().getName() ); } /** diff --git a/eppic-cli/src/main/java/eppic/DataModelAdaptor.java b/eppic-cli/src/main/java/eppic/DataModelAdaptor.java index af9f1e686..60fa62858 100644 --- a/eppic-cli/src/main/java/eppic/DataModelAdaptor.java +++ b/eppic-cli/src/main/java/eppic/DataModelAdaptor.java @@ -14,15 +14,9 @@ import java.util.Set; import java.util.TreeSet; -import org.biojava.nbio.structure.Atom; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Compound; -import org.biojava.nbio.structure.ExperimentalTechnique; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.PDBCrystallographicInfo; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureTools; +import org.biojava.nbio.structure.*; import org.biojava.nbio.structure.asa.GroupAsa; +import org.biojava.nbio.structure.cluster.SubunitClustererParameters; import org.biojava.nbio.structure.contact.AtomContact; import org.biojava.nbio.structure.contact.GroupContact; import org.biojava.nbio.structure.contact.GroupContactSet; @@ -94,7 +88,7 @@ public class DataModelAdaptor { public static final int INVALID_ASSEMBLY_ID = 0; private PdbInfoDB pdbInfo; - + private EppicParams params; private RunParametersDB runParameters; @@ -176,19 +170,24 @@ public void setPdbMetadata(Structure pdb) { pdbInfo.setCellBeta(cc.getBeta()); pdbInfo.setCellGamma(cc.getGamma()); } - + + } + + public void setChainClustersData(Structure pdb, Map chainOrigNames) { List chainClusterDBs = new ArrayList(); - for (Compound compound:pdb.getCompounds()) { - - // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 - if (compound.getChains().isEmpty()) continue; + for (EntityInfo compound:pdb.getEntityInfos()) { + + if (compound.getType() == EntityType.POLYMER) { + // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 + if (compound.getChains().isEmpty()) continue; - chainClusterDBs.add(createChainCluster(compound)); + chainClusterDBs.add(createChainCluster(compound, chainOrigNames)); + } } pdbInfo.setNumChainClusters(chainClusterDBs.size()); pdbInfo.setChainClusters(chainClusterDBs); - + initAsymIds2chainIdsMap(pdb); } @@ -199,26 +198,27 @@ public void setPdbMetadata(Structure pdb) { *

* Note that the map should work in most cases, but it's not guaranteed because there is a one-to-many * relationship between author chain ids and asym ids (internal ids). This is the best we can do - * with the data available from Biojava 4.2 - * @param pdb + * with the data available from Biojava 4.2 + * TODO check if we still need with BioJava 5 + * @param pdb the structure */ private void initAsymIds2chainIdsMap(Structure pdb) { asymIds2chainIds = new HashMap<>(); for (Chain c : pdb.getChains()) { - asymIds2chainIds.put(c.getInternalChainID(), c.getChainID()); + asymIds2chainIds.put(c.getId(), c.getName()); } } - - private ChainClusterDB createChainCluster(Compound compound) { + + private ChainClusterDB createChainCluster(EntityInfo compound, Map chainOrigNames) { ChainClusterDB chainClusterDB = new ChainClusterDB(); chainClusterDB.setPdbCode(pdbInfo.getPdbCode()); - chainClusterDB.setRepChain(compound.getRepresentative().getChainID()); - chainClusterDB.setMemberChains(getMemberChainsString(compound)); - chainClusterDB.setNumMembers(compound.getChainIds().size()); - chainClusterDB.setProtein(StructureTools.isProtein(compound.getRepresentative())); + chainClusterDB.setRepChain(compound.getRepresentative().getName()); + chainClusterDB.setMemberChains(getMemberChainsString(compound, chainOrigNames)); + chainClusterDB.setNumMembers(getUniqueChainNames(compound, chainOrigNames).size()); + chainClusterDB.setProtein(compound.getRepresentative().isProtein()); chainClusterDB.setPdbInfo(pdbInfo); @@ -239,7 +239,7 @@ private ChainClusterDB createChainCluster(Compound compound) { residueInfoDB.setChainCluster(chainClusterDB); residueInfoDB.setPdbCode(pdbInfo.getPdbCode()); - residueInfoDB.setRepChain(compound.getRepresentative().getChainID()); + residueInfoDB.setRepChain(compound.getRepresentative().getName()); // NOTE, here there can be 2 behaviours: // 1) there is a SEQRES and getAlignedResIndex gives the actual SEQRES indices @@ -281,7 +281,7 @@ private ChainClusterDB createChainCluster(Compound compound) { return chainClusterDB; } - private List getGroups(Compound compound) { + private List getGroups(EntityInfo compound) { List groups = new ArrayList(); @@ -302,9 +302,9 @@ private List getGroups(Compound compound) { } public void setInterfaces(StructureInterfaceList interfaces) { - - List interfaceClusters = interfaces.getClusters(EppicParams.CLUSTERING_CONTACT_OVERLAP_SCORE_CUTOFF); + List interfaceClusters = reduceToNcsUnique(interfaces); + List icDBs = new ArrayList(); for (StructureInterfaceCluster ic:interfaceClusters) { InterfaceClusterDB icDB = new InterfaceClusterDB(); @@ -374,14 +374,14 @@ public void setInterfaces(StructureInterfaceList interfaces) { ContactDB contact = new ContactDB(); Group firstGroup = groupContact.getPair().getFirst(); Group secondGroup = groupContact.getPair().getSecond(); - if (firstGroup.getChain().getCompound()==null) + if (firstGroup.getChain().getEntityInfo()==null) contact.setFirstResNumber(UNKNOWN_RESIDUE_INDEX); else - contact.setFirstResNumber(firstGroup.getChain().getCompound().getAlignedResIndex(firstGroup, firstGroup.getChain()) ); - if (secondGroup.getChain().getCompound()==null) + contact.setFirstResNumber(firstGroup.getChain().getEntityInfo().getAlignedResIndex(firstGroup, firstGroup.getChain()) ); + if (secondGroup.getChain().getEntityInfo()==null) contact.setSecondResNumber(UNKNOWN_RESIDUE_INDEX); else - contact.setSecondResNumber(secondGroup.getChain().getCompound().getAlignedResIndex(secondGroup, secondGroup.getChain()) ); + contact.setSecondResNumber(secondGroup.getChain().getEntityInfo().getAlignedResIndex(secondGroup, secondGroup.getChain()) ); contact.setFirstResType(firstGroup.getPDBName()); contact.setSecondResType(secondGroup.getPDBName()); GroupAsa firstGroupAsa = interf.getFirstGroupAsa(firstGroup.getResidueNumber()); @@ -455,6 +455,58 @@ public int compare(ContactDB first, ContactDB second) { pdbInfo.setMaxNumClashesAnyInterface(Collections.max(numClashesPerInterface)); } + + private List reduceToNcsUnique(StructureInterfaceList interfaces) { + List clusters = interfaces.getClusters(EppicParams.CLUSTERING_CONTACT_OVERLAP_SCORE_CUTOFF); + + if (!pdbInfo.isNcsOpsPresent()) { + // no NCS case (normal case), return clusters as is + return clusters; + } + + // NCS case. We need to reduce to the unique-to-NCS set + List interfaceClustersNcs = interfaces.getClustersNcs(); + + List reduced = new ArrayList<>(); + for (StructureInterfaceCluster cluster : clusters) { + Set indices = new TreeSet<>(); + for (StructureInterface interf : cluster.getMembers()) { + indices.add(getCorrespondingClustersIndex(interf, interfaceClustersNcs)); + } + + StructureInterfaceCluster reducedCluster = new StructureInterfaceCluster(); + reducedCluster.setId(cluster.getId()); + reducedCluster.setAverageScore(cluster.getAverageScore()); + for (int i : indices) { + // we add one interface per NCS interface cluster + StructureInterface interf = interfaceClustersNcs.get(i).getMembers().get(0); + if (interf.getCluster()==null) { + LOGGER.warn("Interface {} is not associated to a cluster. Something might be wrong", interf.getId()); + } else if (interf.getCluster().getId() != reducedCluster.getId()) { + LOGGER.warn("Interface {} belongs to cluster {}. It should not be added to cluster id {}", + interf.getId(), interf.getCluster().getId(), reducedCluster.getId()); + } + reducedCluster.addMember(interf); + // we add also the new back-reference to the parent + interf.setCluster(reducedCluster); + } + + reduced.add(reducedCluster); + } + + return reduced; + } + + private static int getCorrespondingClustersIndex(StructureInterface interf, List interfaceClustersNcs) { + for (int i = 0; i< interfaceClustersNcs.size(); i++) { + for (StructureInterface s : interfaceClustersNcs.get(i).getMembers()) { + if (s.getId() == interf.getId()) { + return i; + } + } + } + return -1; + } public void setAssemblies(CrystalAssemblies validAssemblies) { @@ -716,6 +768,7 @@ private static Assembly getMatchingAssembly(Assembly pdbAssembly, CrystalAssembl * For the given PDB bio unit (first in PDB annotation), map the PDB-annotated interfaces * to our interface cluster ids * @param bioUnit + * @param cell * @return the list of matching cluster ids */ private Set matchToInterfaceClusters(BioAssemblyInfo bioUnit, CrystalCell cell) { @@ -734,9 +787,9 @@ private Set matchToInterfaceClusters(BioAssemblyInfo bioUnit, CrystalCe } if (!im.checkTheirsMatch()) { - String msg = ""; + StringBuilder msg = new StringBuilder(); for (SimpleInterface theirI:im.getTheirsNotMatching()) { - msg += theirI.toString()+"\t"; + msg.append(theirI.toString()).append("\t"); } // This actually happens even if the mapping is fine. That's because we enumerate the biounit @@ -744,7 +797,7 @@ private Set matchToInterfaceClusters(BioAssemblyInfo bioUnit, CrystalCe // 2 molecules don't make a contact. LOGGER.info("Some interfaces of PDB bio unit "+EppicParams.PDB_BIOUNIT_TO_USE+ " do not match any of the EPPIC interfaces."+ - " Non-matching interfaces are: "+msg); + " Non-matching interfaces are: "+msg.toString()); } @@ -794,6 +847,10 @@ public void setGeometryScores(List gps, List cps, List iril = new ArrayList(); @@ -1177,7 +1246,7 @@ private void addResidueBurialDetailsOfPartner(List iril, Struct else if (molecId == InterfaceEvolContext.SECOND) chain = interf.getParentChains().getSecond(); - String repChainId = chain.getCompound().getRepresentative().getChainID(); + String repChainId = chain.getEntityInfo().getRepresentative().getName(); ChainClusterDB chainCluster = pdbInfo.getChainCluster(repChainId); @@ -1246,7 +1315,7 @@ else if (molecId==InterfaceEvolContext.SECOND) // residue in the representative chain (the one we store in the residueInfos in chainCluster). // Thus the issues with residue serials in SEQRES/no SEQRES case will hit here! // See the comment in createChainCluster - int resser = chain.getCompound().getAlignedResIndex(group, chain); + int resser = chain.getEntityInfo().getAlignedResIndex(group, chain); if (resser==-1) { if (noseqres) LOGGER.warn("Could not get a residue serial for group '{}' to connect ResidueBurial to ResidueInfo", group.toString()); @@ -1298,11 +1367,11 @@ public void setInterfaceWarnings() { } } - public static String getChainClusterString(Compound compound) { + public static String getChainClusterString(EntityInfo compound) { StringBuilder sb = new StringBuilder(); - sb.append(compound.getRepresentative().getChainID()); + sb.append(compound.getRepresentative().getName()); List uniqChainIds = compound.getChainIds(); @@ -1310,7 +1379,7 @@ public static String getChainClusterString(Compound compound) { sb.append(" ("); for (String chainId:uniqChainIds) { - if (chainId.equals(compound.getRepresentative().getChainID())) { + if (chainId.equals(compound.getRepresentative().getName())) { continue; } @@ -1325,14 +1394,30 @@ public static String getChainClusterString(Compound compound) { return sb.toString(); } - public static String getMemberChainsString(Compound compound) { - List uniqChainIds = compound.getChainIds(); - + private Set getUniqueChainNames(EntityInfo compound, Map chainOrigNames) { + List chains = compound.getChains(); + Set uniqChainNames = new TreeSet<>(); + for (Chain c : chains) { + String chainName; + if(chainOrigNames!=null) { // will only be not null in cases with NCS ops + chainName = chainOrigNames.get(c.getName()); + } else { + chainName = c.getName(); + } + uniqChainNames.add(chainName); + } + return uniqChainNames; + } + + private String getMemberChainsString(EntityInfo compound, Map chainOrigNames) { + + Set uniqChainNames = getUniqueChainNames(compound, chainOrigNames); + StringBuilder sb = new StringBuilder(); int i = 0; - for (String chainId:uniqChainIds) { + for (String chainId:uniqChainNames) { sb.append(chainId); - if (i!=uniqChainIds.size()-1) sb.append(","); + if (i!=uniqChainNames.size()-1) sb.append(","); i++; } return sb.toString(); @@ -1361,8 +1446,9 @@ public static boolean isDisulfideInteraction(GroupContact groupContact) { /** * Finds the symmetry of the biounit with the biojava quat symmetry algorithms + * @param pdb the au of the structure * @param bioUnitNumber - * @return an array of size 4 with members: symmetry, stoichiometry, pseudosymmetry, pseudoStoichiometry + * @return an array of size 2 with members: symmetry, stoichiometry */ private static String[] getSymmetry(Structure pdb, int bioUnitNumber) { @@ -1372,7 +1458,7 @@ private static String[] getSymmetry(Structure pdb, int bioUnitNumber) { pdb.getPDBHeader().getBioAssemblies().get(bioUnitNumber).getTransforms().size() == 0){ LOGGER.warn("Could not load transformations for PDB biounit {}. Will not assign a symmetry value to it.", bioUnitNumber); - return new String[]{null,null,null,null}; + return new String[]{null,null}; } List transformations = @@ -1381,72 +1467,32 @@ private static String[] getSymmetry(Structure pdb, int bioUnitNumber) { BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); - Structure bioAssembly = builder.rebuildQuaternaryStructure(pdb, transformations); + Structure bioAssembly = builder.rebuildQuaternaryStructure(pdb, transformations, true, false); QuatSymmetryParameters parameters = new QuatSymmetryParameters(); parameters.setOnTheFly(true); - parameters.setLocalSymmetry(false); - parameters.setVerbose(false); + SubunitClustererParameters clusterParams = new SubunitClustererParameters(); - QuatSymmetryDetector detector = new QuatSymmetryDetector(bioAssembly, parameters); + // TODO not sure if this is still possible in biojava 5 + //if (!detector.hasProteinSubunits()) { + // LOGGER.info("No protein chains in biounit {}, can't calculate symmetry. Will not assign a symmetry value to it.", bioUnitNumber); + // return new String[]{null,null}; + //} - if (!detector.hasProteinSubunits()) { - LOGGER.info("No protein chains in biounit {}, can't calculate symmetry. Will not assign a symmetry value to it.", bioUnitNumber); - return new String[]{null,null,null,null}; - } - - List globalResults = detector.getGlobalSymmetry(); + QuatSymmetryResults globalResults = QuatSymmetryDetector.calcGlobalSymmetry(bioAssembly, parameters, clusterParams); - if (globalResults.isEmpty()) { + if (globalResults == null) { LOGGER.warn("No global symmetry found for biounit {}. Will not assign a symmetry value to it.", bioUnitNumber); - return new String[]{null, null, null, null}; + return new String[]{null, null}; } - String symmetry = null; - String stoichiometry = null; - String pseudoSymmetry = null; - String pseudoStoichiometry = null; - - - if (globalResults.size()>2) { - StringBuilder sb = new StringBuilder(); - for (QuatSymmetryResults r:globalResults) { - sb.append(r.getSymmetry()+" "); - } - LOGGER.warn("More than 2 symmetry results found for biounit {}. The {} results are: {}", - bioUnitNumber, globalResults.size(), sb.toString()); - } + String symmetry = globalResults.getSymmetry(); - for (QuatSymmetryResults r:globalResults) { - - if (r.getSubunits().isPseudoSymmetric()) { - pseudoSymmetry = r.getSymmetry(); - pseudoStoichiometry = r.getSubunits().getStoichiometry(); - LOGGER.info("Pseudosymmetry {} (stoichiometry {}) found in biounit {}", - pseudoSymmetry, pseudoStoichiometry, bioUnitNumber); - } else { - symmetry = r.getSymmetry(); - stoichiometry = r.getSubunits().getStoichiometry(); - LOGGER.info("Symmetry {} (stoichiometry {}) found in biounit {}", - symmetry, stoichiometry, bioUnitNumber); - } - - } - // note: if there's no pseudosymmetry in the results then it remains null - - - if (symmetry==null) { - // this should not happen, will there ever be no global symmetry (non-pseudo) in the results? - LOGGER.warn("Could not find global symmetry for biounit {}. Will not assign a symmetry value to it.", bioUnitNumber); - } else if (stoichiometry==null){ - LOGGER.warn("Symmetry found for biounit {}, but no stoichiometry value associated to it.", bioUnitNumber); - } - - if (pseudoSymmetry!=null && pseudoStoichiometry==null) { - LOGGER.warn("Pseudosymmetry found for biounit {}, but no stoichiometry value associated to it", bioUnitNumber); - } + String stoichiometry = globalResults.getStoichiometry().toString(); + LOGGER.info("Symmetry {} (stoichiometry {}) found in biounit {}", + symmetry, stoichiometry, bioUnitNumber); - return new String[]{symmetry, stoichiometry, pseudoSymmetry, pseudoStoichiometry}; + return new String[]{symmetry, stoichiometry}; } } diff --git a/eppic-cli/src/main/java/eppic/EppicParams.java b/eppic-cli/src/main/java/eppic/EppicParams.java index 9c159d977..975cea12c 100644 --- a/eppic-cli/src/main/java/eppic/EppicParams.java +++ b/eppic-cli/src/main/java/eppic/EppicParams.java @@ -894,6 +894,10 @@ public void setInFile(File inFile) { public boolean isGenerateOutputCoordFiles() { return generateOutputCoordFiles; } + + public void setGenerateOutputCoordFiles(boolean generateOutputCoordFiles) { + this.generateOutputCoordFiles = generateOutputCoordFiles; + } public boolean isGenerateThumbnails() { return generateThumbnails; @@ -903,6 +907,10 @@ public boolean isGenerateDiagrams() { return generateDiagrams; } + public void setGenerateDiagrams(boolean generateDiagrams) { + this.generateDiagrams = generateDiagrams; + } + public boolean isGeneratePdbFiles() { return generatePdbFiles; } @@ -910,6 +918,10 @@ public boolean isGeneratePdbFiles() { public boolean isGenerateModelSerializedFile() { return generateModelSerializedFile; } + + public void setGenerateModelSerializedFile(boolean generateModelSerializedFile) { + this.generateModelSerializedFile = generateModelSerializedFile; + } public boolean isNoBlast() { return noBlast; diff --git a/eppic-cli/src/main/java/eppic/InterfaceEvolContext.java b/eppic-cli/src/main/java/eppic/InterfaceEvolContext.java index 598b9bb93..a30214943 100644 --- a/eppic-cli/src/main/java/eppic/InterfaceEvolContext.java +++ b/eppic-cli/src/main/java/eppic/InterfaceEvolContext.java @@ -8,7 +8,6 @@ import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.Chain; import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.StructureTools; import org.biojava.nbio.structure.contact.StructureInterface; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -280,9 +279,9 @@ public int getMinNumSeqs() { public static boolean isProtein(StructureInterface interf, int molecId) { if (molecId==FIRST) { - return StructureTools.isProtein(interf.getMolecules().getFirst()[0].getGroup().getChain()); + return interf.getMolecules().getFirst()[0].getGroup().getChain().isProtein(); } else if (molecId==SECOND) { - return StructureTools.isProtein(interf.getMolecules().getSecond()[0].getGroup().getChain()); + return interf.getMolecules().getSecond()[0].getGroup().getChain().isProtein(); } else { throw new IllegalArgumentException("Fatal error! Wrong molecId "+molecId); } diff --git a/eppic-cli/src/main/java/eppic/Main.java b/eppic-cli/src/main/java/eppic/Main.java index 0f96e4aba..722dd9a5b 100644 --- a/eppic-cli/src/main/java/eppic/Main.java +++ b/eppic-cli/src/main/java/eppic/Main.java @@ -14,19 +14,15 @@ import java.net.InetAddress; import java.net.UnknownHostException; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import java.util.SortedSet; +import java.util.*; import java.util.zip.GZIPOutputStream; +import eppic.model.InterfaceClusterDB; +import eppic.model.InterfaceDB; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.core.LoggerContext; import org.biojava.nbio.core.sequence.io.util.IOUtils; -import org.biojava.nbio.structure.Compound; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.*; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.contact.StructureInterface; import org.biojava.nbio.structure.contact.StructureInterfaceCluster; @@ -51,12 +47,13 @@ import eppic.assembly.LatticeGraph3D; import eppic.assembly.gui.LatticeGUIMustache; import eppic.commons.util.FileTypeGuesser; -import eppic.commons.util.StructureUtils; import eppic.predictors.CombinedClusterPredictor; import eppic.predictors.CombinedPredictor; import eppic.predictors.GeometryClusterPredictor; import eppic.predictors.GeometryPredictor; +import javax.vecmath.Matrix4d; + /** * The eppic main class to execute the CLI workflow. * @@ -258,16 +255,26 @@ public void doLoadPdb() throws EppicException { modelAdaptor = new DataModelAdaptor(); modelAdaptor.setParams(params); modelAdaptor.setPdbMetadata(pdb); - - } public void doFindInterfaces() throws EppicException { params.getProgressLog().println("Calculating possible interfaces..."); - StructureUtils.expandNcsOps(pdb); + LOGGER.info("Calculating possible interfaces"); - CrystalBuilder interfFinder = new CrystalBuilder(pdb); + CrystalBuilder interfFinder; + Map chainOrigNames = null; + if (modelAdaptor.getPdbInfo().isNcsOpsPresent()) { + chainOrigNames = new HashMap<>(); + Map chainNcsOps = new HashMap<>(); + CrystalBuilder.expandNcsOps(pdb,chainOrigNames,chainNcsOps); + interfFinder = new CrystalBuilder(pdb,chainOrigNames,chainNcsOps); + } else { + interfFinder = new CrystalBuilder(pdb); + } + + modelAdaptor.setChainClustersData(pdb, chainOrigNames); + interfaces = interfFinder.getUniqueInterfaces(EppicParams.INTERFACE_DIST_CUTOFF); LOGGER.info("Calculating ASAs"); interfaces.calcAsas(params.getnSpherePointsASAcalc(), params.getNumThreads(), params.getMinSizeCofactorForAsa()); @@ -521,6 +528,11 @@ public void doWriteCoordFiles() throws EppicException { // INTERFACE files for (StructureInterface interf : interfaces) { + // a hack necessary to handle reduced redundancy in structures with NCS + if (modelAdaptor.getPdbInfo().isNcsOpsPresent() && modelAdaptor.getPdbInfo().getInterface(interf.getId())==null) { + LOGGER.info("Skipping generation of interface coordinate file for redundant NCS interface {}", interf.getId()); + continue; + } File outputFile = params.getOutputFile(EppicParams.INTERFACES_COORD_FILES_SUFFIX + "." + interf.getId() + EppicParams.MMCIF_FILE_EXTENSION); PrintStream ps = new PrintStream(new GZIPOutputStream(new FileOutputStream(outputFile))); ps.print(interf.toMMCIF()); @@ -589,6 +601,16 @@ public void doWriteAssemblyDiagrams() throws EppicException { // TODO this is not going to work for contracted graphs: both clusterIds and interfaceids are wrong! see issue https://github.com/eppic-team/eppic/issues/148 SortedSet clusterIds = GraphUtils.getDistinctInterfaceClusters(a.getAssemblyGraph().getSubgraph()); Set interfaceIds = GraphUtils.getDistinctInterfaces(a.getAssemblyGraph().getSubgraph()); + if (modelAdaptor.getPdbInfo().isNcsOpsPresent()) { + // we have to hack the interface list removing the redundant NCS interfaces. In model (and wui) they aren't present + Set nonRedundantSet = new HashSet<>(); + for (InterfaceClusterDB icdb : modelAdaptor.getPdbInfo().getInterfaceClusters()) { + for (InterfaceDB idb : icdb.getInterfaces()) { + nonRedundantSet.add(idb.getInterfaceId()); + } + } + interfaceIds.removeIf( (Integer interfId) -> !nonRedundantSet.contains(interfId)); + } latticeGraph.filterEngagedClusters(clusterIds); LatticeGUIMustache guiThumb = new LatticeGUIMustache(LatticeGUIMustache.TEMPLATE_ASSEMBLY_DIAGRAM_THUMB, latticeGraph); @@ -608,8 +630,11 @@ public void doWriteAssemblyDiagrams() throws EppicException { //runner.generateFromDot(dotFile, pngFile, fileFormat); // Generate thumbs via pipe - runner.generateFromDot(guiThumb, pngFile, fileFormat); - + if (params.getGraphvizExe()==null) { + LOGGER.warn("GRAPHVIZ_EXE was not specified in eppic.conf. Will not generate assembly {} png", a.getId()); + } else { + runner.generateFromDot(guiThumb, pngFile, fileFormat); + } // 2. Generate the json file for the dynamic js graph in the wui @@ -723,6 +748,11 @@ public void doWritePymolFiles() throws EppicException { try { for (StructureInterface interf:interfaces) { + // a hack necessary to handle reduced redundancy in structures with NCS + if (modelAdaptor.getPdbInfo().isNcsOpsPresent() && modelAdaptor.getPdbInfo().getInterface(interf.getId())==null) { + LOGGER.info("Skipping generation of PyMOL interface files for redundant NCS interface {}", interf.getId()); + continue; + } File cifFile = params.getOutputFile(EppicParams.INTERFACES_COORD_FILES_SUFFIX+"."+interf.getId()+ EppicParams.MMCIF_FILE_EXTENSION); pr.generateInterfacePng(interf, cifFile, @@ -768,12 +798,15 @@ private void findUniqueChains() { StringBuilder sb = new StringBuilder(); sb.append("Unique sequences: "); - for (Compound chainCluster:pdb.getCompounds()) { - // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 - if (chainCluster.getChains().isEmpty()) continue; + for (EntityInfo chainCluster:pdb.getEntityInfos()) { - sb.append(DataModelAdaptor.getChainClusterString(chainCluster)); - sb.append(" "); + if (chainCluster.getType() == EntityType.POLYMER) { + // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 + if (chainCluster.getChains().isEmpty()) continue; + + sb.append(DataModelAdaptor.getChainClusterString(chainCluster)); + sb.append(" "); + } } LOGGER.info(sb.toString()); diff --git a/eppic-cli/src/main/java/eppic/PdbToUniProtMapper.java b/eppic-cli/src/main/java/eppic/PdbToUniProtMapper.java index fe15d25da..80648e2b7 100644 --- a/eppic-cli/src/main/java/eppic/PdbToUniProtMapper.java +++ b/eppic-cli/src/main/java/eppic/PdbToUniProtMapper.java @@ -16,7 +16,7 @@ import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; import org.biojava.nbio.structure.AminoAcid; import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Compound; +import org.biojava.nbio.structure.EntityInfo; import org.biojava.nbio.structure.Group; import org.biojava.nbio.structure.GroupType; import org.slf4j.Logger; @@ -44,7 +44,7 @@ public class PdbToUniProtMapper implements Serializable { */ private Map> alignments; - private Compound compound; + private EntityInfo entity; private Map sequences; private UnirefEntry uniProtReference; @@ -64,9 +64,9 @@ public class PdbToUniProtMapper implements Serializable { private boolean sequenceFromAtom; - public PdbToUniProtMapper(Compound compound) { + public PdbToUniProtMapper(EntityInfo entity) { - this.compound = compound; + this.entity = entity; initSequences(); @@ -91,7 +91,7 @@ private void initSequences() { sequences = new TreeMap(); - Chain chain = compound.getRepresentative(); + Chain chain = entity.getRepresentative(); // it looks like biojava interprets MSEs as METs, at least for the sequence, so no issues here String repSequenceSeqRes = chain.getSeqResSequence(); @@ -99,15 +99,15 @@ private void initSequences() { // for files without a SEQRES, it will be empty, we get it from atom groups if (repSequenceSeqRes.isEmpty()) { LOGGER.warn("Could not get a sequence from SEQRES for entity {} (chains {}). Getting it from ATOM instead", - compound.getMolId(), compound.getChainIds().toString()); + entity.getMolId(), entity.getChainIds().toString()); - for (Chain c:compound.getChains()) { + for (Chain c:entity.getChains()) { String seq = getAtomSequence(c); - sequences.put(c.getChainID(), seq); + sequences.put(c.getName(), seq); if (seq.isEmpty()) { - LOGGER.warn("Sequence from ATOM records for chain {} has length 0",chain.getChainID()); + LOGGER.warn("Sequence from ATOM records for chain {} has length 0",chain.getName()); } } @@ -117,7 +117,7 @@ private void initSequences() { } else { // we add just the 1 representative sequence - sequences.put(chain.getChainID() ,repSequenceSeqRes); + sequences.put(chain.getName() ,repSequenceSeqRes); this.sequenceFromAtom = false; } } @@ -127,7 +127,7 @@ private void initAlignments() throws CompoundNotFoundException { this.alignments = new TreeMap>(); if (sequenceFromAtom) { - LOGGER.info("PDB sequences are from ATOM, will have one alignment per member chain of entity {}",compound.getMolId()); + LOGGER.info("PDB sequences are from ATOM, will have one alignment per member chain of entity {}",entity.getMolId()); } @@ -442,7 +442,7 @@ private void initMatchingInterval() { if (minBeg == Integer.MAX_VALUE || maxEnd == 0) { - LOGGER.warn("Could not find a matching interval for entity {}", compound.getMolId()); + LOGGER.warn("Could not find a matching interval for entity {}", entity.getMolId()); matchingIntervalUniProtCoords = null; matchingIntervalPdbCoords = null; @@ -468,13 +468,13 @@ public boolean isPdbGroupMatchingUniProt(Group g) { SequencePair alignment = null; if (sequenceFromAtom) { // we get the corresponding alignment for the chain - alignment = alignments.get(c.getChainID()); + alignment = alignments.get(c.getName()); } else { // we should have just the one alignment for the SEQRES sequence alignment = alignments.values().iterator().next(); - if (compound.getChains().size()>1 && alignments.size()>1) + if (entity.getChains().size()>1 && alignments.size()>1) LOGGER.warn("More than 1 alignment for entity {} contained in pdb-to-uniprot mapper, expected only 1: something is wrong!", - compound.getMolId()); + entity.getMolId()); } int resser = getSeqresSerial(g); @@ -515,13 +515,13 @@ public int getUniProtIndexForPdbGroup(Group g, boolean positionWithinSubinterval SequencePair alignment = null; if (sequenceFromAtom) { // we get the corresponding alignment for the chain - alignment = alignments.get(c.getChainID()); + alignment = alignments.get(c.getName()); } else { // we should have just the one alignment for the SEQRES sequence alignment = alignments.values().iterator().next(); - if (compound.getChains().size()>1 && alignments.size()>1) + if (entity.getChains().size()>1 && alignments.size()>1) LOGGER.warn("More than 1 alignment for entity {} contained in pdb-to-uniprot mapper, expected only 1: something is wrong!", - compound.getMolId()); + entity.getMolId()); } int resser = getSeqresSerial(g); @@ -574,9 +574,9 @@ public Group getPdbGroupFromUniProtIndex(int uniProtIndex, String chainId) { } else { // we should have just the one alignment for the SEQRES sequence alignment = alignments.values().iterator().next(); - if (compound.getChains().size()>1 && alignments.size()>1) + if (entity.getChains().size()>1 && alignments.size()>1) LOGGER.warn("More than 1 alignment for entity {} contained in pdb-to-uniprot mapper, expected only 1: something is wrong!", - compound.getMolId()); + entity.getMolId()); } int alnIdx = alignment.getTarget().getAlignmentIndexAt(uniProtIndex); @@ -588,8 +588,8 @@ public Group getPdbGroupFromUniProtIndex(int uniProtIndex, String chainId) { // getting the relevant chain Chain chain = null; - for (Chain c:compound.getChains()) { - if (c.getChainID().equals(chainId)) { + for (Chain c:entity.getChains()) { + if (c.getName().equals(chainId)) { chain = c; } } @@ -652,7 +652,7 @@ private int getSeqresSerial(Group g) { else { // IMPORTANT NOTE: this won't work for groups that are not in ATOM groups, due to // seqres groups not having residue numbers in BioJava - return compound.getAlignedResIndex(g, g.getChain()); + return entity.getAlignedResIndex(g, g.getChain()); } } diff --git a/eppic-cli/src/main/java/eppic/PymolRunner.java b/eppic-cli/src/main/java/eppic/PymolRunner.java index 9feb22a58..173e45ea5 100644 --- a/eppic-cli/src/main/java/eppic/PymolRunner.java +++ b/eppic-cli/src/main/java/eppic/PymolRunner.java @@ -455,11 +455,11 @@ public void generateChainPse(Chain chain, StructureInterfaceList interfaces, List cores = null; List rims = null; - if (interf.getMoleculeIds().getFirst().equals(chain.getChainID())) { + if (interf.getMoleculeIds().getFirst().equals(chain.getName())) { cores = interf.getCoreResidues(caCutoffGeom, minAsaForSurface).getFirst(); rims = interf.getRimResidues(caCutoffGeom, minAsaForSurface).getFirst(); - } else if (interf.getMoleculeIds().getSecond().equals(chain.getChainID())) { + } else if (interf.getMoleculeIds().getSecond().equals(chain.getName())) { cores = interf.getCoreResidues(caCutoffGeom, minAsaForSurface).getSecond(); rims = interf.getRimResidues(caCutoffGeom, minAsaForSurface).getSecond(); @@ -482,12 +482,12 @@ public void generateChainPse(Chain chain, StructureInterfaceList interfaces, writeCommand(cmd, pml); - if (interf.getMoleculeIds().getFirst().equals(chain.getChainID())) { + if (interf.getMoleculeIds().getFirst().equals(chain.getName())) { cores = interf.getCoreResidues(caCutoffCoreSurf, minAsaForSurface).getFirst(); rims = interf.getRimResidues(caCutoffCoreSurf, minAsaForSurface).getFirst(); - } else if (interf.getMoleculeIds().getSecond().equals(chain.getChainID())) { + } else if (interf.getMoleculeIds().getSecond().equals(chain.getName())) { cores = interf.getCoreResidues(caCutoffCoreSurf, minAsaForSurface).getSecond(); rims = interf.getRimResidues(caCutoffCoreSurf, minAsaForSurface).getSecond(); @@ -589,9 +589,9 @@ private String getResiSelString(List list) { // we need to escape the negative residues in pymol with a backslash if (pdbSerial.startsWith("-")) pdbSerial = "\\"+pdbSerial; - int currentSerial = c.getCompound().getAlignedResIndex(list.get(i), c); + int currentSerial = c.getEntityInfo().getAlignedResIndex(list.get(i), c); int prevSerial = -1; - if (i>0) prevSerial = c.getCompound().getAlignedResIndex(list.get(i-1),c); + if (i>0) prevSerial = c.getEntityInfo().getAlignedResIndex(list.get(i-1),c); if (i==0) { lastSerial = currentSerial; diff --git a/eppic-cli/src/main/java/eppic/analysis/FindRedundantEntries.java b/eppic-cli/src/main/java/eppic/analysis/FindRedundantEntries.java index 4e0577d87..90223f66e 100644 --- a/eppic-cli/src/main/java/eppic/analysis/FindRedundantEntries.java +++ b/eppic-cli/src/main/java/eppic/analysis/FindRedundantEntries.java @@ -9,10 +9,7 @@ import java.util.Set; import java.util.TreeMap; -import org.biojava.nbio.structure.Compound; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.*; import org.biojava.nbio.structure.align.util.AtomCache; import eppic.commons.blast.BlastException; @@ -110,17 +107,21 @@ private static void writeFastaFile(Set pdbCodes, File file) throws IOExc continue; } - for (Compound chainCluster:pdb.getCompounds()) { - // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 - if (chainCluster.getChains().isEmpty()) continue; - - String seq = chainCluster.getRepresentative().getSeqResSequence(); - if (seq.matches("X+")) continue; // if it's an all X sequence we don't want it (blastclust doesn't like them) - if (seq.length()<12) continue; // we ignore too small sequences (blastclust doesn't like them) - // at the moment Biojava's getSeqResSequence uses XXXX for nucleotides, so the above condition captures this already - //if (seq.isNucleotide()) continue; // some sets (like Bahadur's monomers) contain DNA/RNA: ignore - Sequence s = new Sequence(pdbCode+chainCluster.getRepresentative().getChainID(),seq); - s.writeToPrintStream(ps); + for (EntityInfo chainCluster:pdb.getEntityInfos()) { + + if (chainCluster.getType() == EntityType.POLYMER) { + // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 + if (chainCluster.getChains().isEmpty()) continue; + + String seq = chainCluster.getRepresentative().getSeqResSequence(); + if (seq.matches("X+")) + continue; // if it's an all X sequence we don't want it (blastclust doesn't like them) + if (seq.length() < 12) continue; // we ignore too small sequences (blastclust doesn't like them) + // at the moment Biojava's getSeqResSequence uses XXXX for nucleotides, so the above condition captures this already + //if (seq.isNucleotide()) continue; // some sets (like Bahadur's monomers) contain DNA/RNA: ignore + Sequence s = new Sequence(pdbCode + chainCluster.getRepresentative().getName(), seq); + s.writeToPrintStream(ps); + } } } diff --git a/eppic-cli/src/main/java/eppic/analysis/compare/InterfaceMatcher.java b/eppic-cli/src/main/java/eppic/analysis/compare/InterfaceMatcher.java index e042838ed..c66b374fb 100644 --- a/eppic-cli/src/main/java/eppic/analysis/compare/InterfaceMatcher.java +++ b/eppic-cli/src/main/java/eppic/analysis/compare/InterfaceMatcher.java @@ -8,6 +8,8 @@ import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.vecmath.Matrix4d; @@ -17,6 +19,8 @@ import eppic.model.InterfaceDB; public class InterfaceMatcher { + + private static final Pattern CHAIN_ID_REGEX = Pattern.compile("^(\\w+)\\d+n$"); private List ourInterfaceClusters; @@ -190,15 +194,32 @@ private void matchThem() { } } + + private String ncsChainToStandardChain(String chainId){ + if (chainId.length()>2 && chainId.endsWith("n")) { + Matcher m = CHAIN_ID_REGEX.matcher(chainId); + if (m.matches()) { + return m.group(1); + } + } + return chainId; + } private boolean areMatching(SimpleInterface theirI, InterfaceDB ourI) { String ourChain1 = ourI.getChain1(); String ourChain2 = ourI.getChain2(); + + if (ourI.getInterfaceCluster().getPdbInfo().isNcsOpsPresent()) { + // we have special chain ids for NCS case, e.g. A1n + // see https://github.com/eppic-team/eppic/issues/141 , this is probably not solving the problem, but it helps + ourChain1 = ncsChainToStandardChain(ourChain1); + ourChain2 = ncsChainToStandardChain(ourChain2); + } String theirChain1 = theirI.getChain1(); String theirChain2 = theirI.getChain2(); - boolean invertedChains = false; + boolean invertedChains; if (theirChain1.equals(ourChain1) && theirChain2.equals(ourChain2)) { invertedChains = false; } diff --git a/eppic-cli/src/main/java/eppic/assembly/Assembly.java b/eppic-cli/src/main/java/eppic/assembly/Assembly.java index 2283eba4e..ffc8c8935 100644 --- a/eppic-cli/src/main/java/eppic/assembly/Assembly.java +++ b/eppic-cli/src/main/java/eppic/assembly/Assembly.java @@ -38,18 +38,22 @@ import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureTools; +import org.biojava.nbio.structure.cluster.Subunit; +import org.biojava.nbio.structure.cluster.SubunitClusterer; +import org.biojava.nbio.structure.cluster.SubunitClustererMethod; +import org.biojava.nbio.structure.cluster.SubunitClustererParameters; import org.biojava.nbio.structure.contact.StructureInterfaceCluster; import org.biojava.nbio.structure.io.FileConvert; import org.biojava.nbio.structure.io.mmcif.MMCIFFileTools; import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser; import org.biojava.nbio.structure.io.mmcif.model.AtomSite; -import org.biojava.nbio.structure.symmetry.core.AxisAligner; +import org.biojava.nbio.structure.symmetry.axis.AxisAligner; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults; import org.biojava.nbio.structure.symmetry.core.Rotation; import org.biojava.nbio.structure.symmetry.core.RotationGroup; -import org.biojava.nbio.structure.symmetry.core.Subunits; +import org.biojava.nbio.structure.symmetry.core.Stoichiometry; import org.biojava.nbio.structure.xtal.CrystalCell; import org.jgrapht.UndirectedGraph; import org.jgrapht.event.ConnectedComponentTraversalEvent; @@ -552,48 +556,32 @@ private Vector3d centerSymmetrically( * @return */ private static QuatSymmetryResults getQuatSymm( Collection vertices) { - // hack subunits - List caCoords = new ArrayList(); - List folds = new ArrayList(); - List pseudo = new ArrayList(); - List chainIds = new ArrayList(); - List models = new ArrayList(); - List seqIDmin = new ArrayList(); - List seqIDmax = new ArrayList(); - List clusterIDs = new ArrayList(); - int fold = 1; - Character chain = 'A'; + List subunits = new ArrayList<>(); + for (ChainVertex vert : vertices ){ - Point3d[] coords = getDummyCoordinates(vert.getChain()); - if (coords.length==0) { - logger.warn("0-length coordinate array. Can't calculate quaternary symmetry!"); - } - caCoords.add(coords); + Atom[] ca = StructureTools.getRepresentativeAtomArray(vert.getChain()); - if (vertices.size() % fold == 0){ - folds.add(fold); //the folds are the common denominators + if (ca.length == 0) { + // e.g. 2k4g, chain A + logger.info("No representative atoms for chain with name {}. Using all atoms to get symmetry for structure packing", vert.getChain().getName()); + ca = StructureTools.getAllAtomArray(vert.getChain()); } - fold++; - pseudo.add(false); - chainIds.add(chain+""); - chain++; - models.add(0); - seqIDmax.add(1.0); - seqIDmin.add(1.0); - clusterIDs.add(0); + Subunit subunit = new Subunit(ca, vert.getChain().getId(), null, vert.getChain().getStructure()); + + subunits.add(subunit); } - //Create directly the subunits, because we know the aligned CA - Subunits globalSubunits = new Subunits(caCoords, clusterIDs, - pseudo, seqIDmin, seqIDmax, - folds, chainIds, models); + SubunitClustererParameters clusterParams = new SubunitClustererParameters(true); + clusterParams.setClustererMethod(SubunitClustererMethod.SEQUENCE); + + Stoichiometry globalSubunits = SubunitClusterer.cluster(subunits, clusterParams); //Quaternary Symmetry Detection QuatSymmetryParameters param = new QuatSymmetryParameters(); QuatSymmetryResults gSymmetry = - QuatSymmetryDetector.calcQuatSymmetry(globalSubunits, param); + QuatSymmetryDetector.calcGlobalSymmetry(globalSubunits, param); return gSymmetry; } @@ -605,7 +593,8 @@ private static QuatSymmetryResults getQuatSymm( Collection vertices * @param c * @return */ - private static Point3d[] getDummyCoordinates(Chain c) { + @SuppressWarnings("unused") + private static Atom[] getDummyCoordinates(Chain c) { // Using the centroid gave poor quality since it doesn't establish the orientation. // Use the centroids of each third of the protein @@ -613,12 +602,12 @@ private static Point3d[] getDummyCoordinates(Chain c) { if (ca.length<3) { // in some cases we find no CAs or Ps, let's use all atoms then, see issue #167 // see also issue #195. For chains with fewer than 1 or 2 representative atoms we need to resort to all atoms too, e.g. 5VVV chain B - logger.info("Fewer than 3 representative atoms in chain {}. Resorting to all atoms for calculating symmetry to pack structure.", c.getChainID()); + logger.info("Fewer than 3 representative atoms in chain {}. Resorting to all atoms for calculating symmetry to pack structure.", c.getName()); ca = StructureTools.getAllAtomArray(c); } if (ca.length<3) { - logger.warn("Fewer than 3 atoms in chain {} even after resorting to all atoms. Problems might happen in symmetry calculation to pack structure.", c.getChainID()); - return Calc.atomsToPoints(ca); + logger.warn("Fewer than 3 atoms in chain {} even after resorting to all atoms. Problems might happen in symmetry calculation to pack structure.", c.getName()); + return ca; } Atom[] ca1 = Arrays.copyOfRange(ca, 0,ca.length/3); @@ -628,7 +617,7 @@ private static Point3d[] getDummyCoordinates(Chain c) { dummy[0] = Calc.getCentroid(ca1); dummy[1] = Calc.getCentroid(ca2); dummy[2] = Calc.getCentroid(ca3); - return Calc.atomsToPoints(dummy); + return dummy; } /** @@ -665,7 +654,7 @@ private static List transformChains(Map place transmat.set(1., trans); transmat.mul(m); - Chain chain = (Chain) structure.getChainByPDB(v.getChainId()).clone(); + Chain chain = (Chain) structure.getPolyChainByPDB(v.getChainId()).clone(); Calc.transform(chain, transmat); chains.add(new ChainVertex(chain,v.getOpId())); } @@ -900,7 +889,7 @@ public void writeToMmCifFile(File file) throws IOException, StructureException { int numChains = structure.size(); Set uniqueChains = new HashSet(); for (ChainVertex cv:structure) { - uniqueChains.add(cv.getChain().getChainID()); + uniqueChains.add(cv.getChain().getName()); } if (numChains != uniqueChains.size()) symRelatedChainsExist = true; @@ -915,7 +904,7 @@ public void writeToMmCifFile(File file) throws IOException, StructureException { int atomId = 1; for (ChainVertex cv:structure) { - String chainId = cv.getChain().getChainID()+"_"+cv.getOpId(); + String chainId = cv.getChain().getName()+"_"+cv.getOpId(); for (Group g: cv.getChain().getAtomGroups()) { for (Atom a: g.getAtoms()) { diff --git a/eppic-cli/src/main/java/eppic/assembly/ChainVertex.java b/eppic-cli/src/main/java/eppic/assembly/ChainVertex.java index bb32c5b01..2856f12b5 100644 --- a/eppic-cli/src/main/java/eppic/assembly/ChainVertex.java +++ b/eppic-cli/src/main/java/eppic/assembly/ChainVertex.java @@ -49,7 +49,7 @@ public void setOpId(int i) { @Override public String getChainId() { - return c.getChainID(); + return c.getName(); } public Chain getChain() { @@ -98,7 +98,7 @@ public boolean equals(Object obj) { @Override public int getEntityId() { - return c.getCompound().getMolId(); + return c.getEntityInfo().getMolId(); } } \ No newline at end of file diff --git a/eppic-cli/src/main/java/eppic/assembly/CrystalAssemblies.java b/eppic-cli/src/main/java/eppic/assembly/CrystalAssemblies.java index 6c05aaafc..866642715 100644 --- a/eppic-cli/src/main/java/eppic/assembly/CrystalAssemblies.java +++ b/eppic-cli/src/main/java/eppic/assembly/CrystalAssemblies.java @@ -12,10 +12,7 @@ import java.util.TreeMap; import java.util.TreeSet; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Compound; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; +import org.biojava.nbio.structure.*; import org.biojava.nbio.structure.contact.StructureInterfaceList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -296,16 +293,18 @@ private void initEntityMaps() { idx2ChainIds = new HashMap(); int i = 0; - for (Compound c:structure.getCompounds()) { + for (EntityInfo c:structure.getEntityInfos()) { + if (c.getType() != EntityType.POLYMER) continue; + entityId2Idx.put(c.getMolId(),i); idx2EntityId.put(i,c.getMolId()); i++; } i = 0; - for (Chain c:structure.getChains()) { - chainIds2Idx.put(c.getChainID(),i); - idx2ChainIds.put(i,c.getChainID()); + for (Chain c:structure.getPolyChains()) { + chainIds2Idx.put(c.getName(),i); + idx2ChainIds.put(i,c.getName()); i++; } } @@ -435,7 +434,7 @@ public String getChainId(int chainIdx) { * @return */ public String getRepresentativeChainIdForEntityIndex(int index) { - return structure.getCompoundById(getEntityId(index)).getRepresentative().getChainID(); + return structure.getEntityById(getEntityId(index)).getRepresentative().getName(); } /** @@ -443,7 +442,7 @@ public String getRepresentativeChainIdForEntityIndex(int index) { * @return */ public int getNumChainsInStructure() { - return structure.getChains().size(); + return structure.getPolyChains().size(); } @Override diff --git a/eppic-cli/src/main/java/eppic/assembly/LatticeGraph.java b/eppic-cli/src/main/java/eppic/assembly/LatticeGraph.java index 3bbc494ae..1f7ebc219 100644 --- a/eppic-cli/src/main/java/eppic/assembly/LatticeGraph.java +++ b/eppic-cli/src/main/java/eppic/assembly/LatticeGraph.java @@ -189,7 +189,7 @@ public Point3d getReferenceCoordinate(String chainId) throws StructureException return referencePoints.get(null); } else { if( ! referencePoints.containsKey(chainId)) { - Point3d centroid = GeomTools.getCentroid(structure.getChainByPDB(chainId)); + Point3d centroid = GeomTools.getCentroid(structure.getPolyChainByPDB(chainId)); referencePoints.put(chainId,centroid); return centroid; } @@ -281,10 +281,10 @@ private void initLatticeGraphTopologically(List interfaces, SpaceGroup sg = getSpaceGroup(structure); final int numOps = sg.getNumOperators(); - for (Chain c:structure.getChains()) { + for (Chain c:structure.getPolyChains()) { - if (c.getCompound()==null) { - logger.warn("Chain {} will not be added to the graph because it does not have an entity associated to it.", c.getChainID()); + if (c.getEntityInfo()==null) { + logger.warn("Chain {} will not be added to the graph because it does not have an entity associated to it.", c.getName()); continue; } @@ -327,10 +327,10 @@ private void initLatticeGraphTopologically(List interfaces, V sVertex = vertexFactory.createVertex(); - sVertex.setChain(structure.getChainByPDB(sourceChainId)); + sVertex.setChain(structure.getPolyChainByPDB(sourceChainId)); sVertex.setOpId(j); V tVertex = vertexFactory.createVertex(); - tVertex.setChain(structure.getChainByPDB(targetChainId)); + tVertex.setChain(structure.getPolyChainByPDB(targetChainId)); tVertex.setOpId(k); E edge = edgeFactory.createEdge(sVertex, tVertex); diff --git a/eppic-cli/src/main/java/eppic/assembly/LatticeGraph3D.java b/eppic-cli/src/main/java/eppic/assembly/LatticeGraph3D.java index 8ac8bb1e6..6814c4521 100644 --- a/eppic-cli/src/main/java/eppic/assembly/LatticeGraph3D.java +++ b/eppic-cli/src/main/java/eppic/assembly/LatticeGraph3D.java @@ -89,8 +89,8 @@ public LatticeGraph3D(Structure struc, List interfaces) thro // Compute centroids in AU chainCentroid = new HashMap(); - for(Chain c: structure.getChains() ) { - chainCentroid.put(c.getChainID(), GeomTools.getCentroid(c)); + for(Chain c: structure.getPolyChains() ) { + chainCentroid.put(c.getId(), GeomTools.getCentroid(c)); } // Compute 3D layout @@ -127,8 +127,8 @@ public LatticeGraph3D(LatticeGraph(); - for(Chain c: structure.getChains() ) { - chainCentroid.put(c.getChainID(), GeomTools.getCentroid(c)); + for(Chain c: structure.getPolyChains() ) { + chainCentroid.put(c.getName(), GeomTools.getCentroid(c)); } // Compute 3D layout @@ -457,7 +457,7 @@ public void writeCellToMmCifFile(PrintWriter out) throws IOException, StructureE int numChains = structure.size(); Set uniqueChains = new HashSet(); for (ChainVertex3D cv:getGraph().vertexSet()) { - uniqueChains.add(cv.getChain().getChainID()); + uniqueChains.add(cv.getChain().getName()); } if (numChains != uniqueChains.size()) symRelatedChainsExist = true; @@ -479,9 +479,9 @@ public void writeCellToMmCifFile(PrintWriter out) throws IOException, StructureE int atomId = 1; for (ChainVertex3D cv:getGraph().vertexSet()) { - String chainId = cv.getChain().getChainID()+"_"+cv.getOpId(); + String chainId = cv.getChain().getName()+"_"+cv.getOpId(); //TODO maybe need to clone and transform here? - Matrix4d m = getUnitCellTransformationOrthonormal(cv.getChain().getChainID(), cv.getOpId()); + Matrix4d m = getUnitCellTransformationOrthonormal(cv.getChain().getName(), cv.getOpId()); //Point3d refCoord = graph.getReferenceCoordinate(cv.getChainId()); Chain newChain = (Chain) cv.getChain().clone(); @@ -524,7 +524,7 @@ public void writeCellToMmCifFile(PrintWriter out) throws IOException, StructureE public Set getUnitCellTransforms() throws StructureException { Set transforms = new HashSet<>(); for (ChainVertex3D cv:getGraph().vertexSet()) { - Matrix4d m = getUnitCellTransformationOrthonormal(cv.getChain().getChainID(), cv.getOpId()); + Matrix4d m = getUnitCellTransformationOrthonormal(cv.getChain().getName(), cv.getOpId()); transforms.add(m); } diff --git a/eppic-cli/src/main/java/eppic/assembly/SubAssembly.java b/eppic-cli/src/main/java/eppic/assembly/SubAssembly.java index 7f0f9dddd..f5526cbaa 100644 --- a/eppic-cli/src/main/java/eppic/assembly/SubAssembly.java +++ b/eppic-cli/src/main/java/eppic/assembly/SubAssembly.java @@ -201,7 +201,7 @@ public AssemblyDescription getDescription() { .mapToObj(i -> crystalAssemblies.getChainId(i)) .collect(Collectors.toList()); List nodeChains = connectedGraph.vertexSet().stream() - .map(v -> v.getChain().getChainID()) + .map(v -> v.getChain().getName()) .collect(Collectors.toList()); Stoichiometry chainStoich = new Stoichiometry<>(nodeChains,chains); diff --git a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUI.java b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUI.java index 7a51af156..b72c5b315 100644 --- a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUI.java +++ b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUI.java @@ -31,7 +31,7 @@ import org.biojava.nbio.structure.gui.BiojavaJmol; import org.biojava.nbio.structure.io.MMCIFFileReader; import org.biojava.nbio.structure.io.PDBFileReader; -import org.biojava.nbio.structure.io.util.FileDownloadUtils; +import org.biojava.nbio.core.util.FileDownloadUtils; import org.biojava.nbio.structure.xtal.CrystalBuilder; import org.biojava.nbio.structure.xtal.CrystalCell; import org.biojava.nbio.structure.xtal.CrystalTransform; @@ -134,8 +134,8 @@ public LatticeGUI(Structure struc, StructureInterfaceList interfaces) throws Str // Compute AU positions for each vertex chainCentroid = new HashMap(); - for(Chain c: structure.getChains() ) { - chainCentroid.put(c.getChainID(), getCentroid(c)); + for(Chain c: structure.getPolyChains() ) { + chainCentroid.put(c.getId(), getCentroid(c)); } //assignColorsById(); diff --git a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIJGraph.java b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIJGraph.java index f9d0772af..23db5c62f 100644 --- a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIJGraph.java +++ b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIJGraph.java @@ -28,7 +28,7 @@ import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.align.util.AtomCache; -import org.biojava.nbio.structure.symmetry.core.AxisAligner; +import org.biojava.nbio.structure.symmetry.axis.AxisAligner; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults; import org.biojava.nbio.structure.symmetry.core.Rotation; import org.biojava.nbio.structure.symmetry.core.RotationGroup; diff --git a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIJmol.java b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIJmol.java index 2180ca162..eb2812c03 100644 --- a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIJmol.java +++ b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIJmol.java @@ -5,12 +5,9 @@ import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; -import javax.swing.JFrame; +import javax.swing.*; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; @@ -19,7 +16,7 @@ import org.biojava.nbio.structure.gui.BiojavaJmol; import org.biojava.nbio.structure.io.MMCIFFileReader; import org.biojava.nbio.structure.io.PDBFileReader; -import org.biojava.nbio.structure.io.util.FileDownloadUtils; +import org.biojava.nbio.core.util.FileDownloadUtils; import org.jgrapht.UndirectedGraph; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,7 +30,6 @@ import eppic.assembly.LatticeGraph3D; import eppic.assembly.OrientedCircle; import eppic.assembly.ParametricCircularArc; -import eppic.commons.util.StructureUtils; /** * Jmol viewer for LatticeGraph. @@ -56,7 +52,6 @@ public class LatticeGUIJmol { * Jmol needs to load the structure directly from strucFile. * @param struc Structure to build the graph * @param strucFile Path to the structure of the asymmetric unit. - * @param interfaceIds List of interfaces to show, or null for all * @throws StructureException */ public LatticeGUIJmol(Structure struc, File strucFile) throws StructureException { @@ -189,8 +184,13 @@ public static void main(String[] args) throws IOException, StructureException { logger.error("Unable to read structure or file {}",input); System.exit(1); } - - StructureUtils.expandNcsOps(struc); + + // TODO deal with NCS ops, since biojava 5 the handling of NCS has changed + //StructureTools.expandNcsOps(struc); + //if (struc.getCrystallographicInfo().getNcsOperators()!=null) { + // CrystalBuilder.expandNcsOps(struc, new HashMap<>(), new HashMap<>()); + // //interfFinder = new CrystalBuilder(pdb,chainOrigNames,chainNcsOps); + //} LatticeGUIJmol gui = new LatticeGUIJmol(struc, file); if(interfaceIds != null) { @@ -240,7 +240,7 @@ public BiojavaJmol display() { jmol.evalString(getJmolCommands()); - jmol.getFrame().setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + jmol.getFrame().setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE); return jmol; diff --git a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIMustache.java b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIMustache.java index 76e8d6707..f460e674c 100644 --- a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIMustache.java +++ b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIMustache.java @@ -28,7 +28,7 @@ import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.contact.StructureInterface; -import org.biojava.nbio.structure.io.util.FileDownloadUtils; +import org.biojava.nbio.core.util.FileDownloadUtils; import org.jgrapht.UndirectedGraph; import org.jgrapht.graph.Pseudograph; import org.slf4j.Logger; diff --git a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIMustache3D.java b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIMustache3D.java index ea0044e8d..d6aa00dbf 100644 --- a/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIMustache3D.java +++ b/eppic-cli/src/main/java/eppic/assembly/gui/LatticeGUIMustache3D.java @@ -12,7 +12,7 @@ import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.contact.StructureInterface; -import org.biojava.nbio.structure.io.util.FileDownloadUtils; +import org.biojava.nbio.core.util.FileDownloadUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/eppic-cli/src/main/java/eppic/assembly/layout/QuaternaryOrientationLayout.java b/eppic-cli/src/main/java/eppic/assembly/layout/QuaternaryOrientationLayout.java index b2723e5b2..25811dec2 100644 --- a/eppic-cli/src/main/java/eppic/assembly/layout/QuaternaryOrientationLayout.java +++ b/eppic-cli/src/main/java/eppic/assembly/layout/QuaternaryOrientationLayout.java @@ -10,13 +10,16 @@ import javax.vecmath.AxisAngle4d; import javax.vecmath.Point3d; -import org.biojava.nbio.structure.symmetry.core.AxisAligner; -import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector; -import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters; -import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults; -import org.biojava.nbio.structure.symmetry.core.Rotation; -import org.biojava.nbio.structure.symmetry.core.RotationGroup; -import org.biojava.nbio.structure.symmetry.core.Subunits; +import org.biojava.nbio.structure.AminoAcidImpl; +import org.biojava.nbio.structure.Atom; +import org.biojava.nbio.structure.AtomImpl; +import org.biojava.nbio.structure.Group; +import org.biojava.nbio.structure.cluster.Subunit; +import org.biojava.nbio.structure.cluster.SubunitClusterer; +import org.biojava.nbio.structure.cluster.SubunitClustererMethod; +import org.biojava.nbio.structure.cluster.SubunitClustererParameters; +import org.biojava.nbio.structure.symmetry.axis.AxisAligner; +import org.biojava.nbio.structure.symmetry.core.*; import org.jgrapht.UndirectedGraph; import org.jgrapht.alg.ConnectivityInspector; import org.jgrapht.graph.MaskFunctor; @@ -46,9 +49,12 @@ public void projectLatticeGraph(UndirectedGraph graph) { // Orient QuatSymmetryResults gSymmetry = QuaternaryOrientationLayout.getQuatSymm(subgraph,vertexPositioner); RotationGroup pointgroup = gSymmetry.getRotationGroup(); + if (gSymmetry.getMethod()== SymmetryPerceptionMethod.ROTO_TRANSLATION) { + // this happens for cases like 5cti, assembly {1,2,3} since biojava 5 + pointgroup = null;// helical case, we set this to null so that we go to helical case below + } AxisAligner aligner = AxisAligner.getInstance(gSymmetry); Point3d center = aligner.getGeometricCenter(); - AxisAngle4d axis = null; if (pointgroup==null) { // pointgroup is null for 1y4m @@ -81,47 +87,39 @@ public void projectLatticeGraph(UndirectedGraph graph) { public static QuatSymmetryResults getQuatSymm( UndirectedGraph subgraph, VertexPositioner vertexPositioner) { - List caCoords = new ArrayList(); - List folds = new ArrayList(); - List pseudo = new ArrayList(); - List chainIds = new ArrayList(); - List models = new ArrayList(); - List seqIDmin = new ArrayList(); - List seqIDmax = new ArrayList(); - List clusterIDs = new ArrayList(); - int fold = 1; - Character chain = 'A'; - + List subunits = new ArrayList<>(); + for (V vert : subgraph.vertexSet() ){ Point3d centroid = vertexPositioner.getPosition(vert); - caCoords.add(new Point3d[] {centroid}); + Atom atom = new AtomImpl(); + atom.setCoords(new double[] {centroid.x, centroid.y, centroid.z}); + // setting a dummy group so that subunit clusterer doesn't break + Group g = new AminoAcidImpl(); + ((AminoAcidImpl) g).setAminoType('A'); + g.setPDBName("ALA"); + atom.setGroup(g); + Atom[] caCoords = new Atom[] {atom}; - if (subgraph.vertexSet().size() % fold == 0){ - folds.add(fold); //the folds are the common denominators - } - fold++; - pseudo.add(false); - chainIds.add(chain+""); - chain++; - models.add(0); - seqIDmax.add(1.0); - seqIDmin.add(1.0); - clusterIDs.add(0); + Subunit subunit = new Subunit(caCoords, null, null, null); + + subunits.add(subunit); } - //Create directly the subunits, because we know the aligned CA - Subunits globalSubunits = new Subunits(caCoords, clusterIDs, - pseudo, seqIDmin, seqIDmax, - folds, chainIds, models); - + SubunitClustererParameters clusterParams = new SubunitClustererParameters(true); + clusterParams.setSequenceIdentityThreshold(1.0); + clusterParams.setClustererMethod(SubunitClustererMethod.SEQUENCE); + + Stoichiometry globalSubunits = SubunitClusterer.cluster(subunits, clusterParams); + //Quaternary Symmetry Detection QuatSymmetryParameters param = new QuatSymmetryParameters(); QuatSymmetryResults gSymmetry = - QuatSymmetryDetector.calcQuatSymmetry(globalSubunits, param); + QuatSymmetryDetector.calcGlobalSymmetry(globalSubunits, param); return gSymmetry; } + public static UndirectedMaskSubgraph getVertexSubgraph( final UndirectedGraph graph, final Set connected) { diff --git a/eppic-cli/src/main/java/eppic/commons/pisa/PisaInterface.java b/eppic-cli/src/main/java/eppic/commons/pisa/PisaInterface.java index 46fc7501f..eb86d2e39 100644 --- a/eppic-cli/src/main/java/eppic/commons/pisa/PisaInterface.java +++ b/eppic-cli/src/main/java/eppic/commons/pisa/PisaInterface.java @@ -223,7 +223,7 @@ private static void setGroupAsas (PisaMolecule firstMolecule, PisaMolecule secon continue; } // TODO check that insCode=0 is right as default insCode - ResidueNumber resNumber = new ResidueNumber(chain1.getChainID(), num, insCode); + ResidueNumber resNumber = new ResidueNumber(chain1.getName(), num, insCode); Group g = null; try { g = chain1.getGroupByPDB(resNumber); @@ -255,7 +255,7 @@ private static void setGroupAsas (PisaMolecule firstMolecule, PisaMolecule secon continue; } // TODO check that insCode=0 is right as default insCode - ResidueNumber resNumber = new ResidueNumber(chain2.getChainID(), num, insCode); + ResidueNumber resNumber = new ResidueNumber(chain2.getName(), num, insCode); Group g = null; try { g = chain2.getGroupByPDB(resNumber); @@ -281,13 +281,8 @@ private static void setGroupAsas (PisaMolecule firstMolecule, PisaMolecule secon * @return */ private Chain findChainForPisaMolecule(PisaMolecule molecule, Structure pdb) { - if (molecule.isProtein()) { - try { - return pdb.getChainByPDB(molecule.getChainId()); - } catch (StructureException e) { - System.err.println("Could not find chain for PISA chain "+molecule.getChainId()); - return null; - } + if (molecule.isProtein()) { + return pdb.getPolyChainByPDB(molecule.getChainId()); } // TODO what to do with the non-polymeric chains from PISA???? // String pisaNonPolyChainId = molecule.getChainId(); diff --git a/eppic-cli/src/main/java/eppic/commons/sequence/MultipleSequenceAlignment.java b/eppic-cli/src/main/java/eppic/commons/sequence/MultipleSequenceAlignment.java index 965d03eff..779962c20 100644 --- a/eppic-cli/src/main/java/eppic/commons/sequence/MultipleSequenceAlignment.java +++ b/eppic-cli/src/main/java/eppic/commons/sequence/MultipleSequenceAlignment.java @@ -522,7 +522,7 @@ public String getTagFromIndex(int i) { * @param seqTag * @return */ - public int getIndexFromTag(int seqTag) { + public int getIndexFromTag(String seqTag) { return tags2indices.get(seqTag); } diff --git a/eppic-cli/src/main/java/eppic/commons/util/DbConfigGenerator.java b/eppic-cli/src/main/java/eppic/commons/util/DbConfigGenerator.java index a536d78f7..257ef3514 100644 --- a/eppic-cli/src/main/java/eppic/commons/util/DbConfigGenerator.java +++ b/eppic-cli/src/main/java/eppic/commons/util/DbConfigGenerator.java @@ -35,6 +35,9 @@ public static Map createDatabaseProperties(File configurationFil String user = null; String pwd = null; + // default hbm2ddl mode is validate so that in normal production operations there's no risk of altering the schema + String hbm2ddlMode = "validate"; + // port is the only optional property if (properties.getProperty("port")!=null && !properties.getProperty("port").isEmpty()) port = properties.getProperty("port").trim(); @@ -54,6 +57,10 @@ public static Map createDatabaseProperties(File configurationFil } else { throw new IOException("Missing property 'host' in config file "+configurationFile); } + // one more (optional) property to be able to control externally the behaviour of hibernate on startup (create tables, update, validate etc) + if (properties.getProperty("hibernate.hbm2ddl.auto")!=null && !properties.getProperty("hibernate.hbm2ddl.auto").isEmpty()) { + hbm2ddlMode = properties.getProperty("hibernate.hbm2ddl.auto").trim(); + } if (dbName == null) { // in this case the dbname must be present in file @@ -77,6 +84,8 @@ public static Map createDatabaseProperties(File configurationFil map.put("javax.persistence.jdbc.user", user); map.put("javax.persistence.jdbc.password", pwd); + map.put("hibernate.hbm2ddl.auto", hbm2ddlMode); + map.put("hibernate.c3p0.min_size", "5"); map.put("hibernate.c3p0.max_size", "20"); map.put("hibernate.c3p0.timeout", "1800"); diff --git a/eppic-cli/src/main/java/eppic/commons/util/StructureUtils.java b/eppic-cli/src/main/java/eppic/commons/util/StructureUtils.java deleted file mode 100644 index 2b7b409b8..000000000 --- a/eppic-cli/src/main/java/eppic/commons/util/StructureUtils.java +++ /dev/null @@ -1,72 +0,0 @@ -package eppic.commons.util; - -import java.util.ArrayList; -import java.util.List; - -import javax.vecmath.Matrix4d; - -import org.biojava.nbio.structure.Calc; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.PDBCrystallographicInfo; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureTools; - -/** - * Some needed functions to expand ncs operators taken from biojava 5.0.0-SNAPSHOT - * When eppic switches from biojava 4.2.1 to 5.0.0 we should remove this and use the biojava functions in - * {@link StructureTools}. - * - * @author Jose Duarte - */ -public class StructureUtils { - /** - * Expands the NCS operators in the given Structure adding new chains as needed. - * The new chains are assigned ids of the form: original_chain_id+ncs_operator_index+"n" - * @param structure - */ - public static void expandNcsOps(Structure structure) { - PDBCrystallographicInfo xtalInfo = structure.getCrystallographicInfo(); - if (xtalInfo ==null) return; - - if (xtalInfo.getNcsOperators()==null || xtalInfo.getNcsOperators().length==0) return; - - List chainsToAdd = new ArrayList<>(); - int i = 0; - for (Matrix4d m:xtalInfo.getNcsOperators()) { - i++; - - for (Chain c:structure.getChains()) { - Chain clonedChain = (Chain)c.clone(); - String newChainId = c.getChainID()+i+"n"; - clonedChain.setChainID(newChainId); - clonedChain.setInternalChainID(newChainId); - setChainIdsInResidueNumbers(clonedChain, newChainId); - Calc.transform(clonedChain, m); - chainsToAdd.add(clonedChain); - c.getCompound().addChain(clonedChain); - } - } - - for (Chain c:chainsToAdd) { - structure.addChain(c); - } - } - - /** - * Auxiliary method to reset chain ids of residue numbers in a chain. - * Used when cloning chains and resetting their ids: one needs to take care of - * resetting the ids within residue numbers too. - * @param c - * @param newChainId - */ - private static void setChainIdsInResidueNumbers(Chain c, String newChainId) { - for (Group g:c.getAtomGroups()) { - g.setResidueNumber(newChainId, g.getResidueNumber().getSeqNum(), g.getResidueNumber().getInsCode()); - } - for (Group g:c.getSeqResGroups()) { - if (g.getResidueNumber()==null) continue; - g.setResidueNumber(newChainId, g.getResidueNumber().getSeqNum(), g.getResidueNumber().getInsCode()); - } - } -} diff --git a/eppic-cli/src/main/java/eppic/tools/EnumerateInterfaces.java b/eppic-cli/src/main/java/eppic/tools/EnumerateInterfaces.java index 6891cd4d5..c1e279ea4 100644 --- a/eppic-cli/src/main/java/eppic/tools/EnumerateInterfaces.java +++ b/eppic-cli/src/main/java/eppic/tools/EnumerateInterfaces.java @@ -21,13 +21,7 @@ import javax.vecmath.AxisAngle4d; import javax.vecmath.Vector3d; -import org.biojava.nbio.structure.Chain; -import org.biojava.nbio.structure.Compound; -import org.biojava.nbio.structure.Group; -import org.biojava.nbio.structure.PDBCrystallographicInfo; -import org.biojava.nbio.structure.Structure; -import org.biojava.nbio.structure.StructureException; -import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.*; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.contact.Pair; import org.biojava.nbio.structure.contact.StructureInterface; @@ -209,17 +203,22 @@ public static void main(String[] args) throws Exception { outBaseName = inputFile.getName().substring(0, inputFile.getName().lastIndexOf(".")); } + int countPolyEntities = 0; + for (EntityInfo chainCluster:pdb.getEntityInfos()) { + if (chainCluster.getType() == EntityType.POLYMER) countPolyEntities++; + } + System.out.println(pdb.getPDBCode()+" - "+pdb.getPolyChains().size()+" poly chains ("+countPolyEntities+" sequence unique) "); - System.out.println(pdb.getPDBCode()+" - "+pdb.getChains()+" chains ("+pdb.getCompounds().size()+" sequence unique) "); - - for (Compound chainCluster:pdb.getCompounds()) { - // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 - if (chainCluster.getChains().isEmpty()) continue; - System.out.println(DataModelAdaptor.getChainClusterString(chainCluster)); + for (EntityInfo chainCluster:pdb.getEntityInfos()) { + if (chainCluster.getType() == EntityType.POLYMER) { + // in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26 + if (chainCluster.getChains().isEmpty()) continue; + System.out.println(DataModelAdaptor.getChainClusterString(chainCluster)); + } } System.out.println("Chains: "); - for (Chain chain:pdb.getChains()) { - System.out.println(chain.getInternalChainID()+"("+chain.getChainID()+")"); + for (Chain chain:pdb.getPolyChains()) { + System.out.println(chain.getId()+"("+chain.getName()+")"); } PDBCrystallographicInfo xtalInfo = pdb.getCrystallographicInfo(); @@ -359,8 +358,8 @@ public static void main(String[] args) throws Exception { if (writeDir!=null) { Set chainIds = new TreeSet(); - for (Chain chain:pdb.getChains()) { - chainIds.add(chain.getChainID()); + for (Chain chain:pdb.getPolyChains()) { + chainIds.add(chain.getName()); } pr.generateInterfacesPse(inputFile, chainIds, new File(writeDir,outBaseName+".allinterfaces.pml"), diff --git a/eppic-cli/src/test/java/eppic/TestInterfaceMatching.java b/eppic-cli/src/test/java/eppic/TestInterfaceMatching.java index 3cfc51661..6a3f5d42e 100644 --- a/eppic-cli/src/test/java/eppic/TestInterfaceMatching.java +++ b/eppic-cli/src/test/java/eppic/TestInterfaceMatching.java @@ -40,8 +40,8 @@ public void test4hwd() throws EppicException { Structure s = m.getStructure(); - for (Chain c : s.getChains()) { - System.out.println("Chain id "+c.getChainID() + ", chain asym id "+c.getInternalChainID()); + for (Chain c : s.getPolyChains()) { + System.out.println("Chain id "+c.getName() + ", chain asym id "+c.getId()); } diff --git a/eppic-cli/src/test/java/eppic/TestLargeStructures.java b/eppic-cli/src/test/java/eppic/TestLargeStructures.java index 542ee1528..f8cf9eb52 100644 --- a/eppic-cli/src/test/java/eppic/TestLargeStructures.java +++ b/eppic-cli/src/test/java/eppic/TestLargeStructures.java @@ -1,16 +1,25 @@ package eppic; +//import org.junit.Ignore; +import ch.systemsx.sybit.crkwebui.shared.model.AssemblyScore; +import eppic.assembly.TestLatticeGraph; +import eppic.model.*; +import org.biojava.nbio.structure.Structure; +import org.biojava.nbio.structure.contact.StructureInterface; +import org.biojava.nbio.structure.contact.StructureInterfaceCluster; +import org.biojava.nbio.structure.contact.StructureInterfaceList; +import org.biojava.nbio.structure.xtal.CrystalBuilder; import org.junit.Test; -import eppic.model.ChainClusterDB; -import eppic.model.InterfaceClusterDB; -import eppic.model.InterfaceDB; -import eppic.model.PdbInfoDB; +import javax.vecmath.Matrix4d; import static org.junit.Assert.*; import java.io.File; import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * An integration test that makes sure that large structures are correctly handled @@ -30,6 +39,8 @@ public class TestLargeStructures { * All author chain ids are 2 chars long. * @throws IOException */ + // can be long and memory hungry, ignore if needed + //@Ignore @Test public void test4v9e() throws IOException { @@ -48,6 +59,10 @@ public void test4v9e() throws IOException { m.run(params); PdbInfoDB pdbInfo = m.getDataModelAdaptor().getPdbInfo(); + + // the title should be set, this checks that DataModelAdaptor.setPdbMetadata worked + assertNotNull(pdbInfo.getTitle()); + assertTrue(pdbInfo.getTitle().length()>2); ChainClusterDB cc = pdbInfo.getChainCluster("AA"); assertEquals(36, cc.getNumMembers()); @@ -70,9 +85,176 @@ public void test4v9e() throws IOException { assertEquals(2, idb.getResidueBurials().get(0).getResidueInfo().getRepChain().length()); } } - + + // delete all files and then the dir + File[] files = outDir.listFiles(); + for (File f : files) f.delete(); outDir.delete(); } + /** + * NCS output needs to be less redundant. + * Issue https://github.com/eppic-team/eppic/issues/205 + * @throws IOException + */ + //@Ignore // test is very heavy (it writes all coordinate files which takes half of the time or more), ignore if needed + @Test + public void test1auy() throws IOException { + + File outDir = new File(TMPDIR, "eppicTestLargeStructures"); + + outDir.mkdir(); + + assertTrue(outDir.isDirectory()); + + + String pdbId = "1auy"; + EppicParams params = Utils.generateEppicParams(pdbId, outDir); + + params.setGenerateOutputCoordFiles(true); + + Main m = new Main(); + + m.run(params); + + PdbInfoDB pdbInfo = m.getDataModelAdaptor().getPdbInfo(); + + assertTrue(pdbInfo.isNcsOpsPresent()); + + assertEquals(1, pdbInfo.getNumChainClusters()); + ChainClusterDB ccdb = pdbInfo.getChainClusters().get(0); + assertEquals(3, ccdb.getNumMembers()); + + assertEquals(10, pdbInfo.getInterfaceClusters().size()); + + assertEquals(4, pdbInfo.getAssemblies().size()); + + // the cluster members should be reduced to NCS equivalents: it should be a low number + int count = 0; + for (InterfaceClusterDB interfCluster : pdbInfo.getInterfaceClusters()) { + assertTrue(interfCluster.size()<10); + assertTrue(interfCluster.getAvgContactOverlapScore() > 0); + for (InterfaceDB idb : interfCluster.getInterfaces()) { + // can't assert this, the n chains are still in some interfaces + //assertFalse(idb.getChain1().endsWith("n")); + assertEquals(interfCluster.getClusterId(), idb.getClusterId()); + count++; + } + } + + assertTrue(count<20); + + File[] files = outDir.listFiles((d, name) -> (name.endsWith(".cif.gz") && name.contains(".interface.") )); + + assertNotNull(files); + + assertEquals(count, files.length); + + // test for issue #141 + AssemblyDB icoAssembly = pdbInfo.getAssemblies().get(3); + // check that this really is the icosahedral assembly + assertEquals("I", icoAssembly.getAssemblyContents().get(0).getSymmetry()); + assertEquals(180, icoAssembly.getAssemblyContents().get(0).getMmSize()); + boolean pdb1Annotation = false; + for (AssemblyScoreDB as : icoAssembly.getAssemblyScores()) { + if (as.getMethod().equals("pdb1") && as.getCallName().equals("bio")) { + pdb1Annotation = true; + } + } + + assertTrue(pdb1Annotation); + + // delete all files and then the dir + files = outDir.listFiles(); + for (File f : files) f.delete(); + + outDir.delete(); + + } + + /** + * As an extra test for NCS: some sanity checks that the grouping by NCS and clustering by contact + * overlap score are consistent with each other. + * @throws Exception + */ + @Test + public void testInterfaceNcsGrouping() throws Exception { + Structure s = TestLatticeGraph.getStructure("1auy"); + + Map chainOrigNames = new HashMap<>(); + Map chainNcsOps = new HashMap<>(); + CrystalBuilder.expandNcsOps(s,chainOrigNames,chainNcsOps); + CrystalBuilder cb = new CrystalBuilder(s,chainOrigNames,chainNcsOps); + + StructureInterfaceList interfaces = cb.getUniqueInterfaces(); + int spherePoints = StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS / 10; + interfaces.calcAsas(spherePoints, + Runtime.getRuntime().availableProcessors(), + StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE); + interfaces.removeInterfacesBelowArea(); + + List full = interfaces.getClusters(EppicParams.CLUSTERING_CONTACT_OVERLAP_SCORE_CUTOFF); + List ncs = interfaces.getClustersNcs(); + + int idx = 0; + for (StructureInterfaceCluster c : ncs) { + int refId = 0; + int jdx = 0; + for (StructureInterface i : c.getMembers()) { + // it seems that ncs list does not filter for area (bug in biojava 5.0.0), this is a workaround + if (i.getTotalArea() clusters) { + for (StructureInterfaceCluster c : clusters) { + for (StructureInterface i : c.getMembers()) { + if (interf.getId() == i.getId()) return c; + } + } + return null; + } + +// private StructureInterface findCorrespondingInterf(StructureInterface interf, List clusters) { +// for (StructureInterfaceCluster c : clusters) { +// for (StructureInterface i : c.getMembers()) { +// if (interf.getId() == i.getId()) return i; +// } +// } +// return null; +// } } diff --git a/eppic-cli/src/test/java/eppic/assembly/TestAssemblyDiagrams.java b/eppic-cli/src/test/java/eppic/assembly/TestAssemblyDiagrams.java new file mode 100644 index 000000000..ee3672a7f --- /dev/null +++ b/eppic-cli/src/test/java/eppic/assembly/TestAssemblyDiagrams.java @@ -0,0 +1,55 @@ +package eppic.assembly; + +import eppic.EppicParams; +import eppic.Main; +import eppic.Utils; +import eppic.model.PdbInfoDB; +import org.junit.Test; + +import java.io.File; + +import static org.junit.Assert.*; + +public class TestAssemblyDiagrams { + + private static final String TMPDIR = System.getProperty("java.io.tmpdir"); + + + /** + * Testing that assembly diagram json files are properly generated + */ + @Test + public void testAssemblyDiagramGeneration() { + File outDir = new File(TMPDIR, "eppicTestAssemblyDiagrams"); + + outDir.mkdir(); + + assertTrue(outDir.isDirectory()); + + + String pdbId = "5cti"; + EppicParams params = Utils.generateEppicParams(pdbId, outDir); + params.setGenerateDiagrams(true); + + Main m = new Main(); + + m.run(params); + + PdbInfoDB pdbInfo = m.getDataModelAdaptor().getPdbInfo(); + + File[] files = outDir.listFiles((d, name) -> (name.endsWith(".json") && name.contains(".diagram.") )); + assertNotNull(files); + assertEquals(pdbInfo.getAssemblies().size(), files.length); + + files = outDir.listFiles((d, name) -> (name.endsWith(".json") && name.contains(".latticeGraph.") )); + assertNotNull(files); + // there's always 1 additional file for the whole unit cell (named with a "*") + assertEquals(pdbInfo.getAssemblies().size() + 1, files.length); + + + // delete all files and then the dir + files = outDir.listFiles(); + for (File f : files) f.delete(); + outDir.delete(); + } +} diff --git a/eppic-cli/src/test/java/eppic/assembly/TestLatticeGraph.java b/eppic-cli/src/test/java/eppic/assembly/TestLatticeGraph.java index e24a2fcf1..6688e3c95 100644 --- a/eppic-cli/src/test/java/eppic/assembly/TestLatticeGraph.java +++ b/eppic-cli/src/test/java/eppic/assembly/TestLatticeGraph.java @@ -9,8 +9,11 @@ import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import javax.vecmath.Matrix4d; import javax.vecmath.Point3i; import org.biojava.nbio.structure.Structure; @@ -25,7 +28,6 @@ import org.slf4j.LoggerFactory; import eppic.EppicParams; -import eppic.commons.util.StructureUtils; public class TestLatticeGraph { @@ -477,7 +479,7 @@ public static CrystalAssemblies getCrystalAssemblies(String pdbId) throws IOExce public static CrystalAssemblies getCrystalAssemblies(String pdbId, boolean forceContracted) throws IOException, StructureException { Structure s = getStructure(pdbId); - + StructureInterfaceList interfaces = getAllInterfaces(s); CrystalAssemblies crystalAssemblies = new CrystalAssemblies(s, interfaces, forceContracted); @@ -493,8 +495,6 @@ public static Structure getStructure(String pdbId) throws IOException, Structure StructureIO.setAtomCache(cache); Structure s = StructureIO.getStructure(pdbId); - // we need to expand the ncs ops to be able to test properly entries with ncs ops - StructureUtils.expandNcsOps(s); return s; } @@ -505,8 +505,19 @@ public static StructureInterfaceList getAllInterfaces(Structure s) { public static StructureInterfaceList getAllInterfaces(Structure s, boolean fast) { logger.info("Calculating interfaces for "+s.getIdentifier().toString()); - - CrystalBuilder cb = new CrystalBuilder(s); + + CrystalBuilder cb; + + // we need to expand the ncs ops to be able to test properly entries with ncs ops + if (s.getCrystallographicInfo().getNcsOperators()!=null) { + Map chainOrigNames = new HashMap<>(); + Map chainNcsOps = new HashMap<>(); + CrystalBuilder.expandNcsOps(s,chainOrigNames,chainNcsOps); + cb = new CrystalBuilder(s,chainOrigNames,chainNcsOps); + } else { + cb = new CrystalBuilder(s); + } + StructureInterfaceList interfaces = cb.getUniqueInterfaces(); int spherePoints = StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS; if (fast) spherePoints = spherePoints / 10; diff --git a/eppic-cli/src/test/java/eppic/assembly/TestSymmetryDetection.java b/eppic-cli/src/test/java/eppic/assembly/TestSymmetryDetection.java index 6550accf8..2eafafd13 100644 --- a/eppic-cli/src/test/java/eppic/assembly/TestSymmetryDetection.java +++ b/eppic-cli/src/test/java/eppic/assembly/TestSymmetryDetection.java @@ -5,7 +5,7 @@ import java.io.IOException; import org.biojava.nbio.structure.StructureException; -import org.junit.Ignore; +//import org.junit.Ignore; import org.junit.Test; /** @@ -98,7 +98,7 @@ public void test3r93D4HeteromericAssembly() throws IOException, StructureExcepti } } - @Ignore // this test takes extra long to run, 19s currently on my laptop + //@Ignore // this test takes extra long to run, 19s currently on my laptop @Test public void test1auyIcosahedralAssembly() throws IOException, StructureException { @@ -107,6 +107,9 @@ public void test1auyIcosahedralAssembly() throws IOException, StructureException CrystalAssemblies assemblies = TestLatticeGraph.getCrystalAssemblies("1auy"); + // 4 assemblies including the icosahedral + assertEquals(4, assemblies.size()); + for (Assembly a:assemblies) { if (a.toString().equals("{2,3}")) { diff --git a/eppic-model/src/main/resources/META-INF/persistence.xml b/eppic-model/src/main/resources/META-INF/persistence.xml index 59ba9d81d..5fb00a233 100644 --- a/eppic-model/src/main/resources/META-INF/persistence.xml +++ b/eppic-model/src/main/resources/META-INF/persistence.xml @@ -19,6 +19,7 @@ +