Skip to content

Commit 20b4891

Browse files
authored
Fix firstOnly selection behavior
`firstOnly` used to select one match per (object, segi, chain) combination, so if an object had multiple chains, each chain could contribute its own match. With this change, `firstOnly` matches only once per object: it keeps the first match found in the first segi+chain (in sorted order) that contains the sequence, and ignores that object's remaining chains.
1 parent 889738c commit 20b4891

File tree

1 file changed

+19
-12
lines changed

1 file changed

+19
-12
lines changed

findseq.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -333,46 +333,53 @@ def findseq(needle, haystack='*', selName=None, het=0, firstOnly=0):
333333
return None
334334

335335
# remove hetero atoms (waters/ligands/etc) from consideration?
336+
hstk = cmd.get_unused_name()
336337
if bool(int(het)):
337-
cmd.select("__h", "br. " + haystack)
338+
cmd.select(hstk, f"byres {haystack}")
338339
else:
339-
cmd.select("__h", "br. " + haystack + " and not het")
340+
cmd.select(hstk, f"byres {haystack} and not het")
340341

341342
# get the AAs in the haystack
342343
IDs = defaultdict(list)
343344
AAs = defaultdict(list)
344345
for obj in cmd.get_object_list():
345-
for atom in cmd.get_model(f"%{obj} and (name ca) and __h").atom:
346+
for atom in cmd.get_model(f"%{obj} and (name ca) and {hstk}").atom:
346347
IDs[(obj, atom.segi, atom.chain)].append(atom.resi)
347348
AAs[(obj, atom.segi, atom.chain)].append(ONE_LETTER[atom.resn])
348349

349350
reNeedle = re.compile(needle.upper())
350-
# make an empty selection to which we add residues
351-
cmd.select(rSelName, 'None')
352351

353-
for key in AAs:
352+
matches = defaultdict(list)
353+
for key in sorted(AAs):
354354
obj, segi, chain = key
355355

356+
if int(firstOnly) and len(matches[obj]) >= 1:
357+
# ignore other chains
358+
continue
359+
356360
chain_sequence = "".join(AAs[key])
357361
it = reNeedle.finditer(chain_sequence)
358362
for i in it:
359363
start, stop = i.span()
360364
resi = "+".join(IDs[key][start:stop])
361365

362-
sel = f'__h and %{obj} and resi {resi}'
366+
sel = f'{hstk} and %{obj} and resi {resi}'
363367

364368
if chain:
365369
sel += f' and chain {chain}'
366370
if segi:
367371
sel += f' and segi {segi}'
368372

369-
sel = f'{rSelName} or ({sel})'
370-
cmd.select(rSelName, sel)
373+
matches[obj].append(sel)
374+
375+
# remove multiple matches in the same chain
376+
if int(firstOnly):
377+
for obj, v in matches.items():
378+
matches[obj] = v[:1]
371379

372-
if int(firstOnly):
373-
break
380+
cmd.select(rSelName, " or ".join(f"({s})" for v in matches.values() for s in v))
381+
cmd.delete(hstk)
374382

375-
cmd.delete("__h")
376383
cnt = cmd.count_atoms(rSelName)
377384
if not cnt:
378385
print("Sequence was not found")

0 commit comments

Comments
 (0)