diff --git a/README.md b/README.md index 3febd88..00ac090 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ Our LRAR baseline models are also available: * ` lr_ar_640M ` -An example of generating one sequence randomly sampled from the train distribution length can be found in +An example of unconditionally generating a sequence of a specified length can be found in [this notebook](https://github.com/microsoft/evodiff/tree/main/examples/evodiff.ipynb). To evaluate the generated sequences, we implement our self-consistency Omegafold ESM-IF pipeline, as shown in diff --git a/evodiff/conditional_generation.py b/evodiff/conditional_generation.py index a7bcba2..7294849 100644 --- a/evodiff/conditional_generation.py +++ b/evodiff/conditional_generation.py @@ -394,7 +394,7 @@ def generate_scaffold(model, PDB_ID, motif_start_idxs, motif_end_idxs, scaffold_ p = torch.nn.functional.softmax(p, dim=1) # softmax over categorical probs p_sample = torch.multinomial(p, num_samples=1) sample[:, i] = p_sample.squeeze() - print("new sequence", [tokenizer.untokenize(s) for s in sample]) + print("Generated sequence:", [tokenizer.untokenize(s) for s in sample]) untokenized = [tokenizer.untokenize(s) for s in sample] return untokenized, new_start_idxs, new_end_idxs @@ -484,6 +484,29 @@ def scramble_input(sequences, start_idxs, end_idxs): sequences = [[s] for s in sequences] return scrambled_seqs, sequences, scrambled_idrs, original_idrs, start_idxs, end_idxs +def inpaint_simple(model, sequence, start_idx, end_idx, tokenizer=Tokenizer(), device='cuda'): + "used in examples for simplicity" + all_aas = tokenizer.all_aas + idr_length = end_idx - start_idx + masked_sequence = sequence[0:start_idx] + '#' * idr_length + sequence[end_idx:] + tokenized_sequence = torch.tensor(tokenizer.tokenizeMSA(masked_sequence)) + + loc = np.arange(start_idx, end_idx) + sample = tokenized_sequence.to(torch.long) + sample = sample.to(device) + np.random.shuffle(loc) + with torch.no_grad(): + for i in 
def _select_max_hamming(sliced_msa, anchor_seq, n_sequences):
    """Greedily pick ``n_sequences`` rows, query first, maximising the minimum Hamming distance.

    The second row is drawn uniformly at random from the non-query rows. Each
    further row is the remaining candidate whose smallest Hamming distance to
    the rows picked after the query is largest.

    Args:
        sliced_msa: 2-D integer array of tokenized rows; row 0 is skipped as the query.
        anchor_seq: Tokenized query row, placed first in the result.
        n_sequences: Number of rows to return; must be smaller than ``len(sliced_msa)``.

    Returns:
        A list of ``n_sequences`` token lists.
    """
    output = [list(anchor_seq)]
    if n_sequences == 1:
        # Only the query was requested; nothing to select.
        return output

    # Seed the selection with one random non-query row.
    random_ind = np.random.choice(np.arange(1, len(sliced_msa)))
    random_seq = sliced_msa[random_ind]
    output.append(list(random_seq))

    # Candidates are all non-query rows except the seed (index shifted by one
    # because the query row has been sliced off).
    msa_subset = np.delete(sliced_msa[1:], random_ind - 1, axis=0)

    # Row i holds the distances from the i-th picked sequence to every remaining
    # candidate. Unfilled rows stay at 1.0, the maximum Hamming distance, so
    # they never influence the column-wise minimum.
    distance_matrix = np.ones((n_sequences - 2, len(msa_subset)))
    for i in range(n_sequences - 2):
        distance_matrix[i] = cdist(np.expand_dims(random_seq, axis=0), msa_subset, metric='hamming')[0]
        col_min = np.min(distance_matrix, axis=0)
        random_ind = np.argmax(col_min)
        random_seq = msa_subset[random_ind]
        output.append(list(random_seq))
        # Drop the chosen candidate from both the pool and the distance table.
        msa_subset = np.delete(msa_subset, random_ind, axis=0)
        distance_matrix = np.delete(distance_matrix, random_ind, axis=1)
    return output


def subsample_msa(path_to_msa, n_sequences=64, max_seq_len=512, selection_type='random'):
    """Read an MSA file and subsample it to ``n_sequences`` rows of at most ``max_seq_len`` columns.

    The file is parsed with ``parse_fasta``. Only upper-case characters and
    ``-`` are kept from each record (lower-case characters and ``.`` are
    dropped; presumably a3m insertion columns -- confirm against the data
    format). If the alignment is longer than ``max_seq_len`` a random
    contiguous window of that width is taken. Rows consisting only of gaps
    inside the window are discarded before subsampling.

    Args:
        path_to_msa: Path to the alignment file.
        n_sequences: Number of sequences to return (>= 1).
        max_seq_len: Maximum number of alignment columns to keep.
        selection_type: ``'random'`` or ``'MaxHamming'``; only consulted when
            the MSA has more usable rows than ``n_sequences``.

    Returns:
        ``(sequences, query)`` where ``sequences`` is a list of strings (query
        first) and ``query`` is ``sequences[0]``.

    Raises:
        FileNotFoundError: ``path_to_msa`` does not exist.
        ValueError: the MSA is empty, ``n_sequences`` is < 1, the MSA has fewer
            usable rows than ``n_sequences``, or ``selection_type`` is unknown.
    """
    if n_sequences < 1:
        raise ValueError(f"n_sequences must be >= 1, got {n_sequences}")
    # Fail fast instead of printing and carrying on with a path that cannot be read.
    if not os.path.exists(path_to_msa):
        raise FileNotFoundError(f"PATH TO MSA DOES NOT EXIST: {path_to_msa}")

    alphabet = PROTEIN_ALPHABET
    tokenizer = Tokenizer(alphabet)
    alpha = np.array(list(alphabet))
    gap_idx = tokenizer.alphabet.index(GAP)

    parsed_msa = parse_fasta(path_to_msa)
    if len(parsed_msa) == 0:
        raise ValueError(f"MSA at {path_to_msa} contains no sequences")

    # Keep aligned columns only: upper-case residues and gap characters.
    aligned_msa = [''.join(char for char in seq if char.isupper() or char == '-') for seq in parsed_msa]

    tokenized_msa = np.array([tokenizer.tokenizeMSA(seq).tolist() for seq in aligned_msa])
    msa_seq_len = len(tokenized_msa[0])

    if msa_seq_len > max_seq_len:
        slice_start = np.random.choice(msa_seq_len - max_seq_len + 1)
        seq_len = max_seq_len
    else:
        slice_start = 0
        seq_len = msa_seq_len

    # Crop every row to the chosen window.
    sliced_msa_seq = tokenized_msa[:, slice_start: slice_start + seq_len]
    anchor_seq = sliced_msa_seq[0]  # This is the query sequence in MSA

    # Drop rows that are all gaps inside the window.
    # NOTE(review): if the query itself is all gaps here it is dropped too, and
    # row 0 of the filtered MSA is then no longer the query -- confirm whether
    # that can happen with the intended inputs.
    sliced_msa = sliced_msa_seq[(sliced_msa_seq != gap_idx).any(axis=1)]
    msa_num_seqs = len(sliced_msa)

    if msa_num_seqs < n_sequences:
        raise ValueError(
            f"msa num_seqs ({msa_num_seqs}) < n_sequences ({n_sequences}), "
            "indicates dataset not filtered properly"
        )
    elif msa_num_seqs > n_sequences:
        if selection_type == 'random':
            # Sample n_sequences - 1 distinct non-query rows and prepend the query.
            random_idx = np.random.choice(msa_num_seqs - 1, size=n_sequences - 1, replace=False) + 1
            output = np.concatenate(
                (np.expand_dims(anchor_seq, axis=0), sliced_msa[random_idx.astype(int)]), axis=0
            )
        elif selection_type == "MaxHamming":
            output = _select_max_hamming(sliced_msa, anchor_seq, n_sequences)
        else:
            raise ValueError(
                f"Unknown selection_type {selection_type!r}; expected 'random' or 'MaxHamming'"
            )
    else:
        # Exactly n_sequences usable rows: nothing to subsample.
        output = sliced_msa

    # Map token ids back to alphabet characters, one string per row.
    output = [''.join(seq) for seq in alpha[np.asarray(output)]]
    return output, output[0]
penalty=None, batch_size=3, devic loc = np.arange(seq_len) np.random.shuffle(loc) with torch.no_grad(): - for i in loc: + for i in tqdm(loc): timestep = torch.tensor([0] * batch_size) # placeholder but not called in model timestep = timestep.to(device) prediction = model(sample, timestep) #, input_mask=input_mask.unsqueeze(-1)) #sample prediction given input @@ -217,7 +217,7 @@ def generate_oaardm(model, tokenizer, seq_len, penalty=None, batch_size=3, devic def generate_autoreg(model, tokenizer, samples=100, batch_size=1, max_seq_len=1024): # Generates 1 seq at a time, no batching, to make it easier to deal w variable seq lengths # Generates until max length or until stop token is predicted - model.eval().cuda() + #model.eval().cuda() device = model.device() start = tokenizer.start_id @@ -260,7 +260,7 @@ def generate_d3pm(model, tokenizer, Q, Q_bar, timesteps, seq_len, batch_size=3, """ Generate a random start string from uniform dist and convert to predictions """ - model.eval().cuda() + #model.eval() #device = model.device() sample = torch.randint(0, tokenizer.K, (batch_size, seq_len)) diff --git a/evodiff/generate-msa.py b/evodiff/generate_msa.py similarity index 88% rename from evodiff/generate-msa.py rename to evodiff/generate_msa.py index 33871ba..719e75e 100644 --- a/evodiff/generate-msa.py +++ b/evodiff/generate_msa.py @@ -1,32 +1,18 @@ import argparse -import json import evodiff import os import numpy as np -import torch -import pandas as pd -from sequence_models.esm import MSATransformer -from sequence_models.constants import MSA_ALPHABET, MSA_PAD, MASK -from evodiff.utils import Tokenizer -from sequence_models.utils import parse_fasta -from evodiff.model import MSATransformerTime -from evodiff.data import read_idr_files from tqdm import tqdm import pathlib import glob -import string - from evodiff.data import A3MMSADataset, IDRDataset from torch.utils.data import Subset -from sequence_models.samplers import SortishSampler, ApproxBatchSampler from 
def generate_query_oadm_msa_simple(path_to_msa, model, tokenizer, n_sequences, seq_length, batch_size=1, penalty_value=2, device='gpu',
                                   start_msa=True, selection_type='MaxHamming'):
    """Generate a query sequence conditioned on an MSA subsampled from ``path_to_msa``.

    For each batch element an MSA is subsampled with
    ``evodiff.data.subsample_msa``. Rows ``1..n_sequences-1`` are copied in as
    context, and row 0 (the query) is left fully masked. Query positions are
    then filled one at a time, in random order, by sampling from the model's
    prediction at that position.

    Args:
        path_to_msa: Path to the MSA file handed to ``subsample_msa``.
        model: Callable invoked as ``model(sample)``; per the original comment
            its output has shape (batch, n_sequences, length, n_tokens).
        tokenizer: Provides ``mask_id``, ``pad_id``, ``K``, ``tokenizeMSA`` and
            ``untokenize``.
        n_sequences: Number of MSA rows, including the query row.
        seq_length: Width of the token tensor; shorter MSAs are right-padded.
        batch_size: Number of independent samples (>= 1).
        penalty_value: Added to the divisor of the last category's probability
            before sampling.
        device: torch device string. The legacy value ``'gpu'`` is accepted and
            treated as ``'cuda'``.
        start_msa: Must be True; the function always starts from an MSA.
        selection_type: Forwarded to ``subsample_msa``.

    Returns:
        ``(sample, untokenized)``: the filled token tensor and, per batch
        element, a one-item list holding the untokenized query row.

    Raises:
        ValueError: ``batch_size`` is < 1.
        NotImplementedError: ``start_msa`` is False.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    if not start_msa:
        # Previously this path crashed with a NameError once the sampling loop
        # was reached; fail up front with an explicit message instead.
        raise NotImplementedError("generate_query_oadm_msa_simple only supports start_msa=True")
    if device == 'gpu':
        # 'gpu' is not a valid torch device string; map the legacy default to CUDA.
        device = 'cuda'

    mask_id = tokenizer.mask_id
    src = torch.full((batch_size, n_sequences, seq_length), fill_value=mask_id)

    valid_msas = []
    query_sequences = []
    for _ in range(batch_size):
        valid_msa, query_sequence = evodiff.data.subsample_msa(path_to_msa, n_sequences=n_sequences,
                                                               max_seq_len=seq_length, selection_type=selection_type)
        valid_msa = torch.tensor(np.array([tokenizer.tokenizeMSA(seq) for seq in valid_msa]))
        valid_msas.append(valid_msa)
        query_sequences.append(query_sequence)

    for b in range(batch_size):
        seq_len = len(query_sequences[b])
        # Context rows come from the MSA; row 0 keeps the mask token.
        # No .squeeze(): the slice already has the target's shape, and squeezing
        # would break the assignment when a dimension has size 1.
        src[b, 1:n_sequences, :seq_len] = valid_msas[b][1:n_sequences, :seq_len]
        # Everything past the alignment width is padding, in every row.
        src[b, :, seq_len:] = tokenizer.pad_id

    src = src.to(device)
    sample = src.clone()

    # Only the query row is generated. seq_len is taken from the last batch
    # element; every element is cropped from the same file with the same
    # max_seq_len, so the widths are assumed equal -- TODO confirm.
    query_row = 0
    positions = np.arange(seq_len)
    np.random.shuffle(positions)

    # ONLY USING ON BATCH_SIZE=1 for now
    with torch.no_grad():
        for pos in tqdm(positions):
            pos = int(pos)
            preds = model(sample)  # Output shape of preds is (BS=1, N=64, L, n_tokens=31)
            # For the query row the last category (index K-1) is excluded so
            # that a gap can never be sampled.
            p = preds[:, query_row, pos, :tokenizer.K - 1]
            p_softmax = torch.nn.functional.softmax(p, dim=1)
            # NOTE(review): this "gap penalty" hits the last remaining column,
            # which after the truncation above is no longer the gap category.
            # Kept as in the original; confirm whether it is intended here.
            penalty = torch.ones(p.shape).to(p.device)
            penalty[:, -1] += penalty_value
            p_softmax /= penalty
            # multinomial accepts unnormalised non-negative weights.
            p_sample = torch.multinomial(input=p_softmax, num_samples=1).squeeze(-1)
            sample[:, query_row, pos] = p_sample

    untokenized = [[tokenizer.untokenize(msa[0])] for msa in sample]  # return query sequence only
    return sample, untokenized  # return query sequences only
EvoDiff generates high-fidelity, diverse, and structurally-plausible proteins that fully cover natural sequence and functional space. By operating in the universal protein design space, EvoDiff can generate disordered regions and scaffold functional structural motifs without any explicit structural information. We envision that EvoDiff will expand capabilities in protein engineering beyond the structure-function paradigm towards programmable, sequence-first design.\n", + "## Installation\n", "\n", - "To download our code, we recommend creating a clean conda environment with python ```v3.8.5```. You can do so by running ```conda create --name evodiff python=3.8.5```. In that new environment, to download our code, run:" + "To download and run our code, first open this notebook in a clean conda environment. We recommend creating it with python ```v3.8.5```. You can do so by running ```conda create --name evodiff python=3.8.5```. In that new environment, to download our code, run:" ] }, { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Collecting evodiff\n", - " Obtaining dependency information for evodiff from https://files.pythonhosted.org/packages/2b/12/85fb5e97b1f759ea30512368209476d6eb815654ac6c6470c016d99fb370/evodiff-0.9.9-py3-none-any.whl.metadata\n", - " Downloading evodiff-0.9.9-py3-none-any.whl.metadata (21 kB)\n", - "Collecting pandas (from evodiff)\n", - " Obtaining dependency information for pandas from https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl.metadata (18 kB)\n", - "Collecting lmdb (from evodiff)\n", - " Using cached lmdb-1.4.1-cp38-cp38-macosx_10_15_x86_64.whl (101 kB)\n", - "Collecting numpy (from evodiff)\n", - " Obtaining 
dependency information for numpy from https://files.pythonhosted.org/packages/11/10/943cfb579f1a02909ff96464c69893b1d25be3731b5d3652c2e0cf1281ea/numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl.metadata (5.6 kB)\n", - "Collecting sequence-models (from evodiff)\n", - " Obtaining dependency information for sequence-models from https://files.pythonhosted.org/packages/35/dd/1ddb71510ae24d58bdd88a582de8b831d1faf9bcefc41d17b9835ae3562d/sequence_models-1.7.0-py3-none-any.whl.metadata\n", - " Using cached sequence_models-1.7.0-py3-none-any.whl.metadata (15 kB)\n", - "Collecting mlflow (from evodiff)\n", - " Obtaining dependency information for mlflow from https://files.pythonhosted.org/packages/5d/c8/2ddb5e1d0f75a088b8580868ca439221d3a1d9649a52eb7f940ecc5b5c9f/mlflow-2.6.0-py3-none-any.whl.metadata\n", - " Using cached mlflow-2.6.0-py3-none-any.whl.metadata (12 kB)\n", - "Collecting scikit-learn (from evodiff)\n", - " Obtaining dependency information for scikit-learn from https://files.pythonhosted.org/packages/33/cb/0e41fad5b30fd66925e47952ddc720d078bdfd8397584a4873ec1cf590ca/scikit_learn-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached scikit_learn-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl.metadata (11 kB)\n", - "Collecting blosum (from evodiff)\n", - " Using cached blosum-2.0.2-py3-none-any.whl (21 kB)\n", - "Collecting seaborn (from evodiff)\n", - " Using cached seaborn-0.12.2-py3-none-any.whl (293 kB)\n", - "Collecting matplotlib (from evodiff)\n", - " Obtaining dependency information for matplotlib from https://files.pythonhosted.org/packages/b2/b6/c6596fbc30899e6da31450053054d7da61a21a3f510544fb7cb7658a3de3/matplotlib-3.7.2-cp38-cp38-macosx_10_12_x86_64.whl.metadata\n", - " Using cached matplotlib-3.7.2-cp38-cp38-macosx_10_12_x86_64.whl.metadata (5.6 kB)\n", - "Collecting fair-esm (from evodiff)\n", - " Using cached fair_esm-2.0.0-py3-none-any.whl (93 kB)\n", - "Collecting tqdm 
(from evodiff)\n", - " Obtaining dependency information for tqdm from https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl.metadata\n", - " Using cached tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n", - "Collecting biotite (from evodiff)\n", - " Downloading biotite-0.38.0.tar.gz (32.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m32.7/32.7 MB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", - "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", - "\u001b[?25hCollecting requests (from evodiff)\n", - " Obtaining dependency information for requests from https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl.metadata\n", - " Using cached requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)\n", - "Collecting mdanalysis (from evodiff)\n", - " Using cached MDAnalysis-2.4.3-cp38-cp38-macosx_10_9_x86_64.whl (3.4 MB)\n", - "Collecting pdb-tools (from evodiff)\n", - " Using cached pdb_tools-2.5.0-py3-none-any.whl (207 kB)\n", - "Collecting msgpack>=0.5.6 (from biotite->evodiff)\n", - " Using cached msgpack-1.0.5-cp38-cp38-macosx_10_9_x86_64.whl (73 kB)\n", - "Collecting networkx>=2.0 (from biotite->evodiff)\n", - " Using cached networkx-3.1-py3-none-any.whl (2.1 MB)\n", - "Collecting charset-normalizer<4,>=2 (from requests->evodiff)\n", - " Obtaining dependency information for charset-normalizer<4,>=2 from https://files.pythonhosted.org/packages/79/55/9aef5046a1765acacf28f80994f5a964ab4f43ab75208b1265191a11004b/charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached 
charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl.metadata (31 kB)\n", - "Collecting idna<4,>=2.5 (from requests->evodiff)\n", - " Using cached idna-3.4-py3-none-any.whl (61 kB)\n", - "Collecting urllib3<3,>=1.21.1 (from requests->evodiff)\n", - " Obtaining dependency information for urllib3<3,>=1.21.1 from https://files.pythonhosted.org/packages/9b/81/62fd61001fa4b9d0df6e31d47ff49cfa9de4af03adecf339c7bc30656b37/urllib3-2.0.4-py3-none-any.whl.metadata\n", - " Using cached urllib3-2.0.4-py3-none-any.whl.metadata (6.6 kB)\n", - "Collecting certifi>=2017.4.17 (from requests->evodiff)\n", - " Obtaining dependency information for certifi>=2017.4.17 from https://files.pythonhosted.org/packages/4c/dd/2234eab22353ffc7d94e8d13177aaa050113286e93e7b40eae01fbf7c3d9/certifi-2023.7.22-py3-none-any.whl.metadata\n", - " Using cached certifi-2023.7.22-py3-none-any.whl.metadata (2.2 kB)\n", - "Collecting contourpy>=1.0.1 (from matplotlib->evodiff)\n", - " Obtaining dependency information for contourpy>=1.0.1 from https://files.pythonhosted.org/packages/ff/dd/5d44bc3a5993c25b75b7aef4f810ebd74ef9057dd2a4eab37eba240ee401/contourpy-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached contourpy-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl.metadata (5.7 kB)\n", - "Collecting cycler>=0.10 (from matplotlib->evodiff)\n", - " Using cached cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", - "Collecting fonttools>=4.22.0 (from matplotlib->evodiff)\n", - " Obtaining dependency information for fonttools>=4.22.0 from https://files.pythonhosted.org/packages/52/51/147c24a058779345d22e828391830db64e37afec205ca53a37ff3b65f961/fonttools-4.42.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached fonttools-4.42.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata (150 kB)\n", - "Collecting kiwisolver>=1.0.1 (from matplotlib->evodiff)\n", - " Obtaining dependency information for kiwisolver>=1.0.1 from 
https://files.pythonhosted.org/packages/ca/78/2bff6dbedc619a614871005c32f106f24c3366e1025afff0fdfc2b56b7c8/kiwisolver-1.4.5-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached kiwisolver-1.4.5-cp38-cp38-macosx_10_9_x86_64.whl.metadata (6.4 kB)\n", - "Requirement already satisfied: packaging>=20.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff) (23.1)\n", - "Collecting pillow>=6.2.0 (from matplotlib->evodiff)\n", - " Obtaining dependency information for pillow>=6.2.0 from https://files.pythonhosted.org/packages/5a/29/aa1678cae507a480a6d75453c1de98940e5eb6bd8f0e8e8347ec29a4dfc0/Pillow-10.0.0-cp38-cp38-macosx_10_10_x86_64.whl.metadata\n", - " Using cached Pillow-10.0.0-cp38-cp38-macosx_10_10_x86_64.whl.metadata (9.5 kB)\n", - "Collecting pyparsing<3.1,>=2.3.1 (from matplotlib->evodiff)\n", - " Using cached pyparsing-3.0.9-py3-none-any.whl (98 kB)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff) (2.8.2)\n", - "Collecting importlib-resources>=3.2.0 (from matplotlib->evodiff)\n", - " Obtaining dependency information for importlib-resources>=3.2.0 from https://files.pythonhosted.org/packages/25/d4/592f53ce2f8dde8be5720851bd0ab71cc2e76c55978e4163ef1ab7e389bb/importlib_resources-6.0.1-py3-none-any.whl.metadata\n", - " Using cached importlib_resources-6.0.1-py3-none-any.whl.metadata (4.0 kB)\n", - "Collecting biopython>=1.80 (from mdanalysis->evodiff)\n", - " Using cached biopython-1.81-cp38-cp38-macosx_10_9_x86_64.whl (2.7 MB)\n", - "Collecting GridDataFormats>=0.4.0 (from mdanalysis->evodiff)\n", - " Using cached GridDataFormats-1.0.1-py3-none-any.whl (2.1 MB)\n", - "Collecting mmtf-python>=1.0.0 (from mdanalysis->evodiff)\n", - " Using cached mmtf_python-1.1.3-py2.py3-none-any.whl (25 kB)\n", - "Collecting joblib>=0.12 (from mdanalysis->evodiff)\n", - " Obtaining dependency information for 
joblib>=0.12 from https://files.pythonhosted.org/packages/10/40/d551139c85db202f1f384ba8bcf96aca2f329440a844f924c8a0040b6d02/joblib-1.3.2-py3-none-any.whl.metadata\n", - " Using cached joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)\n", - "Collecting scipy>=1.5.0 (from mdanalysis->evodiff)\n", - " Using cached scipy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl (35.0 MB)\n", - "Collecting threadpoolctl (from mdanalysis->evodiff)\n", - " Obtaining dependency information for threadpoolctl from https://files.pythonhosted.org/packages/81/12/fd4dea011af9d69e1cad05c75f3f7202cdcbeac9b712eea58ca779a72865/threadpoolctl-3.2.0-py3-none-any.whl.metadata\n", - " Using cached threadpoolctl-3.2.0-py3-none-any.whl.metadata (10.0 kB)\n", - "Collecting fasteners (from mdanalysis->evodiff)\n", - " Using cached fasteners-0.18-py3-none-any.whl (18 kB)\n", - "Collecting gsd>=1.9.3 (from mdanalysis->evodiff)\n", - " Obtaining dependency information for gsd>=1.9.3 from https://files.pythonhosted.org/packages/b1/89/9235f1c3c0d3d9a55e70d49c6f4a3e61d27598602f93f8a467d9dc03e4cb/gsd-3.1.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached gsd-3.1.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata (4.3 kB)\n", - "Collecting click<9,>=7.0 (from mlflow->evodiff)\n", - " Obtaining dependency information for click<9,>=7.0 from https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl.metadata\n", - " Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n", - "Collecting cloudpickle<3 (from mlflow->evodiff)\n", - " Using cached cloudpickle-2.2.1-py3-none-any.whl (25 kB)\n", - "Collecting databricks-cli<1,>=0.8.7 (from mlflow->evodiff)\n", - " Using cached databricks_cli-0.17.7-py3-none-any.whl\n", - "Collecting entrypoints<1 (from mlflow->evodiff)\n", - " Using cached entrypoints-0.4-py3-none-any.whl (5.3 kB)\n", - "Collecting gitpython<4,>=2.1.0 (from mlflow->evodiff)\n", - " Obtaining dependency information for 
gitpython<4,>=2.1.0 from https://files.pythonhosted.org/packages/0f/c6/bb9e2276b6fed126aa21e292493b45a3df4cfba7cbfcf2ab8809a6b0e718/GitPython-3.1.35-py3-none-any.whl.metadata\n", - " Downloading GitPython-3.1.35-py3-none-any.whl.metadata (10 kB)\n", - "Collecting pyyaml<7,>=5.1 (from mlflow->evodiff)\n", - " Obtaining dependency information for pyyaml<7,>=5.1 from https://files.pythonhosted.org/packages/7f/5d/2779ea035ba1e533c32ed4a249b4e0448f583ba10830b21a3cddafe11a4e/PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata (2.1 kB)\n", - "Collecting protobuf<5,>=3.12.0 (from mlflow->evodiff)\n", - " Obtaining dependency information for protobuf<5,>=3.12.0 from https://files.pythonhosted.org/packages/fe/f3/957db80e5b9f7fd7df97e5554fdc57919dfad24e89291223fd04a0e3c84f/protobuf-4.24.3-cp37-abi3-macosx_10_9_universal2.whl.metadata\n", - " Downloading protobuf-4.24.3-cp37-abi3-macosx_10_9_universal2.whl.metadata (540 bytes)\n", - "Collecting pytz<2024 (from mlflow->evodiff)\n", - " Obtaining dependency information for pytz<2024 from https://files.pythonhosted.org/packages/32/4d/aaf7eff5deb402fd9a24a1449a8119f00d74ae9c2efa79f8ef9994261fc2/pytz-2023.3.post1-py2.py3-none-any.whl.metadata\n", - " Downloading pytz-2023.3.post1-py2.py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: importlib-metadata!=4.7.0,<7,>=3.7.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff) (6.8.0)\n", - "Collecting sqlparse<1,>=0.4.0 (from mlflow->evodiff)\n", - " Using cached sqlparse-0.4.4-py3-none-any.whl (41 kB)\n", - "Collecting alembic!=1.10.0,<2 (from mlflow->evodiff)\n", - " Obtaining dependency information for alembic!=1.10.0,<2 from https://files.pythonhosted.org/packages/a2/8b/46919127496036c8e990b2b236454a0d8655fd46e1df2fd35610a9cbc842/alembic-1.12.0-py3-none-any.whl.metadata\n", - " Downloading alembic-1.12.0-py3-none-any.whl.metadata (7.2 
kB)\n", - "Collecting docker<7,>=4.0.0 (from mlflow->evodiff)\n", - " Obtaining dependency information for docker<7,>=4.0.0 from https://files.pythonhosted.org/packages/db/be/3032490fa33b36ddc8c4b1da3252c6f974e7133f1a50de00c6b85cca203a/docker-6.1.3-py3-none-any.whl.metadata\n", - " Using cached docker-6.1.3-py3-none-any.whl.metadata (3.5 kB)\n", - "Collecting Flask<3 (from mlflow->evodiff)\n", - " Obtaining dependency information for Flask<3 from https://files.pythonhosted.org/packages/fd/56/26f0be8adc2b4257df20c1c4260ddd0aa396cf8e75d90ab2f7ff99bc34f9/flask-2.3.3-py3-none-any.whl.metadata\n", - " Using cached flask-2.3.3-py3-none-any.whl.metadata (3.6 kB)\n", - "Collecting querystring-parser<2 (from mlflow->evodiff)\n", - " Using cached querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)\n", - "Collecting sqlalchemy<3,>=1.4.0 (from mlflow->evodiff)\n", - " Obtaining dependency information for sqlalchemy<3,>=1.4.0 from https://files.pythonhosted.org/packages/d0/cd/2c23739c701a299b22fbb2403aa79a43a40def4064049975e3e5beac40ff/SQLAlchemy-2.0.20-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached SQLAlchemy-2.0.20-cp38-cp38-macosx_10_9_x86_64.whl.metadata (9.4 kB)\n", - "Collecting pyarrow<13,>=4.0.0 (from mlflow->evodiff)\n", - " Obtaining dependency information for pyarrow<13,>=4.0.0 from https://files.pythonhosted.org/packages/75/a2/87fe24ab2c6efc6ad2335a2fc6bc33363fc70f67f18a3c18c494a4783aa2/pyarrow-12.0.1-cp38-cp38-macosx_10_14_x86_64.whl.metadata\n", - " Using cached pyarrow-12.0.1-cp38-cp38-macosx_10_14_x86_64.whl.metadata (3.0 kB)\n", - "Collecting markdown<4,>=3.3 (from mlflow->evodiff)\n", - " Obtaining dependency information for markdown<4,>=3.3 from https://files.pythonhosted.org/packages/1a/b5/228c1cdcfe138f1a8e01ab1b54284c8b83735476cb22b6ba251656ed13ad/Markdown-3.4.4-py3-none-any.whl.metadata\n", - " Using cached Markdown-3.4.4-py3-none-any.whl.metadata (6.9 kB)\n", - "Collecting gunicorn<22 (from mlflow->evodiff)\n", - " Obtaining dependency 
information for gunicorn<22 from https://files.pythonhosted.org/packages/0e/2a/c3a878eccb100ccddf45c50b6b8db8cf3301a6adede6e31d48e8531cab13/gunicorn-21.2.0-py3-none-any.whl.metadata\n", - " Using cached gunicorn-21.2.0-py3-none-any.whl.metadata (4.1 kB)\n", - "Collecting Jinja2<4,>=2.11 (from mlflow->evodiff)\n", - " Using cached Jinja2-3.1.2-py3-none-any.whl (133 kB)\n", - "Collecting tzdata>=2022.1 (from pandas->evodiff)\n", - " Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n", - "Collecting Mako (from alembic!=1.10.0,<2->mlflow->evodiff)\n", - " Using cached Mako-1.2.4-py3-none-any.whl (78 kB)\n", - "Requirement already satisfied: typing-extensions>=4 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from alembic!=1.10.0,<2->mlflow->evodiff) (4.7.1)\n", - "Collecting pyjwt>=1.7.0 (from databricks-cli<1,>=0.8.7->mlflow->evodiff)\n", - " Obtaining dependency information for pyjwt>=1.7.0 from https://files.pythonhosted.org/packages/2b/4f/e04a8067c7c96c364cef7ef73906504e2f40d690811c021e1a1901473a19/PyJWT-2.8.0-py3-none-any.whl.metadata\n", - " Using cached PyJWT-2.8.0-py3-none-any.whl.metadata (4.2 kB)\n", - "Collecting oauthlib>=3.1.0 (from databricks-cli<1,>=0.8.7->mlflow->evodiff)\n", - " Using cached oauthlib-3.2.2-py3-none-any.whl (151 kB)\n", - "Collecting tabulate>=0.7.7 (from databricks-cli<1,>=0.8.7->mlflow->evodiff)\n", - " Using cached tabulate-0.9.0-py3-none-any.whl (35 kB)\n", - "Requirement already satisfied: six>=1.10.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from databricks-cli<1,>=0.8.7->mlflow->evodiff) (1.16.0)\n", - "Collecting urllib3<3,>=1.21.1 (from requests->evodiff)\n", - " Obtaining dependency information for urllib3<3,>=1.21.1 from https://files.pythonhosted.org/packages/c5/05/c214b32d21c0b465506f95c4f28ccbcba15022e000b043b72b3df7728471/urllib3-1.26.16-py2.py3-none-any.whl.metadata\n", - " Using cached urllib3-1.26.16-py2.py3-none-any.whl.metadata (48 kB)\n", - 
"Collecting websocket-client>=0.32.0 (from docker<7,>=4.0.0->mlflow->evodiff)\n", - " Obtaining dependency information for websocket-client>=0.32.0 from https://files.pythonhosted.org/packages/0b/50/49e0d7342e5d441d43b525d6c84656ea40aea3e58d530004d07b22bc9b04/websocket_client-1.6.3-py3-none-any.whl.metadata\n", - " Downloading websocket_client-1.6.3-py3-none-any.whl.metadata (7.7 kB)\n", - "Collecting Werkzeug>=2.3.7 (from Flask<3->mlflow->evodiff)\n", - " Obtaining dependency information for Werkzeug>=2.3.7 from https://files.pythonhosted.org/packages/9b/59/a7c32e3d8d0e546a206e0552a2c04444544f15c1da4a01df8938d20c6ffc/werkzeug-2.3.7-py3-none-any.whl.metadata\n", - " Using cached werkzeug-2.3.7-py3-none-any.whl.metadata (4.1 kB)\n", - "Collecting itsdangerous>=2.1.2 (from Flask<3->mlflow->evodiff)\n", - " Using cached itsdangerous-2.1.2-py3-none-any.whl (15 kB)\n", - "Collecting blinker>=1.6.2 (from Flask<3->mlflow->evodiff)\n", - " Using cached blinker-1.6.2-py3-none-any.whl (13 kB)\n", - "Collecting gitdb<5,>=4.0.1 (from gitpython<4,>=2.1.0->mlflow->evodiff)\n", - " Using cached gitdb-4.0.10-py3-none-any.whl (62 kB)\n", - "Collecting mrcfile (from GridDataFormats>=0.4.0->mdanalysis->evodiff)\n", - " Using cached mrcfile-1.4.3-py2.py3-none-any.whl (43 kB)\n", - "Requirement already satisfied: zipp>=0.5 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from importlib-metadata!=4.7.0,<7,>=3.7.0->mlflow->evodiff) (3.16.2)\n", - "Collecting MarkupSafe>=2.0 (from Jinja2<4,>=2.11->mlflow->evodiff)\n", - " Obtaining dependency information for MarkupSafe>=2.0 from https://files.pythonhosted.org/packages/f8/33/e9e83b214b5f8d9a60b26e60051734e7657a416e5bce7d7f1c34e26badad/MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl.metadata\n", - " Using cached MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl.metadata (3.0 kB)\n", - "Collecting greenlet!=0.4.17 (from sqlalchemy<3,>=1.4.0->mlflow->evodiff)\n", - " Using cached 
greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl (241 kB)\n", - "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython<4,>=2.1.0->mlflow->evodiff)\n", - " Using cached smmap-5.0.0-py3-none-any.whl (24 kB)\n", - "Downloading evodiff-0.9.9-py3-none-any.whl (67 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.4/67.4 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hUsing cached numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl (19.8 MB)\n", - "Using cached requests-2.31.0-py3-none-any.whl (62 kB)\n", - "Using cached matplotlib-3.7.2-cp38-cp38-macosx_10_12_x86_64.whl (7.4 MB)\n", - "Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", - "Using cached mlflow-2.6.0-py3-none-any.whl (18.3 MB)\n", - "Using cached pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl (11.7 MB)\n", - "Using cached scikit_learn-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl (10.1 MB)\n", - "Using cached sequence_models-1.7.0-py3-none-any.whl (65 kB)\n", - "Downloading alembic-1.12.0-py3-none-any.whl (226 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.0/226.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", - "\u001b[?25hUsing cached certifi-2023.7.22-py3-none-any.whl (158 kB)\n", - "Using cached charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl (124 kB)\n", - "Using cached click-8.1.7-py3-none-any.whl (97 kB)\n", - "Using cached contourpy-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl (243 kB)\n", - "Using cached docker-6.1.3-py3-none-any.whl (148 kB)\n", - "Using cached flask-2.3.3-py3-none-any.whl (96 kB)\n", - "Using cached fonttools-4.42.1-cp38-cp38-macosx_10_9_x86_64.whl (2.2 MB)\n", - "Downloading GitPython-3.1.35-py3-none-any.whl (188 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m188.8/188.8 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hUsing 
cached gsd-3.1.1-cp38-cp38-macosx_10_9_x86_64.whl (256 kB)\n", - "Using cached gunicorn-21.2.0-py3-none-any.whl (80 kB)\n", - "Using cached importlib_resources-6.0.1-py3-none-any.whl (34 kB)\n", - "Using cached joblib-1.3.2-py3-none-any.whl (302 kB)\n", - "Using cached kiwisolver-1.4.5-cp38-cp38-macosx_10_9_x86_64.whl (68 kB)\n", - "Using cached Markdown-3.4.4-py3-none-any.whl (94 kB)\n", - "Using cached Pillow-10.0.0-cp38-cp38-macosx_10_10_x86_64.whl (3.4 MB)\n", - "Downloading protobuf-4.24.3-cp37-abi3-macosx_10_9_universal2.whl (409 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m409.4/409.4 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m:00:01\u001b[0m\n", - "\u001b[?25hUsing cached pyarrow-12.0.1-cp38-cp38-macosx_10_14_x86_64.whl (24.7 MB)\n", - "Downloading pytz-2023.3.post1-py2.py3-none-any.whl (502 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m502.5/502.5 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m:00:01\u001b[0m\n", - "\u001b[?25hUsing cached PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl (191 kB)\n", - "Using cached SQLAlchemy-2.0.20-cp38-cp38-macosx_10_9_x86_64.whl (2.1 MB)\n", - "Using cached threadpoolctl-3.2.0-py3-none-any.whl (15 kB)\n", - "Using cached urllib3-1.26.16-py2.py3-none-any.whl (143 kB)\n", - "Using cached MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl (13 kB)\n", - "Using cached PyJWT-2.8.0-py3-none-any.whl (22 kB)\n", - "Downloading websocket_client-1.6.3-py3-none-any.whl (57 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.3/57.3 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hUsing cached werkzeug-2.3.7-py3-none-any.whl (242 kB)\n", - "Building wheels for collected packages: biotite\n", - " Building wheel for biotite (pyproject.toml) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for biotite: filename=biotite-0.38.0-cp38-cp38-macosx_10_9_x86_64.whl size=36079903 sha256=831108ee30781ec038bcf906831f43785952884fa375cd5a72dc117e92ef01a9\n", - " Stored in directory: /Users/nityathakkar/Library/Caches/pip/wheels/75/bb/a1/a72e26ee42d6f2fca30f63be42e803ea431cf8e03f11e1b488\n", - "Successfully built biotite\n", - "Installing collected packages: pytz, pdb-tools, msgpack, lmdb, fair-esm, websocket-client, urllib3, tzdata, tqdm, threadpoolctl, tabulate, sqlparse, smmap, sequence-models, querystring-parser, pyyaml, pyparsing, pyjwt, protobuf, pillow, oauthlib, numpy, networkx, mmtf-python, MarkupSafe, kiwisolver, joblib, itsdangerous, importlib-resources, idna, gunicorn, greenlet, fonttools, fasteners, entrypoints, cycler, cloudpickle, click, charset-normalizer, certifi, blosum, blinker, Werkzeug, sqlalchemy, scipy, requests, pyarrow, pandas, mrcfile, markdown, Mako, Jinja2, gsd, gitdb, contourpy, biopython, scikit-learn, matplotlib, GridDataFormats, gitpython, Flask, docker, databricks-cli, biotite, alembic, seaborn, mlflow, mdanalysis, evodiff\n", - "Successfully installed Flask-2.3.3 GridDataFormats-1.0.1 Jinja2-3.1.2 Mako-1.2.4 MarkupSafe-2.1.3 Werkzeug-2.3.7 alembic-1.12.0 biopython-1.81 biotite-0.38.0 blinker-1.6.2 blosum-2.0.2 certifi-2023.7.22 charset-normalizer-3.2.0 click-8.1.7 cloudpickle-2.2.1 contourpy-1.1.0 cycler-0.11.0 databricks-cli-0.17.7 docker-6.1.3 entrypoints-0.4 evodiff-0.9.9 fair-esm-2.0.0 fasteners-0.18 fonttools-4.42.1 gitdb-4.0.10 gitpython-3.1.35 greenlet-2.0.2 gsd-3.1.1 gunicorn-21.2.0 idna-3.4 importlib-resources-6.0.1 itsdangerous-2.1.2 joblib-1.3.2 kiwisolver-1.4.5 lmdb-1.4.1 markdown-3.4.4 matplotlib-3.7.2 mdanalysis-2.4.3 mlflow-2.6.0 mmtf-python-1.1.3 mrcfile-1.4.3 msgpack-1.0.5 networkx-3.1 numpy-1.24.4 oauthlib-3.2.2 pandas-2.0.3 pdb-tools-2.5.0 pillow-10.0.0 protobuf-4.24.3 pyarrow-12.0.1 pyjwt-2.8.0 pyparsing-3.0.9 pytz-2023.3.post1 pyyaml-6.0.1 
querystring-parser-1.2.4 requests-2.31.0 scikit-learn-1.3.0 scipy-1.10.1 seaborn-0.12.2 sequence-models-1.7.0 smmap-5.0.0 sqlalchemy-2.0.20 sqlparse-0.4.4 tabulate-0.9.0 threadpoolctl-3.2.0 tqdm-4.66.1 tzdata-2023.3 urllib3-1.26.16 websocket-client-1.6.3\n" + "Found existing installation: evodiff 0.2.3\n", + "Uninstalling evodiff-0.2.3:\n", + " Successfully uninstalled evodiff-0.2.3\n", + "Collecting evodiff==0.2.3\n", + " Obtaining dependency information for evodiff==0.2.3 from https://files.pythonhosted.org/packages/82/2f/872ed91402c7174cc1e8f542139011327963cb0d96a57db8998e3e091194/evodiff-0.2.3-py3-none-any.whl.metadata\n", + " Using cached evodiff-0.2.3-py3-none-any.whl.metadata (14 kB)\n", + "Requirement already satisfied: pandas in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (2.0.3)\n", + "Requirement already satisfied: lmdb in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (1.4.1)\n", + "Requirement already satisfied: numpy in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (1.23.5)\n", + "Requirement already satisfied: sequence-models in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (1.7.0)\n", + "Requirement already satisfied: mlflow in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (2.6.0)\n", + "Requirement already satisfied: scikit-learn in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (1.2.2)\n", + "Requirement already satisfied: blosum in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (2.0.2)\n", + "Requirement already satisfied: seaborn in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (0.12.2)\n", + "Requirement already satisfied: matplotlib in 
/Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (3.7.2)\n", + "Requirement already satisfied: fair-esm in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (2.0.0)\n", + "Requirement already satisfied: tqdm in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (4.65.0)\n", + "Requirement already satisfied: biotite in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (0.37.0)\n", + "Requirement already satisfied: requests in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (2.31.0)\n", + "Requirement already satisfied: mdanalysis in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (2.4.3)\n", + "Requirement already satisfied: pdb-tools in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from evodiff==0.2.3) (2.5.0)\n", + "Requirement already satisfied: msgpack>=0.5.6 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from biotite->evodiff==0.2.3) (1.0.5)\n", + "Requirement already satisfied: networkx>=2.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from biotite->evodiff==0.2.3) (3.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from requests->evodiff==0.2.3) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from requests->evodiff==0.2.3) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from requests->evodiff==0.2.3) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
/Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from requests->evodiff==0.2.3) (2023.7.22)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (4.42.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (23.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (10.0.0)\n", + "Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (3.0.9)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (2.8.2)\n", + "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from matplotlib->evodiff==0.2.3) (6.0.1)\n", + "Requirement already satisfied: biopython>=1.80 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mdanalysis->evodiff==0.2.3) (1.81)\n", + "Requirement already satisfied: GridDataFormats>=0.4.0 in 
/Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mdanalysis->evodiff==0.2.3) (1.0.1)\n", + "Requirement already satisfied: mmtf-python>=1.0.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mdanalysis->evodiff==0.2.3) (1.1.3)\n", + "Requirement already satisfied: joblib>=0.12 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mdanalysis->evodiff==0.2.3) (1.2.0)\n", + "Requirement already satisfied: scipy>=1.5.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mdanalysis->evodiff==0.2.3) (1.10.1)\n", + "Requirement already satisfied: threadpoolctl in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mdanalysis->evodiff==0.2.3) (2.2.0)\n", + "Requirement already satisfied: fasteners in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mdanalysis->evodiff==0.2.3) (0.18)\n", + "Requirement already satisfied: gsd>=1.9.3 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mdanalysis->evodiff==0.2.3) (3.1.1)\n", + "Requirement already satisfied: click<9,>=7.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (8.1.7)\n", + "Requirement already satisfied: cloudpickle<3 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (2.2.1)\n", + "Requirement already satisfied: databricks-cli<1,>=0.8.7 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (0.17.7)\n", + "Requirement already satisfied: entrypoints<1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (0.4)\n", + "Requirement already satisfied: gitpython<4,>=2.1.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) 
(3.1.32)\n", + "Requirement already satisfied: pyyaml<7,>=5.1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (6.0.1)\n", + "Requirement already satisfied: protobuf<5,>=3.12.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (4.24.2)\n", + "Requirement already satisfied: pytz<2024 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (2023.3)\n", + "Requirement already satisfied: importlib-metadata!=4.7.0,<7,>=3.7.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (6.8.0)\n", + "Requirement already satisfied: sqlparse<1,>=0.4.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (0.4.4)\n", + "Requirement already satisfied: alembic!=1.10.0,<2 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (1.11.3)\n", + "Requirement already satisfied: docker<7,>=4.0.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (6.1.3)\n", + "Requirement already satisfied: Flask<3 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (2.3.3)\n", + "Requirement already satisfied: querystring-parser<2 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (1.2.4)\n", + "Requirement already satisfied: sqlalchemy<3,>=1.4.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (2.0.20)\n", + "Requirement already satisfied: pyarrow<13,>=4.0.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (12.0.1)\n", + "Requirement already satisfied: markdown<4,>=3.3 in 
/Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (3.4.4)\n", + "Requirement already satisfied: gunicorn<22 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (21.2.0)\n", + "Requirement already satisfied: Jinja2<4,>=2.11 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from mlflow->evodiff==0.2.3) (3.1.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from pandas->evodiff==0.2.3) (2023.3)\n", + "Requirement already satisfied: Mako in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from alembic!=1.10.0,<2->mlflow->evodiff==0.2.3) (1.2.4)\n", + "Requirement already satisfied: typing-extensions>=4 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from alembic!=1.10.0,<2->mlflow->evodiff==0.2.3) (4.7.1)\n", + "Requirement already satisfied: pyjwt>=1.7.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from databricks-cli<1,>=0.8.7->mlflow->evodiff==0.2.3) (2.8.0)\n", + "Requirement already satisfied: oauthlib>=3.1.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from databricks-cli<1,>=0.8.7->mlflow->evodiff==0.2.3) (3.2.2)\n", + "Requirement already satisfied: tabulate>=0.7.7 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from databricks-cli<1,>=0.8.7->mlflow->evodiff==0.2.3) (0.9.0)\n", + "Requirement already satisfied: six>=1.10.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from databricks-cli<1,>=0.8.7->mlflow->evodiff==0.2.3) (1.16.0)\n", + "Requirement already satisfied: websocket-client>=0.32.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from docker<7,>=4.0.0->mlflow->evodiff==0.2.3) (1.6.2)\n", + "Requirement already 
satisfied: Werkzeug>=2.3.7 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from Flask<3->mlflow->evodiff==0.2.3) (2.3.7)\n", + "Requirement already satisfied: itsdangerous>=2.1.2 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from Flask<3->mlflow->evodiff==0.2.3) (2.1.2)\n", + "Requirement already satisfied: blinker>=1.6.2 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from Flask<3->mlflow->evodiff==0.2.3) (1.6.2)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from gitpython<4,>=2.1.0->mlflow->evodiff==0.2.3) (4.0.10)\n", + "Requirement already satisfied: mrcfile in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from GridDataFormats>=0.4.0->mdanalysis->evodiff==0.2.3) (1.4.3)\n", + "Requirement already satisfied: zipp>=0.5 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from importlib-metadata!=4.7.0,<7,>=3.7.0->mlflow->evodiff==0.2.3) (3.16.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from Jinja2<4,>=2.11->mlflow->evodiff==0.2.3) (2.1.1)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from sqlalchemy<3,>=1.4.0->mlflow->evodiff==0.2.3) (2.0.2)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/nityathakkar/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages (from gitdb<5,>=4.0.1->gitpython<4,>=2.1.0->mlflow->evodiff==0.2.3) (5.0.0)\n", + "Using cached evodiff-0.2.3-py3-none-any.whl (67 kB)\n", + "Installing collected packages: evodiff\n", + "Successfully installed evodiff-0.2.3\n" ] } ], @@ -267,7 +129,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "You will 
also need to install PyTorch. We tested our models on `v2.0.1`. Change the below line to install the pytorch version that works for your system." ] @@ -275,16 +141,17 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting package metadata (current_repodata.json): done\n", - "Solving environment: failed with initial frozen solve. Retrying with flexible solve.\n", - "Solving environment: failed with repodata from current_repodata.json, will retry with next repodata source.\n", - "Collecting package metadata (repodata.json): done\n", "Solving environment: done\n", "\n", "\n", @@ -309,86 +176,11 @@ " - torchvision\n", "\n", "\n", - "The following packages will be downloaded:\n", - "\n", - " package | build\n", - " ---------------------------|-----------------\n", - " cryptography-41.0.3 | py38h30e54ef_0 1.2 MB\n", - " ------------------------------------------------------------\n", - " Total: 1.2 MB\n", - "\n", - "The following NEW packages will be INSTALLED:\n", - "\n", - " blas pkgs/main/osx-64::blas-1.0-mkl None\n", - " brotlipy pkgs/main/osx-64::brotlipy-0.7.0-py38h9ed2024_1003 None\n", - " certifi pkgs/main/osx-64::certifi-2023.7.22-py38hecd8cb5_0 None\n", - " cffi pkgs/main/osx-64::cffi-1.15.1-py38h6c40b1e_3 None\n", - " charset-normalizer pkgs/main/noarch::charset-normalizer-2.0.4-pyhd3eb1b0_0 None\n", - " cpuonly pytorch/noarch::cpuonly-2.0-0 None\n", - " cryptography pkgs/main/osx-64::cryptography-41.0.3-py38h30e54ef_0 None\n", - " ffmpeg pytorch/osx-64::ffmpeg-4.3-h0a44026_0 None\n", - " filelock pkgs/main/osx-64::filelock-3.9.0-py38hecd8cb5_0 None\n", - " freetype pkgs/main/osx-64::freetype-2.12.1-hd8bbffd_0 None\n", - " gettext pkgs/main/osx-64::gettext-0.21.0-h7535e17_0 None\n", - " giflib pkgs/main/osx-64::giflib-5.2.1-h6c40b1e_3 None\n", - " gmp pkgs/main/osx-64::gmp-6.2.1-he9d5cce_3 None\n", - " gmpy2 
pkgs/main/osx-64::gmpy2-2.1.2-py38hd5de756_0 None\n", - " gnutls pkgs/main/osx-64::gnutls-3.6.15-hed9c0bf_0 None\n", - " icu pkgs/main/osx-64::icu-58.2-h0a44026_3 None\n", - " idna pkgs/main/osx-64::idna-3.4-py38hecd8cb5_0 None\n", - " intel-openmp pkgs/main/osx-64::intel-openmp-2023.1.0-ha357a0b_43547 None\n", - " jinja2 pkgs/main/osx-64::jinja2-3.1.2-py38hecd8cb5_0 None\n", - " jpeg pkgs/main/osx-64::jpeg-9e-h6c40b1e_1 None\n", - " lame pkgs/main/osx-64::lame-3.100-h1de35cc_0 None\n", - " lcms2 pkgs/main/osx-64::lcms2-2.12-hf1fd2bf_0 None\n", - " lerc pkgs/main/osx-64::lerc-3.0-he9d5cce_0 None\n", - " libdeflate pkgs/main/osx-64::libdeflate-1.8-h9ed2024_5 None\n", - " libiconv pkgs/main/osx-64::libiconv-1.16-hca72f7f_2 None\n", - " libidn2 pkgs/main/osx-64::libidn2-2.3.4-h6c40b1e_0 None\n", - " libpng pkgs/main/osx-64::libpng-1.6.39-h6c40b1e_0 None\n", - " libtasn1 pkgs/main/osx-64::libtasn1-4.19.0-h6c40b1e_0 None\n", - " libtiff pkgs/main/osx-64::libtiff-4.4.0-h2cd0358_2 None\n", - " libunistring pkgs/main/osx-64::libunistring-0.9.10-h9ed2024_0 None\n", - " libwebp pkgs/main/osx-64::libwebp-1.2.4-hf6ce154_1 None\n", - " libwebp-base pkgs/main/osx-64::libwebp-base-1.2.4-h6c40b1e_1 None\n", - " libxml2 pkgs/main/osx-64::libxml2-2.9.14-hbf8cd5e_0 None\n", - " llvm-openmp pkgs/main/osx-64::llvm-openmp-14.0.6-h0dcd299_0 None\n", - " lz4-c pkgs/main/osx-64::lz4-c-1.9.4-hcec6c5f_0 None\n", - " markupsafe pkgs/main/osx-64::markupsafe-2.1.1-py38hca72f7f_0 None\n", - " mkl pkgs/main/osx-64::mkl-2023.1.0-h8e150cf_43559 None\n", - " mkl-service pkgs/main/osx-64::mkl-service-2.4.0-py38h6c40b1e_1 None\n", - " mkl_fft pkgs/main/osx-64::mkl_fft-1.3.6-py38h07fba90_1 None\n", - " mkl_random pkgs/main/osx-64::mkl_random-1.2.2-py38h07fba90_1 None\n", - " mpc pkgs/main/osx-64::mpc-1.1.0-h6ef4df4_1 None\n", - " mpfr pkgs/main/osx-64::mpfr-4.0.2-h9066e36_1 None\n", - " mpmath pkgs/main/osx-64::mpmath-1.3.0-py38hecd8cb5_0 None\n", - " nettle pkgs/main/osx-64::nettle-3.7.3-h230ac6f_1 
None\n", - " networkx pkgs/main/osx-64::networkx-3.1-py38hecd8cb5_0 None\n", - " numpy pkgs/main/osx-64::numpy-1.23.5-py38h47b59a4_1 None\n", - " numpy-base pkgs/main/osx-64::numpy-base-1.23.5-py38hcfaf2c3_1 None\n", - " openh264 pkgs/main/osx-64::openh264-2.1.1-h8346a28_0 None\n", - " pillow pkgs/main/osx-64::pillow-9.4.0-py38hcec6c5f_0 None\n", - " pycparser pkgs/main/noarch::pycparser-2.21-pyhd3eb1b0_0 None\n", - " pyopenssl pkgs/main/osx-64::pyopenssl-23.2.0-py38hecd8cb5_0 None\n", - " pysocks pkgs/main/osx-64::pysocks-1.7.1-py38_1 None\n", - " pytorch pytorch/osx-64::pytorch-2.0.1-py3.8_0 None\n", - " pytorch-mutex pytorch/noarch::pytorch-mutex-1.0-cpu None\n", - " requests pkgs/main/osx-64::requests-2.31.0-py38hecd8cb5_0 None\n", - " sympy pkgs/main/osx-64::sympy-1.11.1-py38hecd8cb5_0 None\n", - " tbb pkgs/main/osx-64::tbb-2021.8.0-ha357a0b_0 None\n", - " torchaudio pytorch/osx-64::torchaudio-2.0.2-py38_cpu None\n", - " torchvision pytorch/osx-64::torchvision-0.15.2-py38_cpu None\n", - " urllib3 pkgs/main/osx-64::urllib3-1.26.16-py38hecd8cb5_0 None\n", - " zstd pkgs/main/osx-64::zstd-1.5.2-hcb37349_0 None\n", - "\n", - "The following packages will be UPDATED:\n", - "\n", - " ca-certificates conda-forge::ca-certificates-2023.7.2~ --> pkgs/main::ca-certificates-2023.08.22-hecd8cb5_0 None\n", - "\n", - "\n", - "\n", - "Downloading and Extracting Packages\n", - "cryptography-41.0.3 | 1.2 MB | ##################################### | 100% \n", + "The following packages will be SUPERSEDED by a higher-priority channel:\n", + "\n", + " certifi conda-forge/noarch::certifi-2023.7.22~ --> pkgs/main/osx-64::certifi-2023.7.22-py38hecd8cb5_0 None\n", + "\n", + "\n", "Preparing transaction: done\n", "Verifying transaction: done\n", "Executing transaction: done\n", @@ -404,7 +196,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "You also need PyTorch Geometric and PyTorch Scatter installed" ] @@ 
-412,7 +208,11 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -432,44 +232,8 @@ "\n", "\n", "\n", - "## Package Plan ##\n", - "\n", - " environment location: /Users/nityathakkar/opt/anaconda3/envs/evodiff\n", - "\n", - " added / updated specs:\n", - " - pyg\n", - "\n", - "\n", - "The following NEW packages will be INSTALLED:\n", - "\n", - " appdirs pkgs/main/noarch::appdirs-1.4.4-pyhd3eb1b0_0 None\n", - " joblib pkgs/main/osx-64::joblib-1.2.0-py38hecd8cb5_0 None\n", - " libgfortran pkgs/main/osx-64::libgfortran-5.0.0-11_3_0_hecd8cb5_28 None\n", - " libgfortran5 pkgs/main/osx-64::libgfortran5-11.3.0-h9dfd629_28 None\n", - " pooch pkgs/main/noarch::pooch-1.4.0-pyhd3eb1b0_0 None\n", - " pyg pyg/osx-64::pyg-2.3.1-py38_torch_2.0.0_cpu None\n", - " pyparsing pkgs/main/osx-64::pyparsing-3.0.9-py38hecd8cb5_0 None\n", - " scikit-learn pkgs/main/osx-64::scikit-learn-1.2.2-py38hcec6c5f_0 None\n", - " scipy pkgs/main/osx-64::scipy-1.10.1-py38hf241641_1 None\n", - " threadpoolctl pkgs/main/noarch::threadpoolctl-2.2.0-pyh0d69192_0 None\n", - " tqdm pkgs/main/osx-64::tqdm-4.65.0-py38h01d92e1_0 None\n", - "\n", - "\n", - "Preparing transaction: done\n", - "Verifying transaction: done\n", - "Executing transaction: \\ \n", + "# All requested packages already installed.\n", "\n", - " Installed package of scikit-learn can be accelerated using scikit-learn-intelex.\n", - " More details are available here: https://intel.github.io/scikit-learn-intelex\n", - "\n", - " For example:\n", - "\n", - " $ conda install scikit-learn-intelex\n", - " $ python -m sklearnex my_application.py\n", - "\n", - " \n", - "\n", - "done\n", "Retrieving notices: ...working... 
done\n", "\n", "Note: you may need to restart the kernel to use updated packages.\n" @@ -483,14 +247,17 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Collecting package metadata (current_repodata.json): | WARNING conda.models.version:get_matcher(542): Using .* with relational operator is superfluous and deprecated and will be removed in a future version of conda. Your spec was 1.7.1.*, but conda is ignoring the .* and treating it as 1.7.1\n", - "done\n", + "Collecting package metadata (current_repodata.json): done\n", "Solving environment: done\n", "\n", "\n", @@ -512,31 +279,11 @@ " - torch-scatter\n", "\n", "\n", - "The following packages will be downloaded:\n", - "\n", - " package | build\n", - " ---------------------------|-----------------\n", - " libuv-1.46.0 | h0c2f820_0 387 KB conda-forge\n", - " ------------------------------------------------------------\n", - " Total: 387 KB\n", - "\n", - "The following NEW packages will be INSTALLED:\n", - "\n", - " libprotobuf conda-forge/osx-64::libprotobuf-3.20.3-hbc0c0cd_0 None\n", - " libuv conda-forge/osx-64::libuv-1.46.0-h0c2f820_0 None\n", - " ninja conda-forge/osx-64::ninja-1.11.1-hb8565cd_0 None\n", - " torch-scatter conda-forge/osx-64::torch-scatter-2.1.1-py38hf47b5ae_1 None\n", - "\n", "The following packages will be SUPERSEDED by a higher-priority channel:\n", "\n", - " ca-certificates pkgs/main::ca-certificates-2023.08.22~ --> conda-forge::ca-certificates-2023.7.22-h8857fd0_0 None\n", " certifi pkgs/main/osx-64::certifi-2023.7.22-p~ --> conda-forge/noarch::certifi-2023.7.22-pyhd8ed1ab_0 None\n", - " pytorch pytorch::pytorch-2.0.1-py3.8_0 --> pkgs/main::pytorch-2.0.1-cpu_py38h77673e4_0 None\n", - "\n", "\n", "\n", - "Downloading and Extracting Packages\n", - "libuv-1.46.0 | 387 KB | ##################################### | 100% \n", "Preparing transaction: 
done\n", "Verifying transaction: done\n", "Executing transaction: done\n", @@ -551,19 +298,30 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ - "## Download model from zenodo\n", + "## Unconditional sequence generation\n", + "\n", + "### Generate a sequence with EvoDiff-Seq-OADM 38M\n", "\n", - "#### Example to download OAAR 38M model\n" + "First, download model information from Zenodo. For demonstration purposes, we show an example using the smaller 38M model here, and generation on a CPU. If you are interested in using the model EvoDiff-Seq-OADM 640M, make sure you have ~7GB available to store the model checkpoint. Similarly, we showcase generation on a CPU here; if you have a GPU available, change the `device` inputs accordingly.\n", + "\n", + "Anything needed to run unconditional generation is saved in the checkpoint." ] }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from evodiff.pretrained import OA_DM_38M\n", @@ -573,25 +331,46 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ - "## Unconditional generation\n", + "To generate one sequence, run:\n", "\n", - "#### To generate one sequence, run:" + "The only thing you need to define is the desired sequence length via the `seq_len` input " ] }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:05<00:00, 17.88it/s]" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Generated string: 
['MRHVVILERLCVQVHGTHEDFGHRSSPEDAEEQSYEVGGLRVTALTLLPDAAPEEEPTALTALRAVGPAAFEIVDAEGRRETVIGIRPALRTATPLVTWD']\n" + "Generated sequence: ['MKLFITAAGAAAFVAASAAAQAGPCAEGHANTHTPQSSHSPITAEVGAINFVAVPTDKLADHQGPFDEPSVQTDVVVITPNAEALTLEEAELSALIDETG']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" ] } ], @@ -599,179 +378,369 @@ "from evodiff.generate import generate_oaardm\n", "\n", "seq_len = 100\n", - "i_sample, i_string = generate_oaardm(model, tokenizer, seq_len, batch_size=1, device='cpu')\n", - "print(\"Generated string:\", i_string)" + "tokeinzed_sample, generated_sequence = generate_oaardm(model, tokenizer, seq_len, batch_size=1, device='cpu')\n", + "print(\"Generated sequence:\", generated_sequence)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ - "To generate an MSA, there are a few options:\n", - "1) --start-query flag: start with the query and generate the alignment\n", - "2) --start-msa flag: start with the alignment and generate the query\n", - "3) no flag: generates the entire MSA unconditionally\n", + "### Generate a sequence with EvoDiff-D3PM-Uniform 38M\n", "\n", - "NOTE: you can only specify one of the above flags at a time. You cannot specify both (--start-query & --start-msa) together\n", + "Again, we show an example here using the smaller model weights. For D3PM models we need additional inputs for inference, so we download checkpoints with `return_all=True`. 
If you are using a BLOSUM model, make sure to download the blosum matrix file in `data/` to your local files" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sohl-dickstein\n" + ] + } + ], + "source": [ + "from evodiff.pretrained import D3PM_UNIFORM_38M\n", "\n", - "Please view [evodiff/generate-msa.py](https://github.com/microsoft/evodiff/blob/main/evodiff/generate-msa.py) for more information." + "checkpoint = D3PM_UNIFORM_38M(return_all=True)\n", + "model, collater, tokenizer, scheme, timestep, Q_bar, Q = checkpoint" ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ - "## Conditional generation\n" + "We can then generate 1 sequence via the following, where again only `seq_len` needs to be defined: " ] }, { - "cell_type": "markdown", - "metadata": {}, + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████████████████| 499/499 [00:24<00:00, 20.09it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final seq ['MKGTVNYSSSKTQIKLAQTHPDKAAMEMKTTSLKLCRHQNQLFLNRTYSTRSSTIKASRSGLLSNWGQHRRGTFSSKYDQNPNGIAPCCMTTFEHKREDR']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ - "#### If you wish to generate a sequence, you can either generate a scaffold structure that supports a desired motif or an IDR. \n", + "from evodiff.generate import generate_d3pm\n", + "\n", + "seq_len = 100 \n", "\n", - "In this notebook, we provide the scaffold example. 
Please view the [examples/README.md](https://github.com/microsoft/evodiff/blob/main/examples/README.md) file for information on generating an IDR. Note that the IDR generation pulls from a database that specifies much of the information you need to manually specify in the scaffold setting." + "tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu')" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ - "#### We provide PDB files in the [examples/scaffolding-pdbs](https://github.com/microsoft/evodiff/tree/main/examples/scaffolding-pdbs) folder. You can use the following code segment to visualize the various PDB files and pick one." + "## Conditional generation\n", + "\n", + "\n", + "### Evolutionary guided sequence generation with EvoDiff-MSA \n", + "\n", + "To generate a sequence, given a multiple sequence alignment, you must have an MSA available. Our `generate-msa.py` code samples the validation dataset of openfold, then subsamples an MSA `n_sequences` x `seq_length`, and generates a new query sequence for that sampled MSA.\n", + "\n", + "To run the following code on a custom MSA, you must provide the path to an MSA saved as an A3M file and specify the subsampling of `n_sequences` by `seq_length` via the scheme of your choice (`selection_type=random` or `MaxHamming`), where the query sequence is the sequence you want to generate. We have not extensively tested our subsampling code outside of the Openfold dataset. \n", + "\n", + "\n", + "*Note: All our conditional generation uses OADM models; currently we do not support conditional generation with D3PM*\n", + "\n", + "To run, first let's download the appropriate weights for EvoDiff-OADM-MSA. Note: our conditional generation tasks only work with OADM models. 
" ] }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "# Specify PDB code\n", - "pdb = '1prw' " + "from evodiff.pretrained import MSA_OA_DM_MAXSUB\n", + "\n", + "checkpoint = MSA_OA_DM_MAXSUB()\n", + "model, collater, tokenizer, scheme = checkpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Next we provide the path to an A3M file in `path_to_msa`, and subsample the MSA to `n_sequences` by `seq_length` using `random` subsampling before we begin our conditional generation task. In that case that the MSA is shorter than the provided seq_length, it will pad additional rows with a `PAD_TOKEN=!`. In this case, the input file contains many fasta sequences, and the query sequence is assigned as the first entry in the A3M file. The subsampled msa returns an MSA with the query sequence in the first row. We will mask out this sequence, and generate a new one in its place. \n", + "\n", + "We have provided a random test example a3m file under `examples/a3m_example` which we will use here. " ] }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Collecting py3Dmol\n", - " Obtaining dependency information for py3Dmol from https://files.pythonhosted.org/packages/47/69/b295c4c0f7c9e9ddbb3f94577c0b15ddedb4dbbf08a451bdac5d0f5d4831/py3Dmol-2.0.3-py2.py3-none-any.whl.metadata\n", - " Using cached py3Dmol-2.0.3-py2.py3-none-any.whl.metadata (2.1 kB)\n", - "Using cached py3Dmol-2.0.3-py2.py3-none-any.whl (12 kB)\n", - "Installing collected packages: py3Dmol\n", - "Successfully installed py3Dmol-2.0.3\n" + "a3m_example/bfd_uniclust_hits.a3m\n" ] }, { - "data": { - "application/3dmoljs_load.v0": "
\n

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol

\n
\n", - "text/html": [ - "
\n", - "

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n", - " jupyter labextension install jupyterlab_3dmol

\n", - "
\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/164 [00:00SEQUENCE_\" + str(i) + \"\\n\" + str(_s[0]) + \"\\n\")" + "print(\"motif start indices\", new_start_idx)\n", + "print(\"motif end indices\", new_end_idx)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ - "#### To conditionally generate an MSA, run the following code\n", + "### Scaffolding functional motifs with EvoDiff-MSA\n", "\n", - "Note that when conditionally generating an MSA, you can specify query_only = True. By setting this flag to true, you only generate the query sequence. If it is false, then you generate the alignment too." + "EvoDiff-MSA requires an a3m formatted MSA in the data folder to proceed, for simplicity we did not wrap any homology tools for automatic MSA-generation. For generation, you must create an A3M, subsample an alignment (preserving the correct indices), and use this to generate a new query-sequence. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "If you would like to analyze the generated structure by comparing it to the original using the RMSD score, look at the analysis/rmsd_analysis.py script" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### We provide PDB files in the ` examples/scaffolding-pdbs` folder. You can use the following code segment to visualize the various PDB files and pick one." 
] }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, + "execution_count": 7, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Specify PDB code\n", + "pdb = '1prw' " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "INPUT MSA 1\n" + "Collecting py3Dmol\n", + " Obtaining dependency information for py3Dmol from https://files.pythonhosted.org/packages/47/69/b295c4c0f7c9e9ddbb3f94577c0b15ddedb4dbbf08a451bdac5d0f5d4831/py3Dmol-2.0.3-py2.py3-none-any.whl.metadata\n", + " Using cached py3Dmol-2.0.3-py2.py3-none-any.whl.metadata (2.1 kB)\n", + "Using cached py3Dmol-2.0.3-py2.py3-none-any.whl (12 kB)\n", + "Installing collected packages: py3Dmol\n", + "Successfully installed py3Dmol-2.0.3\n" ] }, { - "ename": "KeyError", - "evalue": "'1'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/Users/nityathakkar/Desktop/research/msr/evodiff/examples/evodiff.ipynb Cell 22\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 24\u001b[0m seq_lengths \u001b[39m=\u001b[39m []\n\u001b[1;32m 26\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(num_seqs): \u001b[39m# no batching\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m string, new_start_idx, new_end_idx, seq_len \u001b[39m=\u001b[39m generate_scaffold_msa(model_type, model, pdb_code, start_idx, end_idx, data_top_dir, tokenizer, query_only \u001b[39m=\u001b[39;49m query_only_flag, device\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mcpu\u001b[39;49m\u001b[39m'\u001b[39;49m, mask \u001b[39m=\u001b[39;49m mask_id, pad \u001b[39m=\u001b[39;49m pad_id)\n\u001b[1;32m 29\u001b[0m 
strings\u001b[39m.\u001b[39mappend(string)\n\u001b[1;32m 30\u001b[0m start_idxs\u001b[39m.\u001b[39mappend(new_start_idx)\n", - "File \u001b[0;32m~/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages/evodiff/conditional_generation_msa.py:564\u001b[0m, in \u001b[0;36mgenerate_scaffold_msa\u001b[0;34m(model_type, model, sliced_msa, sliced_start_idxs, sliced_end_idxs, data_top_dir, tokenizer, query_only, device, random_baseline, n_sequences, mask, pad)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mINPUT MSA\u001b[39m\u001b[39m\"\u001b[39m, sliced_msa[\u001b[39m0\u001b[39m])\n\u001b[1;32m 563\u001b[0m \u001b[39m# Now tokenize using tokenizer of choice\u001b[39;00m\n\u001b[0;32m--> 564\u001b[0m sliced_msa \u001b[39m=\u001b[39m tokenize_msa(model_type, sliced_msa, tokenizer)\n\u001b[1;32m 565\u001b[0m query_sequence \u001b[39m=\u001b[39m sliced_msa[\u001b[39m0\u001b[39m] \u001b[39m# ensure query is first seq -> not true for IDRs\u001b[39;00m\n\u001b[1;32m 567\u001b[0m \u001b[39mif\u001b[39;00m model_type \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mesm_msa_1b\u001b[39m\u001b[39m'\u001b[39m:\n", - "File \u001b[0;32m~/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages/evodiff/conditional_generation_msa.py:534\u001b[0m, in \u001b[0;36mtokenize_msa\u001b[0;34m(model_type, untokenized, tokenizer)\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mtokenize_msa\u001b[39m(model_type, untokenized, tokenizer):\n\u001b[1;32m 533\u001b[0m \u001b[39mif\u001b[39;00m model_type \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mmsa_oa_dm_maxsub\u001b[39m\u001b[39m'\u001b[39m \u001b[39mor\u001b[39;00m model_type \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mmsa_oa_dm_randsub\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[0;32m--> 534\u001b[0m \u001b[39mreturn\u001b[39;00m [tokenizer\u001b[39m.\u001b[39mtokenizeMSA(seq) \u001b[39mfor\u001b[39;00m seq \u001b[39min\u001b[39;00m 
untokenized]\n\u001b[1;32m 535\u001b[0m \u001b[39melif\u001b[39;00m model_type \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mesm_msa_1b\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[1;32m 536\u001b[0m src \u001b[39m=\u001b[39m []\n", - "File \u001b[0;32m~/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages/evodiff/conditional_generation_msa.py:534\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mtokenize_msa\u001b[39m(model_type, untokenized, tokenizer):\n\u001b[1;32m 533\u001b[0m \u001b[39mif\u001b[39;00m model_type \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mmsa_oa_dm_maxsub\u001b[39m\u001b[39m'\u001b[39m \u001b[39mor\u001b[39;00m model_type \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mmsa_oa_dm_randsub\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[0;32m--> 534\u001b[0m \u001b[39mreturn\u001b[39;00m [tokenizer\u001b[39m.\u001b[39;49mtokenizeMSA(seq) \u001b[39mfor\u001b[39;00m seq \u001b[39min\u001b[39;00m untokenized]\n\u001b[1;32m 535\u001b[0m \u001b[39melif\u001b[39;00m model_type \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mesm_msa_1b\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[1;32m 536\u001b[0m src \u001b[39m=\u001b[39m []\n", - "File \u001b[0;32m~/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages/evodiff/utils.py:250\u001b[0m, in \u001b[0;36mTokenizer.tokenizeMSA\u001b[0;34m(self, seq)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mtokenizeMSA\u001b[39m(\u001b[39mself\u001b[39m, seq):\n\u001b[0;32m--> 250\u001b[0m \u001b[39mreturn\u001b[39;00m np\u001b[39m.\u001b[39marray([\u001b[39mself\u001b[39m\u001b[39m.\u001b[39ma_to_i[a] \u001b[39mfor\u001b[39;00m a \u001b[39min\u001b[39;00m seq])\n", - "File \u001b[0;32m~/opt/anaconda3/envs/evodiff/lib/python3.8/site-packages/evodiff/utils.py:250\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[39mdef\u001b[39;00m 
\u001b[39mtokenizeMSA\u001b[39m(\u001b[39mself\u001b[39m, seq):\n\u001b[0;32m--> 250\u001b[0m \u001b[39mreturn\u001b[39;00m np\u001b[39m.\u001b[39marray([\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49ma_to_i[a] \u001b[39mfor\u001b[39;00m a \u001b[39min\u001b[39;00m seq])\n", - "\u001b[0;31mKeyError\u001b[0m: '1'" - ] + "data": { + "application/3dmoljs_load.v0": "
\n

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n jupyter labextension install jupyterlab_3dmol

\n
\n", + "text/html": [ + "
\n", + "

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
\n", + " jupyter labextension install jupyterlab_3dmol

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "from evodiff.pretrained import MSA_OA_DM_MAXSUB\n", - "from evodiff.conditional_generation_msa import generate_scaffold_msa\n", - "import pandas as pd\n", - "\n", - "checkpoint = MSA_OA_DM_MAXSUB()\n", - "model, collater, tokenizer, scheme = checkpoint\n", - "\n", - "selection_type = 'MaxHamming'\n", - "mask_id = checkpoint[2].mask_id\n", - "pad_id = checkpoint[2].pad_id\n", - "\n", - "start_idx = [15, 51]\n", - "end_idx = [34, 70]\n", - "\n", - "num_seqs = 3 # Number of sequences generated per scaffold length\n", - "model_type = 'msa_oa_dm_maxsub'\n", - "pdb_code = '1prw'\n", - "data_top_dir = './' # Change this filepath to represent where this notebook exists for you locally\n", - "query_only_flag = True\n", - "\n", - "strings = []\n", - "start_idxs = []\n", - "end_idxs = []\n", - "seq_lengths = []\n", - "\n", - "for i in range(num_seqs): # no batching\n", - " string, new_start_idx, new_end_idx, seq_len = generate_scaffold_msa(model_type, model, pdb_code, start_idx, end_idx, data_top_dir, tokenizer, query_only = query_only_flag, device='cpu', mask = mask_id, pad = pad_id)\n", + "!{sys.executable} -m pip install py3Dmol\n", "\n", - " strings.append(string)\n", - " start_idxs.append(new_start_idx)\n", - " end_idxs.append(new_end_idx)\n", - " seq_lengths.append(seq_len)\n", + "import py3Dmol\n", "\n", + "view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')\n", + "view.addModel(open('scaffolding-pdbs/' + pdb + '.pdb','r').read(),'pdb')\n", "\n", - "save_df = pd.DataFrame(list(zip(strings, start_idxs, end_idxs, seq_lengths)), columns=['seqs', 'start_idxs', 'end_idxs', 'seq_lengths'])\n", - "save_df.to_csv('MSA_motif_df.csv', index=True)\n", + "view.setStyle({'cartoon': {'colorscheme': {'prop':'b','gradient': 'roygb','min':0.5,'max':0.9}}}) # as color is set to lDDT\n", + "# view.setStyle({'cartoon': {'color':'spectrum'}})\n", "\n", - "with 
open('MSA_generated_samples_string.csv', 'w') as f:\n", - " for _s in strings:\n", - " f.write(_s[0]+\"\\n\")\n", - "with open('MSA_generated_samples_string.fasta', 'w') as f:\n", - " for i, _s in enumerate(strings):\n", - " f.write(\">SEQUENCE_\" + str(i) + \"\\n\" + str(_s[0]) + \"\\n\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you would like to analyze the generated structure by comparing it to the original using the RMSD score, look at the [analysis/rmsd_analysis.py](https://github.com/microsoft/evodiff/blob/main/analysis/rmsd_analysis.py) script" + "view.zoomTo()\n", + "view.show()" ] } ], "metadata": { "kernelspec": { - "display_name": "msr_env", + "display_name": "omegafold", "language": "python", - "name": "python3" + "name": "omegafold" }, "language_info": { "codemirror_mode": { @@ -995,10 +928,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.17" - }, - "orig_nbformat": 4 + "version": "3.8.5" + } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/examples/example_files/bfd_uniclust_hits.a3m b/examples/example_files/bfd_uniclust_hits.a3m new file mode 100644 index 0000000..81f97b6 --- /dev/null +++ b/examples/example_files/bfd_uniclust_hits.a3m @@ -0,0 +1,2484 @@ +>query +MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEASVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL +>SRR5213595_18545 +TDIVSLLKFEEGFSATPYRDSEGYPTVAIGIRIGPKGAAMsNYSFTVPLSVAQIWtqqfvDDLMNQINTNQ----KYAGVRLAMGKCVEQAATTPAYQNPRCAVLLSMAYQMGLDGLAGFPNTLTLVDHGEWDKAADAMLQSKWAKQTPARAKRHSDQMRTGVWATEYXX +>tr|H3R9L3|H3R9L3_PANSE Uncharacterized protein OS=Pantoea stewartii subsp. 
stewartii DC283 GN=CKS_0330 PE=4 SV=1 +SQIIAILNFEEGYAEMPYLDTLGFPTVAGGIRIGPKGASLsNYVMRVPQRVGDVWkqcilENKITDMQSRA----PI---KLALE----------KCNEARTDVLMSMAYQLGVDGILQFKTCSTRLPRKILLRPVRPCS----TACGRGRHRAVSGVMPK--------- +>SRR5208282_1047185 +-----------XMTEIRYTKARCLQRLGEKRSM---PVPAR-------------VAGLSIRLLVASVPAMQRHKAARLILD---IPWLAGFkstQPVRFGVYQNLAFNMGAGGVMEFHHALADAQAGNWVQCAADMRASLWYTQVGARAERLCTQMETGVWQ----- +>ERR1711966_109761 +NIKNDII-REEGgLLLDPYQDHLGYWTIGCGHLIRDDE--RDELMKPI-TKERAKEIFVLDLGVS-----------IQDAET---FYKDMKIDDNVKECVIHMSFQMGLPRLNQFKKFKQALQNNDIETAIVEMKDSRWYNQTTNRANRLIEKMRKSLX------ +>SRR5689334_12075450 +-MARQLL-EEEEgRHTRTYYDHLGYVTGGIGHLLDPRR--GGFLPD------HIVD---------VLFDYDFTEK-SKQAAK---IPGFFQLNEVQRAVIISMIFQMGFEPLdgdghKDFAKMLRALSAGDVRAAAREGRVSKWWRE----------------------- +>SRR6266550_5984299 +AELLSALRRDEELRLVAYDDAtgkpwqpgealLGKLTLGYGRNLSDLY---PHYEAAD-WLSYPHLLVCTQDQAEQWLLEAATKACTELHDA---LPWTSTLDAPRLEVLQNMTYNMGLGGLLKFHNTLTHVQRGQFFIASQDMEESLWDKQTKERAARLVAQMRTGTRH----- +>tr|A0A0U4I9K2|A0A0U4I9K2_9VIRU Baseplate hub subunit and tail lysozyme OS=Vibrio phage vB_VmeM-32 OX=1775142 GN=VmeM32_00262 PE=4 SV=1 +-TLKQMLTVDCGYNNRAVFRDDHF-LIGIGHRIETTNA--lEVKEYLESKLESENDsviqLTIDDSLIDKLFSEDLKIAKSKVT--------YS-GNNARVDALTILEFH----GYAT-SPLTNLLLADKYRDVYETIPNESLF-------NRARVMLLTGDYVSYA-- +>tr|A0A0S7XVK3|A0A0S7XVK3_9COXI Lysozyme OS=Coxiella sp. 
DG_40 OX=1703354 GN=AMJ43_07805 PE=3 SV=1 +ETVKEDLKIYEGFKPYVYLCPAGLKTIGYGHNIESSPLTINGIRL-DKDYIIDGKHVITREEAEEVLEDDIKNSEDDSR---RLFSDFY--DYplNVRYSIISMVYQLGMPTVGNFRNMCAAIKNRKWKVACYEALNSKWHLQDTPERsLKVAFKFLDAID------ +>SRR5690606_32879420 +--------------------------XGYGFNlkaMPKEKLQRLFLDPMGLDWdkLIDGEQTLTEAQAEELLARRIAYDREII---SEIFPEIDDMPLDVQKTLHNIQYQV-TGGVAKWPNTMKLIKQGKWEEAGDAIVNSLWFKQTPTRAAEAAENLW---------- +>ThiBiot_750_plan_1041556.scaffolds.fasta_scaffold00283_13 # 13266 # 13883 # -1 # ID=283_13;partial=00;start_type=ATG;rbs_motif=GGxGG;rbs_spacer=3-4bp;gc_cont=0.748 +-QVYEEIAADEGKILHCYMCSENHKTVGIGHKVLPNDPESNLPVHGAYD-DVPEEEGITEERCYELFQNDVQIAIDGC---SAIYSNWENLPQEMRHILVNMCFQIGQGGLSKFENMNSAVEQEAWGMVSLEMMDSRWAQQTPERAARLRDRVLAV-------- +>KBSMisStaDraftv2_1062788.scaffolds.fasta_scaffold6971457_1 # 2 # 286 # 1 # ID=6971457_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.614 +-ALIEQLTEDEGYRQDVYECTNGFPTVGIGLAlkdLKPKISADLALEIILWQ-IDNNLIRLSLEHSQFILTTECPELHFQL---SERFSFYDDLPQMVQNVLLNMAFQIGPYGLSKFKKMLKAMEKADWKEASIEMLDSKWARRdSPARANRLADIVREH-------- +>GraSoiStandDraft_44_1057316.scaffolds.fasta_scaffold5460945_1 # 3 # 122 # -1 # ID=5460945_1;partial=10;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.750 +-ITYEKFSEMSG---IPYntlvKhvHKQRIPSQNYMKKyyqLTNGMVTPSDFFD----------IVEDSWEWQVTYERDFSKAYDDA---KKVLEGIdTDINPIAFSVIVEMVSQMGYEGVSKFVKLHDALKNKDYQKAAQEMLDSRWGKQTPKIAYTLAEKMRAA-------- +>SRR6056300_1069505 +-FLCQFIRRHEGVREQLYQDHLGYWTIGIGHLVDRRKGQWcddELARRLT-----QIGFRLSPRMVQAYLDDDIERAQSSARR--LYAPVWSQLTEARQAALIAMAFNLGHDGLAGFKNLRAAIIAQDWRQAQAEALDSRWAKQVPHRAHETAAILASGQPP----- +>JRHI01.1.fsa_nt_gi|693748561|gb|JRHI01010570.1|_3 # 953 # 2524 # 1 # ID=10570_3;partial=01;start_type=TTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.599 +-ELSKSVKLSEGFRNKIYQDTEGFDTIGWGHKVVVGDNFV-------------PDKEYTEEELQSVFDKDLSRAIAQA---KQLMTQNniDDLPETAQHVLAEMCFQLGQYGVQNFRNMWKCLQEANFIGASYERSYEimrlenfftEYKKQLIARQKQVEESITSGLCK----- +>ERR1719315_452918 
+-KFKEDLLRHEGCVLSVYPDPvhgEAALTCGVGHLLQPGDKH----------YGKPVGTPLTEEEMTEYLEKDMDTAIAGA---NKL-YdTFDDLPGEAQLVIADMVFNMGFAGRTEQAGDQGHPGPVRQISPCS--------------------------------- +>SRR5690242_9409289 +----------TGEPIVPGYTVVGNPTLWWGLNVTKGQV-----------------PPIPQRIPDECLEGSVGALWLQLT---VRLPWLDEQPEDVQGALKQMIYNMGVPRALGFKRMWAALKNGDRETAAAEALDSEWHRNQKTakRAERVADLIRGHRX------ +>DeetaT_20_FD_contig_31_5445061_length_222_multi_3_in_0_out_0_1 # 3 # 221 # -1 # ID=1342804_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.603 +-LLKTELIHDEVMYLDPFNNARGNRAVGVSRDLdinpltpeeeavvghdgRS--------------------KSISRGAAAYLLNADITRTLADLD---RVLPWWRVFDEVRRRALINLAFSMGAQKLLGFREMLGWMRQGNYEGAATCLLNTAWAKQHIDRARRLSVMLRIGIVP----- +>APDOM4702015118_1054815.scaffolds.fasta_scaffold1111905_1 # 3 # 368 # -1 # ID=1111905_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.585 +EIAVSIIAAEEGYMAKPYFCSLGYPTVGIGLKIGNHGQPLEH-----------FEgLPsMPRDVAELWCGLYVKDVEDSFNSYPLILAAAHNCNEVQRAVLISMAYQLGTRGLSRFERMLGCCIKKDFEGAGAEMLDSLLAEQTPNRTGRQASMLHSGKLLPYYK- +>SRR5690606_15425057 +TKCLNVNATVMAAATSALRAAAGSTTVLI-----C--------------------AASLRRSFTLLLPAQLQRLATHI----AMIAVF---------------------------------------------------------------------------- +>APIni6443716594_1056825.scaffolds.fasta_scaffold11485768_1 # 3 # 212 # -1 # ID=11485768_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.648 +-KLQDEIADDEGVMYETYRCSLGHLTGGIGHLITEWDEDmY----------SGPVGTKIPHEQVDKWFNMDITRTIEDCK---IIFTt-FDSLPQDAQLVIANMCFQLGRPRLSKFKNLIAAVEDLDWAKMADEMEALEIALLGQMkianpyLDDELVPYLSD--------- +>SRR5689334_23905162 +----------------------------------RDG--------------------TVTGVETCALPIYISRVERQL---DVNLTWWRGMCDVRQVALASMCFQLGITg-LLAFKNSLQFLRNGQYELAANEAMDSRWAKRSEERRVGKETRSRWT-------- +>tr|H6X3N0|H6X3N0_9CAUD T4-like lyzozyme OS=Enterobacteria phage vB_KleM-RaK2 OX=1147094 GN=RaK2_00073 PE=4 SV=1 
+KQFFTEVKEFEGTiayqsklgyfknnKFWVYKDSLGLDTIGYGHLVLKGENFK---------------NGLTEKEADALLLKDAKQAYNDAK---AIYQQFgMTAPESVQRVLWLMVFQMGKSKVLNFKKALAALGRADYKEAGRQMRDSLWYRQTTSRADKMAKIVESA-------- +>SRR5690349_23604373 +-NIYEQLERDEAKRNRPYRDSVGKLTIGVGHNL------D--------------DKPISDRAVRVILEDDVADARRDLQ---TALDRKSTRLNSSHVEIsyAVFCLKKKKQKK----------RDENT-------------------------------------- +>SRR5678815_375201 +-----------TYEQFPYTDKYGILRIGLGRDLINR--------------------GIPIVEAFQQLDDDLLYFDSKLE---QYLKFYRDLSDNRKAVLIKLCHEWGVKVLLEKTALMLALESHNYVEASKALSALGFDDL--------AY------------- +>SRR6185437_13929503 +-DMQSLILHYEGLRIWPYVDPTGHRTWGVGHNLDASPPATDVREL-------------LNPAAMLQLQHDLDAVLAAA----STDPGWPTLNDGRQAPIADMLFNRGTGAIATF-------------------------------------------------- +>SRR6185436_8101549 +-NIETQLCLHEGVKLRVYKDTKGYDTLGVGYNVAARGMDE-----FERIIGRKVvlsrtSDCITREEAMQVLGAVVARVEKAVV---LHFPTYLQLNEVRQRVVLDMAFNLGM-KALAFKACIADVEHSDWSGAAKELFRSEWAYQVDdgpggryGRADRLSYMLLTGKDV----- +>SRR6266516_1303342 +-RLMARLRAEEGMRSFRYLDSLGHATIGYGYLLEGHAVE-TMRALLGLSLTQaetIAagAQAITLEQAEKLLEYTAGSALTDAGD-VVGRETWASLPDDARLVLADMCFQLGDGGVRCFVHMLTAIRMGDFGRAAAEMRDSAWHKQTPGRCEQLAELMEHCAD------ +>SRR6185312_15576390 +------TFNNEAIRTQTYDDATGkpikpgdqifgTLTIGIGHTGPDVNgkpLMP--------------GDVWTEDKCETQFLHDYDIAIRGAKAIFhmefPYELW-DNIGEARQAVLADLIFNMGALRLSRFEHFRFAVENHNWEDAALELEwsdqartvHTPYYDKEPIRAAKNMYMLRVGDF------ +>SRR5574343_1105923 +--------------------IRDRDVTGVQNVCSSD--------------------LFSQVEIGKLQNDGWTQAHA---ermleheidlicAAFhr--LPWFAGLDDARKAAVIDITNNLGVDGFLEFRKTIAALREKDWEAAVHEMRHSKWAKQVPNRAAKR--------------- +>GraSoiStandDraft_44_1057316.scaffolds.fasta_scaffold4067822_1 # 179 # 241 # 1 # ID=4067822_1;partial=01;start_type=ATG;rbs_motif=AGGA;rbs_spacer=5-10bp;gc_cont=0.556 +-NAINLIKRFEGCRFSPYRDAGGLWTIGYGHLIGDGKSLP-----------ANINRNFTEEEINALLIEDVARFERGLN-----MCLVVPVTQNQFDACISWVFNLGLAEFKk--YIAPCINGGDDPEEIVAEMVK---FHFVGKt----SLKGLVDRR------- +>UPI0002BF143A status=active 
+--LEELIMRHEGYRDVAYLCTTGHLSIGVGHKILP-----EEKF--------KKGVKYSKEQLMQVFRTDLANAKFFT---NLLVKEW-KLPEDAYNVVVSMIFQMGSAGVGKFKKFLVCLKAHDWIGCKYHGLDSKWAKQQTPErAEELMTIISE--------- +>tr|A0A1I1PVG5|A0A1I1PVG5_9GAMM Lysozyme OS=Pseudoalteromonas denitrificans DSM 6059 GN=SAMN02745724_03615 PE=4 SV=1 +-RALEQIKKHEGFNRHPYSNDSNTLSIGYGRKLDTL--------------------GISQEEAEVLLANDLSFLQNIIQI---SV-NTKKCNSPRMAALVALSYNLGFDGLMNFKRTLESVEKGSFDCISSELLHSHWARKAPMRAVELILQMETGKWQ----- +>SRR3990167_2922977 +MTIRDLIKQHEGYSAKVYDCPSGKKTIGWGHNLEVGTIPGTIKLYLDS------HGEITEDMAEKPLTNDIAHAIIAC---QSLFKDFDEIDEVRRAAFLDFVFNVGQGTAAKFKKAIEAINERDWDRAALEMKNSLWFDQVKTRGVRIVMMIQKGEW------ +>ADurb_Val_01_Slu_FD_contig_21_1432719_length_257_multi_3_in_0_out_0_1 # 3 # 257 # 1 # ID=74846_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.596 +LKESLMLRVEEGKRNKRYKDSLGYWTIGIGWNIDATPLPTEIAAFEQI------NGYITDDMVNQLLDIQITWKLADC---HKVWPAFDTFSENRQEALCDIMFNMGKGTILSFHHMNAWINqaDPDWEGAADELKytdgktkshLSHWYQEVHAIRgEEIYNLILNGX------- +>HubBroStandDraft_6_1064221.scaffolds.fasta_scaffold7534810_1 # 3 # 107 # -1 # ID=7534810_1;partial=10;start_type=GTG;rbs_motif=GGxGG;rbs_spacer=3-4bp;gc_cont=0.600 +------------KRDKVYNDSEGKPTIGIGHYLNGSEQDRNlfktL-----FGNTVDynkvlnGQQKLSSDQIEKLFNVDVKIKEKLAS---SKISNFNSLPVTVKNAIINALYRGDLG-----PKTIALMNSGKWDSVAKEYLDHKNAKSGPEQIKRRMntNAMAFAQF------ +>SRR3990167_2457687 +-RLHARTRRHEAVRRKLYFDSKGILTGGIGHALGHVGVLVEVQHAMLA--SGVMDRDLGLETIERWFLADMAEAEQGVRHLAmLRNVDFDALTDERQEVLVEMCFQMGVTRLGGFVKMWAALAAKIWTIAAHEGLDSKWARADSPkRATELMELMRGTEX------ +>ERR1712224_509045 +--------------------PKDNIQIGIKYSVDH--------------------LSEIFEEDFKIAFNGAKQLI---E---EHLPNlyTqglnQGDIEQIQGVLIEMIFQMGYPRVSKFKKMIKALNEGKFSTAADEMLDSRWHKQTPARAIELSTIIRNIX------- +>GraSoiStandDraft_4_1057263.scaffolds.fasta_scaffold7797389_1 # 123 # 248 # -1 # ID=7797389_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.690 
+-KVKDMLIRHEGLMCTLYECTAEppRTSIGVGRNLSD--------------------NGITEDEAMYLLENDIDRVIKNLD---KHWAVWRSFPEKAQMVCIDLVFNLGINGFMNFRRTRALMEMGMWLEASEELLDSRY-hVQLPNRSSYNSRQLALCNKD----- +>SRR5690348_17975525 +-------PCTDPPTTTIYtlSLHDALPIFGVGHNLRV-PLS--------------------ARAIRTILEDDVAEAIRFLDT---SIGWWRDQPEPIQEVMLNMAFNLGDR-LLQRSEEHTSELQSPVHLVCRLLLE----------------------------- +>SRR5689334_21689787 +------------------------------------------------------GGKLDRDIIYQILSRDISK---HT---GDLYSVYPWAAeldEPRRAVLICMCFQLGITALSQFVQAMKYMQNGEYTAASLAFLDSKVAReQAPVRW--KrfAEQIKTGEWI----- +>SRR3546814_18799144 +--ATKHLNREEGRIPHAYQDSLGYWTIGVGRLIDKR-----------------KGGRLTNVEIDMLLANDIAD---KI---AEISDWPAWQAvkadPVRAtaLL--SMAFQMG-aaglAGVKKSL---KLGEQKRWGEEERQRIGVGKSGA---------GRGRTGGRR----- +>SRR3546814_5143479 +----------------------FTFIILFGVCIGVY---------------------------------CIWC---IV---VLV--RFCFF-fsSRRRHTRCALVtgvqtCALP-ICLAGFKNSLKLVEQKRWAEAAANMMLSKWAKQTPERA--ArvTKMIETGAYQ----- +>SRR3546814_7831959 +--ATKHLNREEGRIPHAYQD----------RSEEH----------------------------------------TSE-----------LQSLMRISYADFCL-NKKTpnntiVQCISINHT-------------------------HPRTM----------------- +>SRR5690349_24524792 +-----------------------------------------------------LFLLLPPPHSTLFPYTTLFR---AT---ATARANFEWFDaldPVRQDIIVMLIFNLGVNGLNAFHLMLQAFSEHAWHEAAFQLANSLRGRkQIGler--K--RaeCNAIELRS------- +>HubBroStandDraft_1064217.scaffolds.fasta_scaffold07224_1 # 2 # 547 # -1 # ID=7224_1;partial=10;start_type=GTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.714 +--GKELTKFHEGERLTAy-QDTRGFWTIGVGHLLPRPRSP------------EWKGYKITQAESDRLFNDDWHY---HI---SLVAQYAPWAMqfdEVRRYVIVDMTFNLGVepfdgDGFKDWPMFVAQLKARNWPAAAANMRSTLWASQVKGRA--QrlARMIETGTWP----- +>SRR5687767_14603425 +--LDHDIAVaeapGGKAVLRAYKDSVGVWTIGFGTNLQE--------------------LRIDEGQAYRWLREKRDEAEREAS----RFRWYARLNGTQQRAVVEMIYNLGLTRFLGFTQMIAALDVGDFETAKREAVASKWYRDVGPaRGDRIARMLLGKX------- +>MudIll2142460700_1097286.scaffolds.fasta_scaffold1440245_1 # 2 # 568 # -1 # 
ID=1440245_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.492 +-HFLDKLIEHEGIVLNVYKDSLGIDTVGIGRNLKDRGISKEELDYMDIpNINVVYQQGISEADARYLAMNDIKIVENELV---KVHRCVEDLDSVRQLVLMDMAFNMGVPRLCKFKNMWNAIHEQNYEAASWEMLDSKWARQVGRRATILSDAMKAGEF------ +>EndMetStandDraft_3_1072993.scaffolds.fasta_scaffold5684806_1 # 2 # 235 # -1 # ID=5684806_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.667 +--LRCQLIKHEGLATKSYLDTTGLLHGGIGHLMRANEIPMY-----------PLGSPISNAQIETWFVQDSTSAI--KIGQELMGTTWSELSDVRKRAVVDLSYNLGKARLAQFTLFLSAMRAKNFTAAGNELRNSIWFSQVGRRGPAVVSMVSTGVD------ +>SRR5690606_24066600 +--LVDDLIADEGFETISYRDTKNKWTIGVGHLLGVGPEYA----------------NIRWTERKVIVTlmQDINSSIFYARQ--RVPNF-DYWNENRQRVLVNMMFNLGPTKFSGFIEMnkall---NNDVKW-------------------------------------- +>SRR6056300_1865798 +--KKKKpKLTEKVYKVYVTHYTDGFFYIGFTSKSGKS---L------ES----YFGSntikdkLVSHKDIVFTSKSKATAKLFELL---LQLSR------LDSSWCVNSMLNVrvRKEHMKDLPRFKLTFEDDKYNNKDKQX------------------------------- +>tagenome__1003787_1003787.scaffolds.fasta_scaffold9056604_1 # 1 # 225 # 1 # ID=9056604_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.533 +--ICAQmPLIPKGIAYS---AQFEAQIINKGYTMGRGGYRP------NA----GRKAqtpeerlltkQLSTIDKLKKLKLDPIDILNKEL---KALKGkEDSKSQNLRVRIAEKLLEYGYQ---KQPT-----SIHQTGNSDMPILTIVQKSQTVDekVVKPIIDEIPSENT------ +>SRR5260221_640902 +MDVVEWISQHEALRLFVYDDKTGhavvpgytlvgHPTIGYGRALDTH-------------------G-LLPAEAELLRANNIADCRADLL---AifGAERLVAFGEPRQGVLTDLRFNLGGAGFRSFHQTITALCNRQWETAANYLLDSALARE-LPeRI------------------ +>SRR5690554_705451 +-ILKEWVMHYHPFTQHVRKTLDNELIIGFGRNLTTQ--------------------GIAFDEAETLLVHDLHVLKKQLRK----FVWYIDQPEQIKNALIHLAYSMGLNKLLQEKELLRFLKNHDYTQASLTLLESNWGR------------------------ +>SRR5690554_5056061 +DKLRSEIQSDESWRGLAYDDAtgktlrkgdtlQGYVTAGWGFCLDGDR-----------------GRPMPQHIGDAWLDWLLDQVEAEM---RRRWPAFDRQPGGVQRGILNMAYQMGEGGAMGFRNMIAALEAGDRARAAKECLDSKYARQTPARARRVAALIRGATD------ +>SRR5437762_11341177 
+GTLQDFIAKHEGYRDHVYLDSRGFPTAGIGHLLAGSHYHV--------------GEKISAQQITEWFKEDVAKAIAGAKR--DVGPAYDRLDEARKMVVIDMVFNLGEGGFGGFHATIHAIASVDLAICARGFALRVHRDRAHA-------------------- +>ERR1039458_3832459 +-AIVGMRYDA------------TEIAQGITEV---------------------------LAYGTAVVVEPAK---------------QGGTQX----------------------------------------------------------------------- +>GraSoiStandDraft_40_1057318.scaffolds.fasta_scaffold2545988_2 # 153 # 290 # -1 # ID=2545988_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.717 +HMIENIIEREEGFRPYPYLCSEGFVTIGYGTKLHKSKGMSPS----------GFTLKLTKSSAYSLLEDEIKRLKLSIAT-TSRGEAFSYLSTERKAIILSMAYQLGVTGMLHFVNMWDAIESSNFTKASEEMLDSKWAKQTQGRAERHALVMRSNSIHPY--- +>tr|A0A1Y3MSK5|A0A1Y3MSK5_PIRSE Glycoside hydrolase family 24 protein OS=Piromyces sp. (strain E2) GN=PIROE2DRAFT_64657 PE=4 SV=1 +DFATTLLMFEEGVPKNgictPFKCPAGYPTVGYGHKccdytvssDAQAYSPCeKFISSCR------------VDNGKSLLNNDIKNHLESMKSYNKLWKAYSNASNKRKAVIISMVTQMGPYGASLFEPTLDLMISGNWKAAASKMLDSNWARYqAPNRALRHSYVIEHGDCNRNKNY +>tr|A0A1Y1UZV2|A0A1Y1UZV2_9FUNG Uncharacterized protein (Fragment) OS=Piromyces finnis GN=BCR36DRAFT_215520 PE=4 SV=1 +-----LIKNEENTENLnactPYINYKNLPLIGYGKLcsenkvssNDDLNEQCkDLIDTCT------------QDNAINWLYRDIDKAINCIQECTLCKKAYNTCSIERQSILISLAHSMGCEDFKNLE-IFNDIINKNWENIDLKSLNSNWSEKNYNRAIKHLYVLRKNDCCN---- +>SRR5439155_11164694 +-------IAAEAL---PR--RQAQAHDRSWPDLDD--------------------AGITRAEALMLLDNDIATVRRDVT---RAFPWFPGLDPVRKDVALDVVFNLRLPGFRRFEKTIASIRAGDWENAAREMLRSRWASP----------------------- +>SRR6185312_7360557 +-----RIGDDEGDDLKAYRDSLGIETIGVGFNLTRSDARDAL-----AKCGVtDVdgvmnGtTALTPAQDAALFEYSFAPIESEAR--ASLAtGIYDSMTDARRFTICDLVYNLGNAGWNDFTNTRALRNeaqaaknagqasaHALFVLAAEHLEESDWYNQVGLRAKRNVAMIRSGVW------ +>GraSoiStandDraft_4_1057263.scaffolds.fasta_scaffold244441_2 # 797 # 1567 # -1 # ID=244441_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.625 
+---LKVFTKDENNILHPYKDHLGNWTIGIGHLIGGNL--E--------------NFRITDRISKLLFRKDLLEALDDAHFI-FGKEFFEGLAVGRKAAILTLCFGLGRAKLLTFHHTVPAIKNEDWEAASNYILSTKWAVDVDPkkrhgigRDDRVAYMLRTGLLH----- +>tr|A0A167IM75|A0A167IM75_9GAMM Uncharacterized protein OS=Pseudoalteromonas luteoviolacea S4060-1 GN=N478_08320 PE=4 SV=1 +-ALKVQLICHEGLTCVPMENDAGELTIGVGRNLSQT--------------------GISETEAEQMLEHDLAELLKNIE---EELPVFRQLSEVRKLVLMNIAFSIKIDGLKALKKLLAALCIEDFTLAANEILHCHTIPGGTDRKAELSMMMKAG-------- +>GraSoiStandDraft_16_1057320.scaffolds.fasta_scaffold3274387_1 # 1 # 342 # -1 # ID=3274387_1;partial=10;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.687 +-LLKNRIKKNEGYKSFAYFDQLGFPTIGYGHLIKPNEKIF-------------FKQKFSKKFLLNIFNLDFNETVMQYEKN----YHKYNFSNNIRDVLIEMIFQLGISGQKKFIKMNEYMKKKQVFMASLEMINSLWYNQTPKRVDYLINILLKRH------- +>ERR1711935_488013 +----------------------------FGHVIRYPDTEL------DQ----PIGSTVSQSRVYEALDADVERTIVDVV---ARYSDFQFYKLDLQMILVVMCHEVGKDRFSQYQDFERAIKRYDWRNSARSGRLTNWYSNNRDRGERLMKILEGLX------- +>SRR5258708_6444378 +-------------------------------------------------------YGVLRSEGDILYQNNKIDAQADLQ--IIFGVArWASFGLPRQGILTDIWYEVGGAGFREFHKMITAACNGAWETAAAELLNSKLAREDSPdRQRANAGVLRS--------- +>SRR5574343_949913 +-LFDSRIKDFEGErrkengNHELYTCPAGHLTIGWGYNIEANG--------------------ITDRIAELLLKYSRDIAKFELDS---HIPWWRKMPVRAQAVMLDMCFNMGWgngnRGLSSLRTFLDAMHAGDYDLAADYVWNptksaseqrYKYSRDVGErRAGANARLLREAV------- +>ERR1719471_1265520 +-YWILQLKVHEGYCENIYKDSKGYLTFGIGHLITSSDPEY----------GEPCGTAVSESRVLSAFDSDVAGFEG---DYYILYPDFDDQPDFVQSVVGDMMFNLGLGGLSDFVGMKACVDAKDYQCAADEMIDSAWCGQVGRRCDKLSSMMRNEDYHhGY--- +>SRR5258708_8604862 +-----------GFQPHVYNDTAdpTNPTVGIGFNLNRPDAATAL-SGVGADYAKVRTGaaDLNELQANTLFGPDVATARSQA---ASYFSDIASLVPARQDVLIDMAFNIKPAKFLAFTNLPAPLAPPPYPPAPSAILTRKRAPP----------------------- +>APDOM4702015248_1054824.scaffolds.fasta_scaffold2587352_1 # 1 # 219 # 1 # ID=2587352_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.516 
+-AIFKQLVKHEGYKKHIYLDKLNIPTIGIGFNLNDRGNQKIL-AKYGITQRHLQQ-GITDAEIKELFDETLKIATANA---KRFAPNLDSLPINAQLAIIDLSFNLGSEKLAKFKVLRQALAKKDFIAAAAALKDSKWYHQVGNRGIDLVNQLRSASS------ +>LKMJ01.1.fsa_nt_gi|998604844|gb|LKMJ01004907.1|_10 # 10700 # 12619 # -1 # ID=4907_10;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.582 +--AIIIIQEEETFKDKAYDDArpnydlkpgdkiIGTLTIGYGHTNAARDDDEII----------KIGDTVTKEEAVEILKKDLQQYVDIVN--NRMKSFD---VELTQEQFDGLVF-------ATMNRPEKMSGGALWRAIGSgdeDKIRKEWSEtiseavkDFPgleDRKEQELELFFSTpdkpevevqdpdrgiptpsetfvpgSLP----- +>SRR3990167_9237955 +-SIEDMLVRQECEVLHAYADSKGYLTIGVGQLIDPKV-----------------GGKISQAASRFMLHEAIQDsVHF-L----KSYPWYEELNNPMKTAMISMMYTLGPSRFACFKNMIQALKDRDYERASNE-------------------------------- +>OpeIllAssembly_1097287.scaffolds.fasta_scaffold2528050_1 # 3 # 239 # -1 # ID=2528050_1;partial=10;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.616 +--LLEVIKKEegSKmqdGKHIPYRCSENKLTLGYGLLID----PDV-----PG----AG---ITDAQAEMLLETSVNQFLVELHNR---LPWYKNQPEQIQIALGNMAYQLGVPKLLQFKKTLDHIENGRYAMAAAECKNSQWFHQTPNRCERVAEVFNnysKGE------- +>SRR5574343_330317 +--ALNHLIHEEGIRDSVYKDGRGIDTIGIGHNLKANPICK--------------------GSIKAQFLCNLNMAEEGAKKFVGNDKIWASMGEYRRLAIVSMVFQMGIGELMTWHKTREAIQQQNWKLASTRASQSKWAKDsfrgTPSRARRVCAMLADNKLV----- +>SRR6185312_2307017 +----------------------------LRLRARRQPPDP--------------------GREPPDPRLPG---GGRRREDPGRPPVGEHLDEPRQRALSDKAYNLGAHGLESFATFLGLVESGAYDAAADDLETTAWYKEvgsrG----IRLAGVIRNGX------- +>ERR1719481_1387761 +--FNSNHVNILGRTDPFLKAPAHieiFNSTSRSQTLILIRT----------LISIMLLSLEELFNMEAYLTADLQIAVSESRR--LYSN-FDFLPSEVQLIIANMMFNMGYSRLSQFRKMKAAVDSRDWRKAADEMVDSRWYSQTGRRARDLVARMRR--------- +>ERR1719450_1722921 +--YQQDLKRFEGVKDEVYKDHLGYKTCCVGHLLVRGDR----------YYGSPVGTMVPAWHRDAYLRTGTTGVQWEQWC--Q----HGREMRTLGQVLRESSGNNG--------------------------------------------------------- +>BarGraNGADG00212_2_1021979.scaffolds.fasta_scaffold94684_2 # 673 # 825 # 1 # ID=94684_2;partial=01;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.693 
+--LKSDlarmADYYESREEKVYPDPVTgmkDPSIGVGFNLnHPQADSLLR---QIGTSKQALLNGGTLNDKQIDflLDKTLDTAIQDIRS--LVPG-FDSLPEPAQFALADMSFNLGKPRLMKFEKMLTAVNNGDLNTAADEMVDSLWYKQTGRRAVENVKRVKQ--------- +>GraSoiStandDraft_9_1057307.scaffolds.fasta_scaffold809940_2 # 62 # 598 # -1 # ID=809940_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.676 +-SLKYCVERNEARRHRAYFDSENNPTIGIGFNLRRKDARQKIEAlGVNYDQACNQQIELTDDQIDTLLIADLMAATDDA---SALFPNFHNLNTARQIILVDMAFNLGKNRLSGFRKMIAAVTAEDWEEAANQMIGSAWYHQVKSRGERNVEVMRTGELV----- +>APGre2960657505_1045072.scaffolds.fasta_scaffold11549_2 # 1161 # 1685 # -1 # ID=11549_2;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.499 +-VYYNMLSTNEGVRDYIYFDTEGNPTTGIGFNLNAAHNQKFLKDnNIDKQSLRmvnkdgktiRKGRNLTENEKLLMYNHSLRQAFKDA---RLYDPRFDRRPESVKMGLVDMAFNLGLTKLKKFVKMKKGLDANDYTTVAKEAEKSDWFKQVKSRGPRTVGLFKKAIYP----- +>SRR5512145_596724 +--------------------TKGILTGGYGHQILPHedLSF---------------LTKLTKHEKLeywtKVLAKDLSTATFDANW--LASNWAYRPNEIQLEVLIELCFNLGLTRLRGFKQFLLHFSKGRIKEAARELIDSKWHRDFVKwnsgkdtpeiRSRRLEKKLL---------- +>ERR1711998_651366 +--DYAAEKVNEGYCPHRYKDTAHIWTIGVGFNLESNPHSLitecggSYDAIMQG--PDccsctTAGQTLTDSTINCLFQKSIASTRTCG---PSLIKGWNSLPAGPKSAITDMAYNMGCGTLATFHGTLGSVERHDFAAATNGMRNSAWCGQVHGRCDRDIACMKSAGPS----- +>SRR5207253_533090 +------------------------------------------------------------TVANELLQEDLDSAITDAKS---IFgAAWLAWSVNRRLAITNLLFAMGKTEFLTFHHTIAHIKAGDWKSAAADVLVSEWAKTVGERARRVSVLMQDED------- +>GraSoiStandDraft_25_1057303.scaffolds.fasta_scaffold1469891_1 # 2 # 409 # 1 # ID=1469891_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.620 +--TRSYVQGNEGFKKYVYKDTRGFLSMGYGHKLTAEEKKkyK-----L--------GDRVDEKLLEDYWEKDWTTHYNAAKS---I-EGYDKLSLQQKVAIIDLTFNMGVNWVTKFPNLIKNIKQAGLAEndimkelyisnaanelkyknyKENNLEPSKYWGQVKGRAIRNYQLLLNDYF------ +>ERR1719218_536531 +-QCASYIKQNEGYVPQYYTDSRGYGTICWGH-LYTQGHSG---------------ETFTQSQCQSFFNSDYATAYSGASGTRGP---PCPGL---CC--------------------------DQWQVPAGX-------------------------------- +>ERR1719498_220616 
+-VLATSSKTKD-TFPNITLTPEDMEPFAGDI-FTLKATPV---------------KPSLNPNAKASSTLTTPPLTVVLAASSAT---STALTAPDNVsLLIX--------------------------------------------------------------- +>ADurb_Gel_02_Slu_FD_contig_31_2593152_length_206_multi_2_in_0_out_0_1 # 2 # 205 # 1 # ID=260422_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.569 +-ATKVQIRDFDNHIQVFYKDKKGVLHGGIGHKLTEAEKKkyK-------------VGDPVSQGQMDSWFNQDSMKAWKSAG---LKGRKLGA--LKLQSAIAPLDFQLGENWNKEHKKTWALLEQRDYAGAAKEAEDSKWFTQTEIRVRDFQDEIRK--------- +>SRR5574344_122520 +SRLLEYIKTHEGYRAKMYYCPAGYPTIGYGHLIRSHEQH-------------LCYTSLSKEQADELLKKDFNKAIQFALKLSTEL----ELNSPQLLAVSHFIFGHGCfnyarSTLRKE------VKNGNKLGVSRE--FRRWITYKNPKGEIVISANA---------- +>SRR5258705_241217 +---------------------XMFNRIAFtnyKTTLAG----------------------------------------V-----AAILTVVAKIATT-GHVDLTTDLSAIVAGVAQFRDMGRAIAvavqtsdaeirQLSFTAAANEMRDSKWARhDSPARAERRAREMTTGTL------ +>SRR5579859_523806 +--LVNDIKEAEGSKDRPYKDTEGYWTGGVGHFLTPQDDGHwSAFLT-------VPTNRFDQSTIDNWLVSDLATAMNEGR---TLMEWNSLDCDARQNAVIELVFNMGLETWRQFAQTRLAILHKEWQMAYNGLLDSKWAKEVqphgfdqPGRATRIATYILTGRF------ +>SRR2546429_2361992 +--PR--------STLFPY-TTLFRSTVGYGRLMT-QSG-LnIGF-------------TCTQEQADRWLAEDLATAEHLAS---ALLEWSSLDTAARQNALEERSEEHTSELQSRLHLVCRLLLEKK--------------------------------------- +>SRR5690349_16116891 +MDGLTYTRDNEGCELKAYPDpitKGEPWTIFFGHTGPEVKP----------------GLVGTREQAIETSKt-DYTRAARSAV---DFVG--FGYDDVRCAAIVDIIYNIGAEHFSKFATFIQLFTDREFTLAACDLEQ--TLYA----------------------- +>SRR5947207_3199466 +--AKAQLQVDEGRKEKFYLDSLGIPTIGIGRNLRDVGLR--------------------PDAIDCWFENDRtaA--E--A-IAKALVPSFEHLSDNRKAVLKARHESLSDSLGCG-----QVVLEAVVDLVRPQ---------TNV-----A-Q------------ +>tr|A0A1X7TH95|A0A1X7TH95_AMPQE Uncharacterized protein OS=Amphimedon queenslandica OX=400682 PE=4 SV=1 +-----REKVNEGYYECLHTDPRtGLSYLGVGFNLYGYEADTQL-----GKVGANYinimngTECLNDTQIKELFKLSMADALECAS---NFLYkSWSPLTLNAKSAIIDMAFDYHyigcypvfpSRGLQDFVNLRSALseKPPNYDKAGSILKDSTWCGQEPKRCQEAIDCI----------- +>SRR6056297_272191 
+-KLIEHVKKYEGFKAKPYVDTVGKWTVGYGRNIEDNPLSVnEVVELFQ-HI--SWdSLRSAEVWAEDLMAYDLANVQEELQRN---LVFFNGLHDYEKLVLMDLGFNVGVPTLLKFKGMLHALDNDDSITASYELMNSRYAVQTKRRAAANARILANndSNFN----- +>SRR3990167_4437453 +-TVAGDPKLTcsCGcgmLPVQSFMDKIESLRIAFGKPMKVAS-----------------AARC-PEYnakvsatksrtgphttgrAI-----------------------DLAVTHAEAHQLLVLACQWGFTGLGVNQKGGGRfLHLDDLPNAPGQPRPHLWSyRSEERRV------------------ +>SRR6185312_6879638 +--LLADLRRDEGLRLRAYPDPLSggePWTIGYGHTGSDVHP----------------STVWTPEEAETSLEQDAAHACDLLD---RYAHWWRFLDPVRQDAMATSAST--WAGspete-----STAWGPSIIPWQRSnctSGRKRTTAFSPAHGPRKSASA----PNAS------ +>SRR3984957_20766028 +TNIIDQLKRDEGEKSSAYDDADgapisagtmvkGFVTVGIGTCIDASRG-----------------CGLTDAEMENLAVNRITLAAADIR---VHFRCTSTMDGGRFGVLMNMCYHFGIEGR----------------------------------------------------- +>SRR3546814_13167229 +----------------------------------------------------------------------IADKSADITD----WPAWQavKADPVPATALLSIASQTGAAGTEGLRNHPKLVGPKGLAGTCANTTPRKRCKQQHGAPARPTEQDGDRDYT----- +>ERR1700674_5042571 +-------------------------------------------------------------EADIMEASDIAQLEEDLV---PLLPWIPKLSTGRQVAVYSLYFNVALGNTQKFIGphgwptFLAQMAAGEFEAAAQNLRTSKpWADEVGPRSGRLADLVLYGX------- +>SRR4051812_16536447 +----------------PYLDTRGYWTIGRGNRFILG-ePVT---------------AATRPLaneaAANQLMYGAIYAACVDAQ---FWYDKLDELPAQKAEVIVEMCYQLGLKTMEAFKPTHDLLAAGDYAGVATHMRNSLWYRQTPARVE----------------- +>GraSoiStandDraft_29_1057270.scaffolds.fasta_scaffold1972517_1 # 1 # 495 # -1 # ID=1972517_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.578 +-----ELAELEGLRLEVYADRDGLA-AGVGRNVTRSGMKA--------------GDTISKEQAEQWFEEDTNAALGVGKR--AAAELGVRDTT-AVMALAGAAYQLGEAGWREHKRTAEAIANRDYNAFLQEVRSSKWAEQTPKRAEWFISRMAG--------- +>SRR5229473_3359192 +-LAVPMLMRHEGFVSTVYKDSKGILTIGYGRNLETTG--------------------VSRAEAAAMLQQDAIEARDHCAS----YSYFDALRNARKAALIDLCFNVGRLRYSDFVRMHEHLKRSEWWEAGEEIVQSKAHLQEPARIRELATILRTGLL------ +>SRR6266851_3279499 
+-EYLDRVTVSEGIRRTKYLDTRGYETIGIGHRLDRPLP---------------------LSVCDFLLVEDSLAIEREVL---DALPWVAHLDDVRRWTIIELAFNMelgapgGKHGLLSFGppnsKTLTEFELGHWYNAAAGFRNSRWSDEvGPDRTMRICQQIETGEWS----- +>SRR6185436_7246926 +-RILKSLKDEEGFRAFLYDDATGLPIKRGSIVKGNPT------------IGYGWnvtTSPMSESEASMVLGNRIVLAAKDA---ASLVPNWVTLNETRQDVLIDMAFNLGRNGLFAFKNMLRAVNEGRYDEAADEMMDSAWFIQVGKRGPLLVDRMRSGSEA----- +>SRR5574341_283695 +-HLIPRLKREEGFSEHVYLDTTSNLTIGYGHNLAHLRVVEaGGVEVEGGRVELRPVNGISANVAELLLVGEVMNIMSTLGE---RLPWFVSMDDVRQEVLTDLCFNIGLGI-LRYKVFMRQLASGDYDGAASNMRGWRWYRQVHaHRADPLIRMMRTGVR------ +>SRR6186713_2311099 +----------------------------------RLVM--pPGVKIN--GVRLVPVGSIPENICELLLLGKVEEAQHTLST---SLPWFDSMAEVRREVLTELVYNIGSSV-LEYDQFMAQLKAREYKAAAQNMRGWLWYAQVHaARAEPLCRQMESGVR------ +>SRR4051812_41040902 +-WAKMLICEDEGERLLVYDSAtgkpikagytlVGNPTIGYGRDLALR--------------------GITPAEMLVMLHGDLAVAERAVRM--FLgPTWE-RSSNIRRAVLMDMAHDLGAAQLVSFCALSVALLAGDWEE------------------------------------ +>SRR5579883_2509280 +-SLDELLILEEGERFVPYDDMtgeavpvggvcLGTLTAGVGHTGSDVKP-----------------GDYWTRdRSRARGQGswPRLFQGARPGA---PsGARKHGLRHGRQGPRRLPPYAD-GRA--------------------VGCLAGRAX------------------------- +>SRR6185369_1844050 +---------------------NgsdigpgslvqGHPTLAIGRRVDL---------------------PLSDAAIDCLLEEDIQDRCDALD---IALPWWTTLADPQHRQLVELTCALGLQGILLFTPMLSTMRNNHGAVAAPLAADS---------------------------- +>SRR5579872_1766917 +-YLRADLERDEGKAKldpatgnyLAYKDSRGFWTIYVGHNVSADPGML-------SILDILVTRGITVAQGEARLDRDIASVEARL---DKDLPWWRQLSDIRQDVMVNLAFNIGEGKMLLWKHTLGDIQAGRFVAAEIDLEnDEPWASQVHDRSKRMALQMETDQHQ----- +>ERR1700747_3595665 +------------------------------ctgpDIV-------------S-------GTVWTEDQADQEFTARYAQAAQaaeNLLT----DQYWPQLSEPRQAAVTDMVYQMGTTGVSKFFHMLMTLMCGAYGVAAGQCLASTYAAQTPACANRHASMLASGTWPA---- +>ERR1035441_2774292 +----------------------------RHTRCLSDWSSDvC---SSD--LLAVMtGAAITLDLANAIFDLQYNAVAGEAR---TAVPGIDQDPDNAGAVVCDMIFEMGLAGFLAFHNTIAALVARNWPAAISGMKASKWATQVPAREENEVALLEAL-------- +>ERR1700735_776344 
+--AVDLIKAHEGLRLVAYHDNDGSLTIGYGQHFDSLTLD-qRARLTAE--YGSVAdAAlHIRYETANWMIEERAAALEAWAT---VSIAGFSLLDETRQAVLIDMAYELGEgrpgkSGLLGFPHFLHAVEQGDWKQAVVEMDSSHWAKQVPAREQNDRALLLEA-------- +>SRR5215469_8584667 +-TTEALIRKHEGVEYVAYDDSEGIRTIGIGFNLQKEGAQRRIgALGLD--YDAVFsgSCTLTDQHCSALFSVDLDDAIEQA---SGIVSNFSLQPDDVQSVIVDMVYNLGAAGFQKFTKAIAAFEDKDYCTAAAELQNSLWARQVPSRAKEDISMVKAFCN------ +>SRR3990167_7034803 +-QTAGKLKRDEGRNPKPYQDTRGIWTVGYGTNLTSGSL--------------------SEAAVTQMLMDRLQDVETACL---ALPVW-KDLSESRKGVLLTIGYHVGSEGLLRFRRMLQALATRDYPRAAAEILDSDAAREGGAPPDRLPQQKGGKTT------ +>AP41_2_1055478.scaffolds.fasta_scaffold37245_2 # 783 # 1481 # -1 # ID=37245_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.342 +---LDRIAFGEGADPSKLK---------VQEELGIGSTPYDMVYAYGK--TLAPSKPVSEMTMREVFEFQTDLIN----------------ATKGKVKGTSL-------------------------------------------------------------- +>ERR1035437_5668736 +ESLEQRITRHECIggkpNLKPYVDTVGKITIGIGHNLTDNG--------------------LTLAQVEDIFQSDVHEAQADLQ---RVLPWVASLTPLRQEVFVELVFNLEIKNLANFFFFLIGPERGVGQTPPPPFX------------------------------ +>SRR3990167_7722580 +--------------------ALGAIpeIIENPKNKEDEG--------------------IInysRNELWLRLGGP-------DS---TLIHYLHHIGTSSALGYEATAL--------TKEFEQTLVESARWGYPPPDFICRAHRHRHFKM------------------- +>SRR5574343_1351595 +-----------------------PNgaAVGKPKPEED----------------------PInnhVAISDTHCGSK-------RG---LCVPWKVRLPESSGAFYLASQH--------QKRLWKQ------WNF------------------------------------ +>SRR5574343_561521 +----------------------------XMKPKKQ----------------------TVkhiVVISDLHSGDR-------NA---LCPA---RIRMTDGGYYAPTGI--------QSKLLET------WTL------------------------------------ +>ERR1700677_1013875 +--LEQRIIEHEGIKKSVYQDSMGFWTIGIGFLCDEKKN-----------------AGLSVDECMMILRSRLDKLDKQLSQ----YNWYTVQDVVRKGVLQELAYNIGVTGLLEFKTFLAFMAARRCSEAALDLKNTKWATQVsQNRVNDMLKRIREGTYG----- +>SRR6478609_384520 
+-------------KFYPYKDMKGNYTIGYGHYLGKKESDAN-----------KYKNGITEYQAKKILKADMKRTYDDFTLL-LQRKNAVNLTKDQQRILYDMAYSMGVDKLDKFTRLWKAVQHGNDRKFKKEIEGSLWFKQVGNRAEILLS------------- +>OrbTnscriptome_2_FD_contig_31_275924_length_522_multi_4_in_0_out_0_1 # 3 # 521 # -1 # ID=451231_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.590 +-KLIKALKVNEGFRGNTYLDTSGVPTVGYGTTRQSPGAV-EYINSIgmspDNIWGPKAVGTVTEPQGLAMMNKALDHNAGLLK---SRYPGYPQFSENQRLALQDMMYQMGPKGLGKFTNMNKAINvqdpsKVDWNSVATHAADSKWAKtQTPNRAKRVISLFN---------- +>SRR3972149_2936869 +-LLVEQLKKDEGYSEWSYADT-DQHSWGYGTKAPGP------------------DQYIGKEQADVELWQRAALAIRDFY---RLFQYaNTEIDEVRQLALGNMCYNLGLTKIMKFRDMLQAIQAGNWLEVSAQGREPNMTKTML--------------------- +>tr|A0A066ZM01|A0A066ZM01_HYDMR Lysozyme OS=Hydrogenovibrio marinus GN=EI16_00545 PE=3 SV=1 +DEVLDSVTQHEGFREKPYIDPLVMREIPHTEKVIILKWFD------RLKITFGYgMTFITEEQARMCTAMMLYDIRHELA---RRKSFFTSMPIKAQDIFVEMAYQMGIEDLMDFKNTWRYAELGQWENAANEMLDSKWAReQTPDRAKQLSDKLAA--------- +>SRR5574343_1436497 +--AKALIKSDEGedknpgdGIAEPYLCTAGEWTWVWGININNGLDQADTLLVL--------REGANFATADKILDRRIE---AAYQNCRKIFPRFDLFTDNRQVAFINMMYQLGYRNFYGFHGMIDHALNGRWPQAADAALDSKWAKnDSPNRAQRIAAMIREGRS------ +>JI10StandDraft_1071094.scaffolds.fasta_scaffold2471112_1 # 163 # 465 # 1 # ID=2471112_1;partial=01;start_type=ATG;rbs_motif=TAAAAA;rbs_spacer=9bp;gc_cont=0.472 +---GGLFKgignIATGKTwggkPTVPGGGFNmgplADGDKYGSFLNqPGG---------------------AGAQKRGLFDGLGKGLLAAAPD----IGALIGGDKGSRIGQIVQGFgsQFGGGEKPGFRDVLGAV-----LPIASEFMS-----------PKMSNIIGNVvgigdmF------- +>APPan5920702963_1055757.scaffolds.fasta_scaffold207099_2 # 97 # 519 # -1 # ID=207099_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.586 +---WGSIKkwgsnignwaknkatdSHGLSSlwsnkPKNQGAKEGsgwfGKAMNWGKNLLgFGTpqee------------------------KKP----SIWQNILSNAGQ----IGSMFGGKGGAIGGAIQTIFggggSLeggkASGWDIAQSL-M---------GVAGSFM---------DPGSKAANWLGKIggigsaF------- +>SRR5690606_1724827 
+--MVETLKADEGYSDTPYQDIVGVWTIYHGNTILYTRGNQ----RVT-----ASTIGGNRDEAEKNLYCGIQSAIKKAqgyvNNFHELS-------DVRQCVLVMMAYQLGF-NLYSFKDTKFLIENNAHKQAADEMMDSKWAKQTYRRAKKLTNMYKLDK------- +>SRR3990167_7220187 +------------------------------------------------------------------CDARIQRRDRGA----PHVPQDARRDG------------------------ARRVGRGSPRAARLRL-----CAAGRPARAPAPPAPPNPGG------ +>TergutCu122P5_1016488.scaffolds.fasta_scaffold2043846_2 # 339 # 728 # -1 # ID=2043846_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.428 +-DIKDRIKSDEGYTEVPMYLsytpkdNMGraldtvvetHRTGGWGHTMKDKEFSPptSALAD---------LGGAAKRHWEEKFEEDFATALEGARS----IIPEKDLDPRAFGILVEMVFQMGPTKVRGFKEAIKALKAQDYDRAADQLINnydattgkrrgrTNWYKQTEKRALRAYERMRDL-------- +>tr|A0A1X9T116|A0A1X9T116_9PROT Phage lysozyme, putative OS=Campylobacter sp. RM8964 GN=CVIC8964_0812 PE=4 SV=1 +-ELLEELKKEEGFRANIYQCTAGVDTIGYGFNVAYLTKKEL-------ELNGGVIEPMSKEVATKILELKVKKLIKSV---DAIYSWIDNLPEVVKIGIYDMIYQLGIKGFGSFVNTQKYLRALDYDKAIENIKNSKWAKQTPRRANNLIKRLQRARD------ +>ETNmetMinimDraft_8_1059916.scaffolds.fasta_scaffold253350_2 # 471 # 593 # 1 # ID=253350_2;partial=01;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.382 +-DLLKRIKHHEGFRKSVYQCTEGYDTIGYGFAIKDLELD--------------------EDLAEEILLRKVEKLIKRV---RSKFDWLDSVPHEVQGVLVEMAYQMGLSSVCRFRLRICISdkwcIVLSFLNGTSTILYSINLYLTTLNPFPYIRAIYFIKY------ +>GraSoiStandDraft_12_1057312.scaffolds.fasta_scaffold2499143_1 # 3 # 284 # -1 # ID=2499143_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.706 +-KLFEHIQLREGYKQSVYLDILGKPTCGIGHLLSEEEHQQY-------PV----KCLVPKMVIEKWFEEDIKTALDGAE---EQIKEIQATVNAQIQEIT---AEMEVRMMEQQQQKQQAVESG---------------EMLPERMqLEMQKAIKK--------- +>tr|G1UUK2|G1UUK2_9DELT Uncharacterized protein OS=Desulfovibrio sp. 
6_1_46AFAA GN=HMPREF1022_02275 PE=4 SV=1 +-PLIAQLKRQEGFRAVPYLCTARACTIGYGTNLQAHPQYIpypDLEcaarsGRLKGLLLRnalrARGMRWNEEEAATALHEEVNACKRQLAARCPEFVRLAEIgEVPRAEVLLNMAFNMGVDGLLRFKNTLAMLRAaisdavairnaadaangcredhASYARVADGMLNSLWANQVGRRADELARQMRTGAY------ +>tr|J0KP17|J0KP17_HELPX Lysozyme OS=Helicobacter pylori Hp H-27 GN=HPHPH27_1020 PE=4 SV=1 +---SFLIVDSEGFSPFIYTDKTEHPTIGYGYNLSVYSY------D---------GERITK--AYGLLTDVLKENYKA----LLSYGWYKNLDAMRRMVILDLSYNLGLNGLLKFKQFIKAIEDKNYALAVEGLQKSPYFNQMKKRASRNMEILKLGGCE----- +>ERR1017187_801687 +---GRLIDEEEGPSSpLAYHDNLGFITIARGCLIDPRSRGaD-----------GLCQAA-----LAAQDAYTLAKAQKLA----ASLPGFSACNDAQQAVVVSMCFQLGDL--HDWPDFRAALEDGDYAECSQQmlfakppALLPSSWhKETPQRCERAAYMMRSG-------- +>SRR5262249_61576198 +-------------------------------GVYPLIAVA-----------GLPQVA-----IDASNKVRTEEARKRA----TTIPGFIRCNEVRQAVLVSMCFQLGSL--TTWHEFRGALVMDDYEAAAAAGLDSEWANETPAARQRARAMPATGQRT----- +>SRR5581483_4741012 +-RLETDTALDEGDKSAPYLDTLKRWSFGRGRCLETAPLTGpEWKALLDRG---YITVSLSPAGSALLEARQLAAIAATLE--HDY-DFWPKLNDARQNALVELAYQIGVAGELAFHDMIAAIRVAvvdnNWVPVATAALQSDWARETPVRAKIVVTQLCTGQ------- +>SRR6056297_2618524 +SLK-TKIKSLD------PRDKMKPTTKKINELF--GDDL-----------------IRSITDIIKSFDPRDK-IKSVKDE----INKFETS-----------------------DDKLGFLTKRADDFLGGDSIQ----------------------------- +>SRR3990167_3909892 +---ARRGRRERECA-CLIRAVRRSPGTRAGCACRctrtrEA--------------------CPPSASATTCATspsRTRRRSSSSSA---TWPTWKPISSACSRGp-TesatratTPSRtcSSTWAASVLTFVKMLRAAEAGRWQEAADEVLASVYHAQVGTRAVRLAQQMAEDRW------ +>SRR3989304_1028579 +---AQIARDEGGMRLQMYQDSRGVPTIGIGHHPRAK--------------------PIPPAAALLLFEr-DVADVEADLK---RLLPWAEGVSPARYDala-----NmlFNLGSGGLLTFVKMLRGARAGGWRGGARPG------------------------------- +>SRR3972149_5957821 +---SRTRRRSSSSSVTWPT---------------WK--------------------PTSSACSLGLREs-APRATTRSRT---CSSTWAAADSX----------------------------------------------------------------------- +>ERR1700691_948072 
+---------DEWYRMYPYMDDAKKITIGIGYNLTDNLGVKTLDE-----ANKLYPHGMSYEDALTIMRk-FIDEITDKLE---THFPWFSKLDDVRQSvfi-----CmaYQMGVNGLYGFHDTLDLASKGKNSAVSIAMMQSPWAREYTKRASRLSYMMESGKW------ +>ERR1019366_3632978 +---TKMLIQDEGYNQFLYKDTRNILTCGYGFNMESD--------------------GLYPEECDFILHn-RLVKREKQLI---KDLPYYNNLNDNVKSvll-----NlaYCMGEHGLIGFHNMLAYIQASDWANASRELLDSQFGKDHANX------------------- +>ERR1700722_18925977 +-EAIARLRIDEGNYLYTYDDGDGQYPHPhvyPGYPWRDGHNPTIANGLlLDtsglavlKQVGVTDpqavldgKLDITQPQCDAIVAAIIPKYAGYAR-DGLAPGVFDSMTEARQYAMLSMAYNLGEGGFDSFSATLQMIDAAQtaknsgdpnahalFNAAADHMlGASLWISQVGDRARRIIAMIRVGTY------ +>SRR3989338_9215174 +RETVDMIKVNEGFVGRVYLDTVGKRTVGYGFNIDDATIARL----VDEDV-KSGKREMTRAEAEPILYYViFRTAVKDAMRFVG-KDKFFTLSFGQRKAVLDMAFNLGIGRLNGFEKFRTALREVNAKQAAAELLDSKYAGQVNGRAKRNEDLIRGDAK------ +>SRR5437899_13062484 +---------------------------------------------------------------SVLLTHVRRPALSQPaYGPPPAqHSF--PTRRSSDLVVVDMRFNLGPGRFRAFRATLAAIERGDFVTAADRMIKSKWARQVKGRARSEEHTSELQSL------ +>10_taG_2_1085330.scaffolds.fasta_scaffold748519_2 # 90 # 209 # 1 # ID=748519_2;partial=01;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.342 +-DPKPMIIRHEGKKPCVYLDTKNIKTIGVGYNMQNKDAPEVfdsigadYNKFENGPVTRWNvpcncssVPCLTEEQIEELLDISLKTAIADA---RTVIATFDGLCCSVQNVMVDMSFTLGGPGFAQFTTFATLLTRQHWKAAGDDLTVSLWCKQATARCMEDANYVRAGCG------ +>EndMetStandDraft_4_1072995.scaffolds.fasta_scaffold4041008_1 # 2 # 184 # 1 # ID=4041008_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.683 +--VKDKIKKSEGYSATGYFLeyrgangetiKEDFMTIGWGHRVVDGDPYE------P-------GVEYPKEVLEQQFEKDFLVYLHAAER--YIGD--CEVPEVIKDCVIEIAYNIGEPKLFQFVNMRQAMQDGQWKLMAAELKNSKLYRTLTSRYEPMVKLIEE--------- +>SRR6516164_7321876 +-----------GKRPHVYTDTAGHPSVGIGFNLDRDGAQAAL-----EAVGANYndvragTQDLTDDQISKLFQQDLTKAIDDAT---TVVSNFSSLNDPSSSFAHPSG-----PGCRP-GHTRRNPSCCPLPTPTPRSRSPLWCGETGX-------------------- +>Dee2metaT_20_FD_contig_31_2115798_length_257_multi_2_in_0_out_0_1 # 3 # 257 # 1 # ID=843840_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.510 
+-------------------------CAVDGWNTSSTTQDQRV-----NFL-----TCWDDAQGS---------------------SEDKAKACSKQ-----VGFDFSA---------VSACNSGPKGEAlqlaaaeafekrfpthAHNGMF----EVPHIFINGQDIGTNRDFN------ +>ERR1712185_833699 +----------------------------TCLEQDNPGAPQAI-----AAIGANYndvrsgKQCLTDSQIMKLFEPSYQSAVRGARS---AVSSYDSLCCGVQNVRSGFKHPAFAGTRrprpstvvlCSLRAALRLVARRX--------------------------------------- +>ERR1719204_494815 +-------------RRQNGEHTGYY-NIGFGHVLTQEELNTgIIVIDGV---SHEYRKGLSQESCKKLFKQDWTLHDPS-K---LI--TNPKVSDTAIGIIHEMVYQMGMEKVKGFEETLKALNKLDYKKTAAEMRDSKWWReDTPTRAEILAKIMESLAE------ +>tr|A0A1G0VB09|A0A1G0VB09_9BACT Lysozyme OS=Ignavibacteria bacterium RIFOXYB2_FULL_37_11 OX=1798448 GN=A2299_12310 PE=3 SV=1 +---VSLIKYFE-gL-rTKTYRCSAg-VLTIGIGHTGS-DV-FV----------N---QI-ITKAEAVLLLKNDLRRFENYVDKV-AVR--N--IKWHEFDALVCFSFNLGYRIDAVmkeainrSNTKLVLVKMLRYNKAKVNG--S-YIVLN---------------------- +>SRR5574343_132817 +---VDLVAFEEGFRNNVYLCPAGYPTIGFGRRVIILDYN---THEEI-------TIPVTRKGEEENLRLRLNAIANWLE---AKYPWFALLSPQRQAVLISLAYQVGNDGFTRFRAMIRALAKDNFEEAAREYLNSTAARQCPLRFGRGTAILRDNL------- +>AP99_3_1055487.scaffolds.fasta_scaffold1116125_1 # 3 # 230 # 1 # ID=1116125_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.368 +--GWDYIRHEEGstkrkgqPVLKAYSIGDGMITVGWGHAEKVEKSKF------------EVGDVITMDQAQDFLKKDLTVAADGVRR--ILKRWEDeyidvYLSQDQFDVLVSLAYNSGVGALNR-SSVMRQVKRGNIEKAGKKLKSWRVNKKFPGLQTR---------------- +>SRR5256885_2377351 +---AAMLVGLEGDRGMMYHDQVGLPTIGIGHLLTKSELTSGKIHLGP--EVLRWGACRSRHRNSMPCAPGPSTWGSKRSG------------IPRCSR---CX------------------------------------------------------------- +>SRR5665213_2518426 +-ALDAELVNDEGDKLYLYDDATGSrivpgytlkghPTFGIGTNAE---------------------F-FYPEERDFCLHCREKKATDVLTA---ALPWFTTLDPVRQNALIDLYYNV--PGFIHWPHFIGLAASGSWFGAAAELEnTHPWIDQVKqR-GHDIAARLRTGVA------ +>SRR6185436_5977522 +---IKFIKGLEGFRNKAYKDSKGLWTTGVGHLIKPNEQEL-------------ITKVLTDQEVEDLLEKDLKEFEDAANA-----GIRVPVSQHEYDAIVSIAFNIGKEwvdgdGYRD-SGFLADINWKKGKeKTLHDIM--LFRHPPelLGRRAKEARLFDKGNY------ 
+>HubBroStandDraft_5_1064220.scaffolds.fasta_scaffold6338656_2 # 134 # 208 # 1 # ID=6338656_2;partial=01;start_type=ATG;rbs_motif=GGG;rbs_spacer=9bp;gc_cont=0.533 +---AQYVRQHEGSRNRLYKDSRGYWTIGIGHLVTAQELpLF-------------KNRVLSAAEIESIFSKDLASKMQAINA-----KFGA-----KFDTFSdNLKCAVIDGyfrgdLPGS-PKTIDLLIRGNFKQAAAEYL--NNKEYhsakasgSgvAKRMEQNANIMTQEAN------ +>307.fasta_scaffold4207948_1 # 1 # 249 # 1 # ID=4207948_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.518 +------------------MSKFPSAYRALGNkDYNEAIRQVyQVGD------------KVSEEQRNAWLEQDAVKAWEAAAQ---QVQDLNIEKPEFIVALGSVNFQLGTRWMDKFPSAYKALSSKDYDEAIKQVstgsgkdGQSRWKEQTPVRVNDFVEAIDK--------- +>TergutCu122P1_1016479.scaffolds.fasta_scaffold4806898_1 # 1 # 201 # -1 # ID=4806898_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.418 +------ERLNEGYCVHKYRDLGKIWTIGIGFNLQQPGAQQavtncggDYASITAdTL--DKCKgpAQLSKTSITCLFNYSMKIAI---DCASRVVPLWETLPPGPKSALIDMAFNLGHATPAPgnafasFSNAGAlavatkkskpakEMPASPDSYMSP---NDVFN-FPGSSS---------DDA------ +>SRR5688572_12078062 +---VPMMKKVEALRLKAYPDA-PGWAIGYGHNSTsGLkpiPY---------------SGMTLkTEAEADAILRIDLDECLRYLN-----TWVKVPLEQGHVDALCMHIFQQGPTQFRR-RLLENINKKMHWTTAK---A--IENMD----------------------- +>SRR5262245_45247223 +-----------------------------------PiPY---------------KGMTCtL-NEAIAMLRSDLNECVRYLN-----AWVKVPLEQGQVDALCMHIFQQGPSQARK-KVLPTVNQKMHWSAAK---L--IENMA----------------------- +>tr|F7VFZ9|F7VFZ9_9PROT Phage related lysozyme OS=Acetobacter tropicalis NBRC 101654 GN=ATPR_2298 PE=4 SV=1 +----ALARRFEGLCLRPYVCAAGYWTIGYGSRWLANGAAMC-----------AHAAPITAAEAESLLLAALRTLQPEIRK-----IVHVSLTARQEPAHYG--LKHGYCRL-VPENYRHP-HTGEER-VREEF---RLT------------------------- +>SRR3990167_207291 +---LDLIREFEQCRLTIYHDQAGYPTIGWGHKLTPSESSSGKIQIGN--VRVYCKPSLTQDQADALFRQDLYSYESTVNGT-----VSVDLMQNQFDALISLCFNIGSNAFAN-STLLRRLNQKQFDKIPEQFRRWKYAGGKvskglVNRREAEIDLWLSKAI------ +>SRR5437764_13829842 
+----------------------------------------------------------TQAQADADQAARMDRLDA------KVRAlITTQLPNGALGALLSLADNAGI-GIEINSRLIAAVNRRDWIAAAHEFIDDDHVKGSEipgllKRRLYEAFIFLES-I------ +>SRR5574337_249516 +-NVQALIADDEGCKSMPYRDTTGVWTVGIGHNLQANPLPGIVADMLRTRLGQTTtgqpsaypaclnlittAQGLVDDEIDALFSYDM-GAITGFL---NDYPWFAEADDVRQAALQDIALDRKSTRFHEFGTFLGFCAQSDWVSAAADLQTTLVYKELTPGYSRFCSILVSGNWP----- +>APWor3302394562_1045213.scaffolds.fasta_scaffold1385679_1 # 1 # 246 # 1 # ID=1385679_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.520 +----QQLMQNEasgKFRAYAYNDPhtQNiKKSIGYGFHLDENNPRVvELIRNLGlnVDKLKRKEQSITEQQAKYLLDAVISDSASEIGRF-----IpdFSSHPADVQKVLLDMYYNLGINGFGQFKRMHAAVNSRNYALAAKEMM--TGAKGGeslyAKQVKGRASR------------ +>UPI000314159D status=active +-KLAEILTVDEGKRTlcyddatgetiKPGTTVKGNITIAIGRDIQNFGL--------------------SEDEIQMLLKNDIKRVIEEANN----FPFYNSLNEVRKIVILSMLFNLGLTRFNKFVKFKKALHAGSYTTAANEMRDSLYYKQLSHRVEKLAVWMDSGIMPDYNN- +>SRR5438105_403354 +---RHERLGDVRRPWEIYWDTQGRPALGIGFRLDTDDAARrlEALGLDLDQV--KAgKVKVNEEQMWALFDDDVQRAIDEA---QRLVTNFDDLPEEKKRVVVDLVFCLGSAGFQRFKKLIEALQENDFPRAAA--------------------------------- +>GraSoiStandDraft_38_1057308.scaffolds.fasta_scaffold3906171_1 # 2 # 163 # 1 # ID=3906171_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.660 +---EV------ESKIE-IADTKKGEVQGVEP-PKPERPRDE--------VGSGEGKKVVRTRKPNTRE----------EQ----IAYFKKILK-RSESSLEKTknefkINAAEKRIDIAKTQLRLLGVEEFADYAGPgetispedreaaanaARKSGAIKQIKsSRSFFIEET---KV------- +>ERR1719329_1646026 +----------------YQNKDTGHKTVGYGYNMESKSAREEF-----TSLGINYdkvfygIECISVTDANNLLKASLEVATFRAV--SNLGPSYANLCCNVANALIDMSFNMTDTpgvAMASFQELLDDVVVGNWSSAAAAVRNTTWCKASPAACADDAASLERG-------- +>ERR1719223_252634 +---------------------------------ETVSARGDF-----AKLGLDYdsiffgRTCLNISSVNRLLQLRLFTAAFEIS--TAVRPVYSSLCCGVANALVDISYNISAVtplqPISKMQGLIQAVISQNWTGASD--------------------------------- +>tr|A0A1V5R0F2|A0A1V5R0F2_9BACT Lysozyme OS=Parcubacteria group bacterium ADurb.Bin326 OX=1852943 GN=BWY53_00699 PE=3 SV=1 
+-----YIASHEGYRSEQYTDAIDAstQNIYFGHQVQPGENFN----------------NTEQEAI-QVLRNDYSRYQAQARDVaSQHGVDFDDLSAGRQTALTDMAYNMGGASLNNFNNMWSAIRENNFEQAGREILNSQYYNQLQQTGRPqmNADIMAASPA------ +>ERR1700722_3663261 +-----DLDAAEKDELVAYPDTLGNWTCARGLEMPRP-AP-----------GRSWeGFTVPQSTSDRWFNTDIMSAMTFAKK----LPEYASCDtDCRQNALTELCFNMRG-KWEKWGPTRTLITEKNWQGVHDHLLASLWAKEVqphgetTGRATRIANYFLSGEYP----- +>SRR5574343_646792 +------------------------------------------------------APYMDRPTADKIFEGLYYNAGVAARNW-AGDDVYNALPGRRQAILTDMVYNLGPDKIRAFKALQKALLQGDNEGVKREMKDSQWYNQVGNRAKEHVQNW----------- +>SRR3989338_1995144 +-TLEQQLIRHEGYKAKAYRDGKGY-SVGYGTRLTGTDAKNRLkKMGVDYNALIRKEISLTRKQAATLFYQDVALARTAA---TRLVPSYHLQPKEIKDAVVNMIYNLGPTGFAEFVEVRNFLENRKYLSAADEMTKSNWYRnpRTYRRAEELRQQIA---------- +>GraSoiStandDraft_45_1057281.scaffolds.fasta_scaffold337773_1 # 3 # 101 # -1 # ID=337773_1;partial=10;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.556 +----SQLQEDEGFRKGVYKDsvrnkknPHGIDTIGYGFNLERAGAQEalDA-AGIKKSVANlrSGKLQLTKEEADRLMRGEYPHFADAAKR-FVNKgkeGTWSGLTLDRQKILTNMAYNMGATGLNKFDELRKALQNRDYEKAGEEMMSSDWAKSAaeggvGARANRLTARMTNTS------- +>APMed6443717190_1056831.scaffolds.fasta_scaffold2353753_1 # 1 # 231 # 1 # ID=2353753_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.312 +---LQHLRDREGEINMIYKDSLGKPTGGTGHLMRPQDYLDLGLDMenmqygtYhHPGYNRDFqvaidsnGDpiQLDKSVTDNWLLNDSQTAFNAANyQMSQLGEEYQ-NNEALRDTLISVNYQMGENwykpkseGGKGFKGVWEGMQEGDWEKAAGNvewqnpttkLNKTGWHNQTPDRTNDFMEGLR---------- +>SRR5210317_315897 +---VTLIYEEEGLgkrkgrigpqNTRVYKDSEGILTAGVGHALQFRDDDGnlQWVNKDE-QY--QEGYEVPVDVVKSWLIKDAKIANNDAV--TKLQDVRPDLLdnKEMVTMVSSFYYQLGKKNGDDFKKMWKAFEEGNGEKAYAEALDSvWAKEQTPERAKRFAEFIRDN-------- +>JI61114DRNA_FD_contig_123_6269_length_240_multi_1_in_1_out_1_1 # 1 # 186 # 1 # ID=70612_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.441 
+---LDFLITAEGAKTRAYKDSVGKWTIGVGHLIRLPAERN------------LLSKTLTQADMRQLLLKDLGHHIqpllDHLK--P---EVVKNLNRNQVTALASLAFNIGPGGTVKGRkrgfkysPVVAALNRGGdFdRNLQTaagAFDRHAYaggRR------------------------ +>SRR5690606_18431185 +-------SWA---prlgaraILSTYLRPeypeHVTGADSYGRLCKPDH------------------PPINEEEGEMYLRQDLRTALTATL---RYCPVLAAEPEGRIAAIVDFTFNLGAGRLQT-STLRRRVNQRDWHAAGKELRRWIYG------------------------- +>SRR6218665_4116602 +--SIALAKRFEGVhrvprvdpgRAHPYICPAGHWTIDWLWPSVra---D-----------------APADHGDRSRGLSGARP-ANRARR---NAAPLPGARrPTR-GAPY-----------------GHSRLNVQPWSCVPAENI------------------------------ +>tr|A0A1Y1XFR8|A0A1Y1XFR8_9FUNG Lysozyme-like protein (Fragment) OS=Basidiobolus meristosporus CBS 931.73 GN=K493DRAFT_239213 PE=4 SV=1 +--DLDLVQSIEGWFPNFYIGPAGIRTIGYGHTCHsdpthCANI----------------YPPLFVARGEDLLRRDMAEFEECVS---ELILVPITSnR---VLHWYRSRSMSGVGGLRN-SNMRRLINQENNNAAEAEFGLWVYG------------------------- +>OM-RGC.v1.039010792 GOS_JCVI_SCAF_1099266870205_1_gene202757 "" "" +-SVKSMIKQHEGAIPYPYKDTKGLWTIGVGHLIGDGKSLPaEYDDWKNNGGPydkkNNKTPALTNTEMENLFQKDFDAHLKIA----KQGPGFELANETGQGAFIDLTYNMGR-WWTIFKNAAKAAEKGDFKTVAKELTDSKWYTQVGKRAEEIVSLIGNGYK------ +>JI10StandDraft_1071094.scaffolds.fasta_scaffold5180855_1 # 44 # 271 # -1 # ID=5180855_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.395 +-QLVIPKKDFRKEVPQKENL----TGMTIGEVLdyqsqmiKSGKYPSsAVGKYQIIQdtlksaVDyfkkKKDDtitlnTKFDETAQDKIYREYLISGKR------QAVEDYVTGKKSGPDALIAAQLDMSK-EFASFGVPIDvnrpADPKGNWDARLVKKGESYYIGDKGGNKASVTPEMsekalnEERLL------ +>HotLakDrversion2_3_1040253.scaffolds.fasta_scaffold03928_4 # 2223 # 3215 # 1 # ID=3928_4;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.656 +----RRIKVQEGLVLQTYLDQKNIPSFGYGTNLTIPFIT-kEREFIFSHL-------FDSMGIASFLFDSKFDACKLIVG--GILerqcSLLLSDIPDNAKIALVDMAYNMGGGALAKFAKMFKAILRDNWNGVADECLDSIYGRdkFSKNRASSNADLLRS--------- +>SoiMetStandDraft_5_1073268.scaffolds.fasta_scaffold837069_1 # 3 # 440 # -1 # ID=837069_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.639 
+---VEFIASWEGFPnnGKPYNDPVGYATVGYGHLLGYRPVTAaDNRAVWVP--GQKTPGRLTKREARKLLKQKLRKTYeppvreLFTTG-----PLRGKFRQGAYDALVSFAFNLGPGSVQG-VPGFETLGRAIDSGsigkVANALR--LTTRQA---------------------- +>tr|A0A2T4IFF3|A0A2T4IFF3_9RHOO Uncharacterized protein OS=Thauera sp. D20 OX=2136175 GN=C8261_08920 PE=4 SV=1 +-QLRRRLHRLHGHIAHMYLSPTGHVAVAAGYLLFNSDQALllgfvdtnghraaadAVRDDYQRIrqlpRSTpvaccahLSRLRLPAHEVARLNDARITSAHRELR---ELFDDFDDFPQPARLALFDMVFAHNGKILApAQPPLRGSIAAGNWLAAAAHTWR----------------------------- +>tr|A0A1G7NV30|A0A1G7NV30_9PROT Uncharacterized protein OS=Thalassobaculum litoreum DSM 18839 OX=1123362 GN=SAMN05660686_02338 PE=4 SV=1 +-DYLPIARRFEGDIPWLYLDTVDKVTIGIGHMLPNAAAVGaiplgrngqaasdaDKQTAYAAVaaatDRAlrgakafqdLSDLRITPEQSADLFRSKFAEIFAETQ---RRFKtvggGFAAWPARVQLATLDMAYNLGPQGLYsGFPTFRTkGLAVRDYQVCAEECRR----------------------------- +>SRR5690606_35016690 +---AAYLKSDKNLKNVA-----KHLAIGIAKYLKlktkTTKT--------TTS------KTTTKSTTTEMYevvtnlPGYLTAADAKVdknrkttvKPgKYYVykkydgmVNVTSKRGVpgswiNPSKKTSATYYTVKKGDTlshiavrY-KTTVSNLVKLNSSx-------------------------------------- +>Laugresu1bdmlbdd_1035124.scaffolds.fasta_scaffold269861_1 # 2 # 352 # -1 # ID=269861_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.553 +-----MSSKAQGYKDVA----QGNDDVALGQQNQdrgrQLQEEGRKegGIGGLQKM--LEGQRlekegkaLEQTGREEIQKG-----QQEIsDGEKKIqlgsQNTTA-SEPpqvdeppgPNTDTAPPSTYTVKKDDTltaiaeaY-GVSLESLIAA----------------------------------------- +>TergutMp193P3_1026864.scaffolds.fasta_scaffold791325_2 # 74 # 286 # 1 # ID=791325_2;partial=01;start_type=ATG;rbs_motif=TAAA;rbs_spacer=14bp;gc_cont=0.441 +---------------------------------MpsfkAPGPA-------DERE--RFTGGVDKETGEKLLQRD-VGTAERAvLRF-----TRVPLTEGQFDALVSFTFMDHREIVvpiidfhelva----------NPQKYr--LPLIFQ--RNDR------------------------ +>SRR5690349_22412000 +-----------------TGVQTCALPICIGRHHGVN----------------FGDPDIDEATQDRWLAEDLQSAYAG---AASLF-HLDQIDLVRREALIDLVFNMGDRKSTRLNSSHVEISYAVFCLK-----KK---------------------------- +>UPI00068E4A83 status=active 
+-NVIKLIKQHEGSKYNDkgQmisyFDTEGNLTAGYGHKVLE-SDVDvNGNKLL------TEGQVISTKQADNWFARDSRKAIQQASS----IVGFDRMSPARQKAMIDLTYNMGIGWTNEFPKAYGYIKNasnlseqwrrdIQFKQAAEELRyrdaddktkgRSKYFNQTKSRAVTITGMVENGX------- +>ERR1035437_10070638 +----------------------------YGAT-GPKIDE---------------ETVWTQAQADGDILARVNFIQTQLS---NRMTFCDKLSPVRQDVLINIAYSIGIAGLIKWTITLAAASRYDYVSVSDDIRNkKVWKSEVKGRADRCAAAFELGTWX----- +>NGEPerStandDraft_8_1074529.scaffolds.fasta_scaffold83269_1 # 15 # 611 # 1 # ID=83269_1;partial=01;start_type=ATG;rbs_motif=TAA;rbs_spacer=11bp;gc_cont=0.486 +---IQIIKKEEGFSEKAYPDPFSPramekwkavakrrpdwdklpgtpWTIGYGRT-GPDVGE---------------FTTTTKDQELF-------WLSFRVQ---EELKWLQKRGVPPCAGLVSLVYNIGKAAFEKSKS-YRAFQEGRWEEAMEEMAGfNKAGGkvRPGLVKRRAAELELIKKWL----- +>SRR4051812_43680048 +----AKMREREAFVPKVYHDEAGNSFVGYGHKVLAGEDFS---------------RGITEEEAMALFEKDVERLVN-----VSLDKIQVELTPRHIDSIGGFIFRTGPGAFE--KDSLPHLNARRPSPP----------------------------------- +>tr|E6YK95|E6YK95_9RHIZ Lysozyme OS=Bartonella rochalimae ATCC BAA-1498 GN=BARRO_10216 PE=3 SV=1 +----SPEKMIDELRLQGFPST-------FLKNELkelNTILP---------------LRHLYERRAETMLLTDLRQYERALEK-----AVYVDLSDEQFGALVSFCYNIGITAFQNST-LLKKLNKGDYESVPIELQ--KWTKAGG--------------------- +>tr|A0A067W9P7|A0A067W9P7_9RHIZ Lysozyme OS=Bartonella koehlerae C-29 GN=O9A_00018 PE=3 SV=1 +----------MGRAPFAC---lsRCRWCvdnWLWTHRPRvgnPLVQE---------------GTQITVAEAETLLQKDLVQFEKTVEE-----MVKQPLNDEQFSALIPFCCNIGIRNFLQF-------------------------------------------------- +>LakMenE01Jun11ns_1017448.scaffolds.fasta_scaffold1136115_1 # 1 # 93 # -1 # ID=1136115_1;partial=10;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.333 +----------EGFKLEAYPDvlrGKDAMTVGYGHKLLPEEL----------------GKDFTKEQLEQMYLQDFLKSKQAASE---NIPGYDMMAPEMQSAFTSQAFQLGKAGQADFEDMIAAINAGDKGEAIKEVYNSTWANQTPA-------------------- +>EndMetStandDraft_4_1072995.scaffolds.fasta_scaffold7331405_1 # 12 # 206 # -1 # ID=7331405_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.631 
+-----TIVEHEGLRTDVYEDKRGIAHVGVGSRVWEDVDV-------------KTS-----DDIVDRFAADLDHAESVAKEYA-GELAWRRATESQRLALIEIAFALGRTGINKFIKMREYINAVEWNAAARELENSKWYESDTERVKTLANNLKT--------- +>APAra7269096936_1048531.scaffolds.fasta_scaffold187712_1 # 45 # 365 # -1 # ID=187712_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.664 +--GIALIKTSEALELTAYLCPANRLTIGWGHVLlPKFDAQLFKNVSAealarivsecqrrkTVTKEAKQLLYINQRQAEQLLAKDAGRVALFINS-----LTHAELSQSQFDALGSFAFNIGDRNY-AESTLRTKLNAGDMDGAAAEFD--RWIYGTVDG------------------- +>EndMetStandDraft_4_1072995.scaffolds.fasta_scaffold481099_1 # 3 # 767 # -1 # ID=481099_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.495 +--IDAVFF--EAMAADPMSGTS-----AAARWLMWS--------------------GVRAG---------------------GWYAWKRQAARS--------------------KSLNPAWTREGWPEAPX--------------------------------- +>SRR6266850_2763771 +---LEFLKLRESCRLKAYKDQAGVWTIGYGTTRYYPMGK-------------IIeg-DIIDQNRADSLLMMECDGIGRKIEDL-----LNVKLKQCQYDALISFIYNVGMTAFKN-STLRILINQNSEEDkIREQFM--RWSKIHRDGALIVSNGLRFRRRR----- +>UPI00012116A2 status=active +--------------LTPYKDDVGKWTIGVGHLIGDGSLKAKIAHDKARAK-KGLKSAFTPEEALERFTKDVSKRIPTVE--DIFIELWPNMSTGLKAALVDIEFRGDlqSKGEGEF-EWVELLKAGKYKEASKKYLDHKEYKK----------------------- +>SRR5690625_447736 +----RSVPTSDNVLVFSPPYHQGIRTIGYGHACHvnDCSKV---------------HPPLSEKQATELLKKDLVPHEKCIES-----H-CGHLNDNQFAAMVSFAFNLGCGPV--PS-VCKS---HSKKAMAAAIMKYTHAG------------------------ +>tr|A0A2E9YSC7|A0A2E9YSC7_9RHOB Uncharacterized protein OS=Roseobacter sp. 
GN=CMN10_02000 PE=4 SV=1 +----AEIKKDEGFVpsadglgVTGYYDNNGWLTRGYGHRVKGAKCDPsRAGEHAP-------ELFDSWSSADALFDEDYLVHKRAATQ----VPGWSKASPVQQRGLVNLTFNMGPDWWKahtnewgeekhGWPGFTAAAEAGDWNKAADELEDSKWFREDVGSrGPAVVSLVR---------- +>APCry1669191911_1035384.scaffolds.fasta_scaffold00086_17 # 5251 # 5505 # -1 # ID=86_17;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.384 +--GIHLMHEFEGYRNKPYLCPAHLWTVGWGEVLYQDQIRLPMVHkegytgiiRKEYKLRDADNRTWEKAELEERFKKLLLSFERGVLR------LAPTlsSNQGLFDACVALSYNIGIGGFQR--STLRQrILRDEsAERIAEGFM--MYTK------------------------ +>AmaraimetFIIA100_FD_contig_51_12512804_length_284_multi_3_in_0_out_0_1 # 1 # 282 # 1 # ID=4841408_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.660 +-TMFSLAV----LSAMVsiateMvTrVAAYYWAGSL---LSSL------------------TGMSSEGLSSVALQQGgIGLILTVML---VSAPPMAaAFFNgvmGQFQSYSAYQGGLGAGARGP-GSP----------------PS----------------------------- +>LGVF01.2.fsa_nt_gb|LGVF01443631.1|_1 # 2 # 415 # -1 # ID=27860_1;partial=10;start_type=ATG;rbs_motif=TAAAAA;rbs_spacer=13bp;gc_cont=0.285 +--LADFIAGWEGFEPIAYPDGN-SKSVGHGFYLGNHFSRQMI-----ESVGGnfdrILagQERISEQQAKQLLMSSTEMAISD---AYSFYPGLIEHPREVQMIVIDMAYTMGH-KINQFPKMRAALEARDYEEAAKEMQNSKWYDQTGRRSKNHVSQMQ---------- +>SRR3954453_19585906 +----GVHRPLRGLPRRALQRPRRPLHDRLRPPPASWAINGKEPA--------EFRRGISRRRDLELLRNDAGTAPRAIDER-----VRVPLTQPQLDALVSFTFNVGAGAFAG-STLLRVLNARDYRSVSGQLN--RWTKASGrplpglvRRRAAEGALFSHGVY------ +>SRR5688572_3823868 +-ELRDYVIANEGLSQHVFRDASRRPTVGVGFNLKRRGARDlietlglDYRQLLREARTPASEPSVTTDQAMVLFNTDLPIAIDAVRK---AVPTFDMLSHRQQVAIVDIAFDVGSSGLKGLKRAIRALEAGELAVAGEQLAASKYARklATAERVAGNVELIQS--------- +>SRR5688500_20236797 +------------------------------------------------PALPRCPFFPrpaSPRHRPSFPTRRSSDLVDAARR---TFPDLDALSHRQQVALVDIAYTSGVKQLRKLRKaVEASVESGDWTTTAEDRKSTRLNSshlVISYAVFC----LKX--------- +>ADurb_Val_02_Slu_FD_contig_21_816138_length_288_multi_2_in_0_out_0_1 # 1 # 288 # -1 # ID=63754_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.497 
+--IIESGETFsikgasADKVGIFYGTLLGMDvskakkkkDIGKWfktwsttenirLRANVTDKYQ---------------KEIlgellgvkdgdicTKEEAESLLASDLEEFEGYVDK-----YVEVDLTQSQRNAIIAWTFNLGPRSLKS-STMLKELNAGNYEKVPSEMK--RWNKA----------------------- +>ERR1022692_1321356 +---MKILQDSEGCE-lTSYKDEg-GVWTIGIGSISYPNgksvG----------------SNETCTMDQALEYLNFELDQKIRSLNTWS--MQNKLTFSQNQQSALLCFAYNLGMGPIIMKgFSLNLALISGDEQKIRDAFA--LYVNV----------------------- +>SRR4051794_5815555 +---IDMLASWEGER-lKAYRVPgESFWTIGVGHTGKVGgkpiH----------------EGMVISRATSRKLLRADVKVAEDATKKLV---PARWLRRQRRFDTCVSLAFNMGSEILTASapltsfgDVLKKRVNRVTIQQAVNAIQ--LYNKG----------------------- +>JI7StandDraft_1071085.scaffolds.fasta_scaffold1617476_1 # 1 # 327 # 1 # ID=1617476_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.719 +----KIAETAEGRYHKAYKDTDGIWTIGIGHTGHDVTP----------------SSVWTDAQIDAAFAKDVDYAMREAlSE-----SpTLRHASPGTQAAVFDFVFNEGIGRYM-SSTFRKDIERGDLAAAKQSIL--LYNKG----------------------- +>SRR5574344_1274215 +---IELIKSFESLRLKAYKCPSGVWTIGYGHTKDVKATm-----------V-------ISPQRAERLLKEDITQTETQVKK-----NdRLHISNTRAVFGFGVFCLECSNRRFD-GLNTFEESKTKPtRSS--NKRR--VWQVGX---------------------- +>SRR4030095_3464828 +-HLLAQVRRYEGFKLRHYQDAGGNLSVARGTRHPQRP-------------------TTTAHDATTALERDLRRIGGQLES--QL-PAFGKLDSVRQRVLLHIAFNVGVGQLLKMRRFISAVSVRHWPVASEEMLLSKWSEEDKVRARVLADMIRTGRDV----- +>SRR4030095_2298746 +-TCWRNLTRYEGFKLRHYQDAGGNLSVSRGARHPQRR-------------------TITAQDATTALEGDLRRIAAQLEN--QL-PAFATLDSVRQRVLLPHRVQRGRAATLENAKVHLGCRAAPLV--SPRRPSKSWRGPHAIRRSCRX-------------- +>SRR6185436_15944894 +--------------------MPEETSA-------------------------------SPRDAPSATTHNNRsrRhhcLGKRPPS--DRGPIGESATSVREarlgapTRAASHRLQRGGGQLLKMRRFISAVSVRHWPVASEEMLLSKWSEEDKVRARVLADMIRTGRDV----- +>SRR5947209_6322320 +--GLALVKKDEGLRLAAYQDVASVWTIGYGHTPARR------------------RAPHVSGRCRAAPAG----------S---S-GTGYASRQEYFHAYF----AVGFGVLRPPAGAgcskTRAGPCRHQAGGAACPRH--GAPGsgepGRHRQFGRGPQRrRAA-------- +>ERR1044071_200332 
+--------------------------IGVGINLNSISGQlttdlaadvrsyylanystnlssstDdQIINMLKTQAQNgNTKQAISTTDDQSLFNESYATHAQIAQ--NAIGAAWSSLSTKEQVAVVDIVYNVGS--LAGFPSLVSNSLPSPSFVVGFCCQRGRA-------------------------- +>SRR5262249_24259931 +-QIGGFVAAFEGKTTYPYKVGgKGNTTIGVGMEIPSVVsAGllqelkgdvqafaranndtswdnlsDqQFIQRLNTLAn-Kk-sTTPILTIDDVDTLFAELVAGYETAAQ--DIL-DQRGItPDDMQCVALV---------------------------------------------------------------- +>SRR5262249_40476102 +-----------GAKQYAYLDSEDIPSIGVGINLNTVNGTikehladavrafyvtkyhknlssdDtKIIDMLKGHAYAdAKKDAISPTDINTLFQEALPKYQKIAR--DTLGaSVYDGLNAYQKVAVVSMVYNLGS--LSAFPTMVKAFKSGDLLRAGFEQVNAIRTTQA---------------------- +>SRR5579885_1497604 +--------------------------------------------------aktnTPtSHTQVITDADAWNLLALALPHYQSNVQ--TRVtqDgGNWSNLSLPQQVALDDLAYNYGV-----FPKMTQDLVNGDYPGVAFNLCDAARTTQAKG-------------------- +>SRR5262249_22946191 +-------------------------------------------------XkpidPRhpdVhAPVAMDAQSDQYLFNVSNAPMTALAA--QQAGPAWGYLSAGDEVALTDAVYRRAQ--V--PTAVNTalnEPSGRDFAQAGFALFNAARPIRNNT-------------------- +>SRR5438034_4982256 +------------------------------TALFKAGDASTLAFvkrgttvkadagtieaEwaeVKKQaKGEpaksyerFTRLDLPPEEVSRKFNEHIATFEGKLKA---KWSGYVDFPKPAQLGLLDMIYNLGSFA--DFPSFVAAVNRGDWIEAANQCR------------------------------ +>SRR3954467_10987516 +---------------------------------NV-----------G-----PNTPPISQGHAKELLRERLSRDYAPAVL-----RANSKLTSEELDGFTSFVYNVGVGGVGPDTRVGANLRKGNVKAAADAIL--AWDKAGGRS------------------- +>ERR1719421_1836858 +----RLGATRGrssrgtraTRSACTPTP-PG-TRPSASATTSSKAPRKDirdVG--------ADFdavysgAQCLTSSQVQKLFDKTLPRYVSQ---AKKDFSCFNSLCCNVQDVIVDMTFNLGS--LTGWPNFRKEVCASKWSDAANNMKNTLWCDQVKSRCTDNVNRMKKGC------- +>SRR5207237_730699 +-------------------------------NLDRAGARSTlae-----L---GIDYngvrdgSVRLTDAQIDALLTADVNDAINI---ARDRVANFDSLPADKQKVLTDMAFNLGA--TR-FAGFRQFLDAV--ANRTGPARHAKWRRRGGTTRS----------------- +>SRR5690606_30906626 
+--PILHDGDLKTIGLQPKMCPAGIWTVGYGRALKGKNGqWL------------KGAsgkaeayamYpALTVEQAERMLIEDVNDYAKRIDS------LKlTGLNDNQYSALISFAYNVGFANLRD-STLLKRIRENiNHPDIEFQFL--RWNKATV--------------------- +>tr|A0A2P5N8Y5|A0A2P5N8Y5_METSP Lysozyme OS=Methylomonas sp. OX=418 GN=CTY18_03060 PE=3 SV=1 +---------------QPYDDGTGETitqwqagaTIGVGHLIPLQHWP-------------LYRRGISKSAVMELLAGDLKPVQRAILD-----NVFIKLQQHQFDALCLLIFNVGVRTFEEstVLKYINGVIttHHRYSSPAAAW------------------------------- +>HubBroStandDraft_6_1064221.scaffolds.fasta_scaffold10081493_1 # 3 # 215 # -1 # ID=10081493_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.732 +------KRKAEGVVLELYNDPLNYCTVGIGHLVGGLKPCHelEAAGQIPAEWlkggvppdGKNNrapAPTLTQAEAEALFDQDIKKREKKLVQ--MLRAAnNVKVTQNQFDALMSAVYNAGEGNVNKYI-IKPYLAQtpPDYKGAAEAF------------------------------- +>SRR3989338_5052282 +-TLEQQLTRQEGFSATAYPDRDhYSIGYGMFlknpeapHRLRTMGLDY--SLII------KKQQRVSESQARKIFYEDIVTARSGA---RQTIREYDSQPKKVQDILANMVYNMGLPRFNGFKKLKQHIESKNYVMAAEAMRRSDWYKDTKTHrrAEELRQQMLT--------- +>SRR5258708_7010481 +---------------PTWPNRDkGNPTFGIGHKLISKDLEW--HSWQKL--QPSSKLTISNERIDEVFEQDKDICINNC---QEVFNDFDKMIEELQLILANMMFNMGKPHFLGFKKFILAIKSANYLTAAIEMKDSDWYNEVPVRVERLRKRMVQ--------- +>SRR6185312_9435845 +---LTLIKRSEGFRATTYNDAAGRPTIGYGHKLRPGQSFP---------------NGITRAQAEQLLISDVATACEALSISSAF-----HSLKAdTTLSSISASISAGDGWLAP-RSFasltpatstqlaascfFGIVPAAAFCPPCRPAA--RPNL------------------------ +>SRR6056297_1928111 +---MWHLKVTEGIRLKPYKCSAGVWTIGWGHSIKKGEKFG----------------KITVHQADSILRLDFQKRLDDARNT---YP--FITDKHRqwAVAMLAFNIKGGVKALSG-TKLERYLQEKRYADAADQLLKWD--------------------------- +>SRR3972149_7880312 +-------------RPPSDPH--aaGLAPVGYGHLILQGET-A--------------AQPPSLDAATKLLLDDVSRVEREVDVI---LSDL-TPAPYERDALVSFAFTLAAAALRR-STLLRLFRQGLAVEAAAEFQRWT--------------------------- +>SRR3990170_9014553 +--MSQIIKFFEDCKLESYQDSAGVWTLGWGRTKGIK-----------------EGMICTQEQADKWLEeEDLKDAEDrVRRA------LPNiYFSKNELEALISLAYNLRSFEMLVSYLPDREKFK------KKMLLY--CRD------------------------ +>tr|A0A1L7NPR7|A0A1L7NPR7_PSEPU Lysozyme 
OS=Pseudomonas putida OX=303 GN=KF715C_pC40 PE=3 SV=1 +----ISAAGYEGTRLDVYDDGLGIPTQCMGQTQGVKfG-----------------QPARTLEECAETLVVRIQDNQDVLQRRIGAIQtpagaiTYKDLSPGEQEAYNSFYDNLGPGGKGV-KDGLFALKangqpstlvrklrAGDRVGACEQIM--QWLN------------------------ +>SRR3990167_2871102 +-KGVHFIANFEGFRAQPYRDSGGLLTIGFGQRCTEaqVK---------------SWGGSITREKGEELLQAYLDRQHAELDKL--P---LAWLLQHQVDAVCSLSYNIGMGNFTT-STVYLKIITRSPdL---SSWLWWVKD------------------------- +>tr|H8NWK5|H8NWK5_RAHAQ Prophage Hp1 family holin OS=Rahnella aquatilis HX2 GN=Q7S_18580 PE=4 SV=1 +---------------------------------------------------------MTMERITS-------FICYCIAVFLAW---LGGMSYQDI--AFLVGAAVGVATF---------------------LV--NWYYR----------------------- +>tr|A0A0T9PNB2|A0A0T9PNB2_YERBE Lysozyme OS=Yersinia bercovieri GN=ERS008506_00031 PE=3 SV=1 +---------------SMYTNESIQNTTHIL-RNSKQL-----------------RSVISERQVAVNLVADVQRVERAMAACMP-----LAMPQQVYDAVVSFAFNVGTGAACSS-TLAFFVNKGDWRSACNQLP--RWVYV----------------------- +>tr|A0A011RLS9|A0A011RLS9_9PROT Lysozyme OS=Candidatus Accumulibacter sp. 
BA-94 OX=1454005 GN=AW12_00857 PE=3 SV=1 +--GLDLIKRFDRFLDAPQRQPSGQSLIGYRYVMRpad---Q---------------DLIRMGEKDADLLLREALRCIEIYLNSS-----VRVELDQHEFDALASLVFDVGMGTFDR-SDLRAMLNRGDKAGAAYALQHWVGPSS-advRPRRNAEATLFLRGHLSS---- +>GraSoi013_1_20cm_1032409.scaffolds.fasta_scaffold94441_2 # 379 # 501 # 1 # ID=94441_2;partial=01;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.740 +--GLDFITSFEGKLKkradgkyEAYRCQAGVPTIYVGCTVGVT-----------------DGMIVDEAQGRAMLRAELAKHDVAVGKA-----VKREVTQDQYDCLVSFSYNCGVGSmqkva--------AVLNKDGPKAAAAKLL--EYCkftnpkTKQLeisrgllRRRAAEARLMEPDTP------ +>GraSoiStandDraft_34_1057297.scaffolds.fasta_scaffold3504957_1 # 3 # 248 # -1 # ID=3504957_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.593 +-QCTNLITQFEGFRPSAYFCPAGRPTIGYGTTIYRDGQPV------------GIGDETDLFSaEMELTYHVRRHVEPTIDRHF----GGLGLQANQRDALGSFIHNIGSNSAKW-PTLKRLIVEDAPIEeISDQWV--KYRLAGGrrllglyRRRLAEVLLWHGLPW------ +>ERR1700679_1770672 +-MTVGWIATYEGFRADPYPDSGGVWTIGFGETYNLDGSRVT-----------AATPPITRADAMTRLSVVVSAYLEQVRYM-----VHVPITNHQAAALSSICYNVGTNAIRN-SSLLLALNKGATAEAADKFC--EWVYDDRrvipglvQRRKTERELFLTP-------- +>SRR5690606_20283691 +-EALALIKEFEGYlkrlndgtdRVKPYLCPAGVPTIGRCTTRYPDGRRVK-----------LRDPPNNKKTATTYLEHELTEDERAFDRL-----TTVRLPPLARGALVSFVYNCGAGAYQG-STLRKRVNAGRWADVPKELA--KWKYGGGkvlpglvRRRAAEAELFLRGIR------ +>RhiMethySRZTD1v2_1073278.scaffolds.fasta_scaffold3214376_1 # 2 # 526 # 1 # ID=3214376_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.543 +--YIDHLIEKEGIRYEAYKPTTkenEPWTIGVGATGEGI----------------AEGTVWTPEEVRTRLQTDIVE--------------RLPEIRRQIPNFDSLPIELKVpllgswfrGGVSGSPKTKELIAEGNFTEAAKEFL--RNKEyeesKT---------------------- +>EndMetStandDraft_5_1072996.scaffolds.fasta_scaffold3180233_1 # 2 # 211 # 1 # ID=3180233_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.738 +--GIELIKHFESLKLSPYLCSAGVPTIGYGSTKYENGRSVK-----------MTDKPITKERADRLFLNTLvAYEDITNKE---F--KGVELSQNMFDAFVSFAYNTGGFRWNS-GKSVYTFAsfakqdANKlKRevvDFLMSVS--KYTdpeTRELK-------------------- 
+>SRR5687768_13746446 +--GFKLLKHFEAGSGfetk-AYWdKTGKVWTIGWGFTKGVK-----------------EGDTMTEEEGNARLIEEIQEYEDGVLR-----LCTLAPNENQLSAMVCLAWNIGLNPrkgFPI-SAVLRYHNLGNFQKAASSFK--SWQKSGGallpglvKRRRAEAALYLKALX------ +>SRR4249919_3132851 +--------------kyepgphkNKPFWDPYgKVWTVCEGHTGG-VdP-----------------EHDYSDAECKAFKDADIAIANASLKRC-----LTMPMLVQIEAALTDAVFNLGPQVVCG-STLQRKAIANDWPGACAELD--KWSRGGGrvlpglvRRRTDERRLCEFNrX------- +>GraSoiStandDraft_23_1057293.scaffolds.fasta_scaffold69440_3 # 1426 # 1815 # -1 # ID=69440_3;partial=00;start_type=GTG;rbs_motif=AGGA/GGAG/GAGG;rbs_spacer=11-12bp;gc_cont=0.633 +--SLDLIKEYEGLSLTTYLCSGNYWTIGFGAIYAINGER-----------VKKSDPAITEAHATELLRRDVNIAYRSVARL----TApyTEDLTDNQMGALTSLCFNIGSGNFRA-SSVRSNIVRGEIENAGKNFW--QWRRAN---------------------- +>ERR1017187_1421980 +---ASDDDLERSQRLVAYLRRalgysltgttiekAVFIPFGTGDNGKStmlstfrnlveeYS-----------------HlLqVDTLMVRQEFLRDDIRQVESGFPS-----VIHGSLTQGQHDALVSLCFNLRGGALRLAkiaPRLVARINSGDSAGAANELL--DINRAN---------------------- +>SRR5579871_4423982 +--ALDFLKARENCRLEAYRDSAGIWTIGWGHTGS---DV-------------TEGLVWTQAQADQALIAKVANVETTVSMQ----TYPLKLSEQQAAALISFAYNEGQHAFAT-SQILQFVRAQKWFAAMKDLL--NWTRAGSadsqgllKRRCYEAALFLEGSP------ +>SRR5690606_40274282 +--------XRPHPTDTYTLSLHDALPICYGHTDSAgdp-KY-V------------VtFNKTFTKAEGEEILRRDLGQYESAVSN-----AVKVPLNDNQFSALVSFTYNLGADRKST-RLNSSHVKISYAVX------------------------------------ +>SRR5215510_4351513 +------TKISEGLRTEAYPDPGtggDPWTIGYGHTNGVY-----------------PGETCTEGQATAWLREDIGWAENVIRN----CPFAGQLKQQEFDAHSWTSCSMSGRVnratKTassgcvtat----I-QR----------CTAS--LWEatMParptnsrsgTCRRCRGSX-------------- +>CryBogDrversion2_5_1035270.scaffolds.fasta_scaffold91257_2 # 140 # 475 # -1 # ID=91257_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.423 +--ETQNILSFTGmtnsltyveatsadpeaeEKLRNLisdlkRDVYnfkwdGVHDIMYGFLTNdqiikEYNNFksepnyyfnlkngtamftteeeiiktsdilsilS------------LPGLSqNITGKAFDILSIDIDWW----------------FNQFLIPSSTSFQNTLTPKQWLdvnieN-YGFVRPYNDKpwHLyyVGIPkdetktrnetikIKYI--ESKKII---------------------- 
+>GraSoiStandDraft_27_1057306.scaffolds.fasta_scaffold1255415_2 # 318 # 419 # 1 # ID=1255415_2;partial=01;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.676 +--A-QAVRALQQrlnelgfacgradgvfgirtkAAVMAYqskngLSVDgvagpKTLTRLYq---AQesvqtPNetqdplmsqspvENQaptpnnqtgqaslgvakvvtpnggtlnmrsqkvqrastfmr------------------tIPNLAMVSLLSKENDWCLVAYNG---------------------------STGYVM-SKFLQFSEDKptPT-PAPeqtiapiqttiplptptpasddpsvlEEEE--TQRRTL---------------------- +>OlaalgELextract3_1021956.scaffolds.fasta_scaffold1943914_1 # 3 # 119 # 1 # ID=1943914_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.385 +-KLFNEIAASEGFRAEPYLDSAGIPTIGKGTTFYEDGRPVT-----------LQDAPITEDRANQLLKFYVGTVEEKLRnAY-----PkMQDMNPNEIDAIMSFTYNVGANFVDA-PSGFETMqkGLNTGDKKiiSGAFK--LYNKHENP-------------------- +>SRR5579883_2282448 +----------------------------------------------------------------------------------------TRFSR---DWSSDVCSSDLVGNFAK-STLLRDVNRERV--TEADFT--NWNKING--------------------- +>GraSoi013_2_20cm_2_1032436.scaffolds.fasta_scaffold147940_1 # 1 # 546 # 1 # ID=147940_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.621 +-ELIEALIRFEGMKNEPYTCPAGYWTIGIGSRYDLAgnqvtkNT-----------------KPINKHEAIKLLNNTILEIHRVL-----LLNGGSVFTAYQIEAFIELAFNIGYPTLIK-TRLYNNAKKGVNVS-IEDFT--QFChY-RsngllveskqlKQRRIFDWNMFIGAVNRT---- +>GraSoi013_1_20cm_1032409.scaffolds.fasta_scaffold445919_1 # 3 # 251 # 1 # ID=445919_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.683 +-ILIKDIKRREGLILHSYKCPAGFWTIGYGHQIKDTN------------------LKITKNIAEKLLIKDINK----VSK---ELIKVYKIKPIKAYILSSIAMNIGLNKFKK-SKMSISLIENNDSLLYNVLL--KYCyYKNnsgkyiksnnlYISRKNEVFLLKLIKYEX---- +>tr|A0A1B1KSZ3|A0A1B1KSZ3_SERPL Lysozyme OS=Serratia plymuthica PRI-2C GN=rrrD_2 PE=3 SV=1 +-KGFNFIKNSRRCCLSSYKNEIGEWAIGYGYNKSVGP-----------------GRTISELTALGLLQADIMECEQKLNE-----ILMTPLAQSQFDAILSLFYDIGLGVCgkksgigicksGSISPMLSLINDAKFYAAGEYFL--YWGINHEKdklkLREAEKKIFLHGT------- +>SRR5512146_853306 
+-LAAPLVDR-EPADVVL--LGglgDADLRRVLQNHGLLGR-----------------RVDLRGGQADLWLATDMQTFEKCVSMY-----VDVAVTQEQFDALVSFAYDLGCGTLRN-STLIRKLNTGDDVGAAEEFS--KWTHAGGkvlnglvARREAERQLFMEASM------ +>OM-RGC.v1.021371472 TARA_078_SRF_0.22-0.45_C20899504_1_gene320246 COG0128 K00800 +-ECYKIWQEFEGLKLNAYVDPatgGEPITIGYGHTGSlsgskVKM-----------------GQTITKEQAVEFMKADLLQVWKNLQPL-----IKVELTPNQWAALVSFSGNVKWKSVKSSsvi----SYINSGKLAEVPGRMA--LYRLGDGkvmnglvRRRAAEGALWMKATA------ +>SRR5699024_8623484 +--SLPLSLHDALPILTAYQDAAGIYTIGWGHTRGVK-----------------SGEFIDEATAERLLSEDISEVERSIAS---HLPesAIRRLPDASYDALVSFVFNVGTQAFVR-PGgshtdFYRAVTGKDLTRVCAEMQ--RWVKAT---------------------- +>SRR5579859_1718574 +------------------PDPGtkgAPWTICNGHTGPdVH-----------------PNMTATAAQCSAWYRADMTAKMKAVLAV-----SPeLAGNRNALMAAGDLAYNSGAGKWPG-SPMRAAYAKGQWAAGCQAFA--GYITGYkapkpvpgrrcwisklngklY--------------------- +>ERR1039457_6500669 +---LELLKSSPNCSVDCYSSGflfllvEGPARGGRSPERAGP----------------------SGGHCRGLGCCDwfllhaqVDSPHPLLIGF-----HKQP-------------------------------------------------------------------------- +>SRR3981081_3784437 +--GYRLIKEFEAGpagnsqpALVPYHCPAGRLTNGWGNTTNVR-----------------PGVAISLAQAEAELQRNLDWAEACVEKL------ANSPNGNECTTTGALSVTTD--------------------------------------------------------- +>ERR1035441_11105007 +----------------------PSSTLFPHTTLFRSESF---------------PDPITPDAAWALLLSDVAKVDAAMNA---QhL--ALDLNQNQWDAVADFTFECGAGALV-------QLLAHGLDQITAQLP--RWVHAG---------------------- +>ERR1017187_4676104 +---LALIQSLEGFKFKASPDAKGKWSIGHGHDLLPGESY---------------PDGITPAEAFTLLKIDVGFAEHAIET---LI--HVVINQEQYDSTVSWIYNLGVHSFTT-STMLTDINARHFTDAAQQML--IWCHQG---------------------- +>tr|A0A0P5SGI9|A0A0P5SGI9_9CRUS Uncharacterized protein OS=Daphnia magna PE=4 SV=1 +--GYETIKGFEALSLVAYQDIGGVWPIGYGNTRYQDGSPV------------RQGVTITQAGADDLFEYWVDESFRtrsRSSG-----LQRP-----KVDR-----------LFSN-IRIVFNGLILHgSGTIGND-------------------------------- +>ERR1017187_1417329 
+---------------------------------geIIT-----------------YTFYTKEERPLVQVQDELQRAEGVLAQ---V--IQVPLLSHQRDALLCLVSDLLggyasslSTNFED-SALVKLLNSGTYQLAAAEFF--KFCYVCGkidkrvwRKRNVEQLLFVR--------- +>SRR3712207_8572426 +---TLFPYTTLFRSLKAYRDSVDVWTIGVGHTSAagppkVV-----------------PGLTITQAECDAIFSRDLAAFEAGVER-----SEEHTSELQSRQYLV--CRLL----LEK-KKTHTRT------------------------------------------- +>ERR1700722_342686 +-RGVEFIAPFENIyesRYCPFYDPYGkVYTRAFGETDWSGN---------------FGGKCISHAEALANLKSEFDRSYAPAIN-----NLNVSLNENQFIALADAVWNLGPGSMEW--DIGRELRAGNFHQAALDLL--QYDTAGGvvlpglaARRRAEYNLFMTPVK------ +>SRR5574344_2224119 +-------------PLRAYLCPAGQWTIGWGTSFYSNGQAV------------KDGDtITQEDADFELYTYCKNKCLPIIEKL---E-KmcKRTFNDNELSALFSLLYNIKNADnFCD-TKCAKAIARNN--------------------------------------- +>SRR5258706_1723627 +-----FIQNKeGGPIPQTYPDIANHPTIGFGHKLTPGDTWL----------YQQFPNGINKLMANTLFNQDIQDAEDAVHQ---AVGNVDGLQQNQFDSLVSFAFNVGRGNLAN-STLAQKIKNRDLAGAADEFS--KWGYARAaksqgliNRRAGEEALFRHGIY------ +>SRR3954453_121896 +--QPSFVAAAAGLI--AIRSAATtAAAYAHMTASSAS-----------------GGMKFSKETCDNMLIDGLIRHEEGM-rSW----LGgavAVPLPDQRYVAFLSLTYNIGVGGFCK-SSIPRKIRAGDIRAACNT-------------------------------- +>tr|A0A1V2N7H5|A0A1V2N7H5_9RHIZ Lysozyme OS=Candidatus Liberibacter solanacearum GN=AYO25_04645 PE=3 SV=1 +-IYFELNIRFEGLRLKFYRFSAGIWTIGYGHTGN-----DV-----------CKDMTITEKTANELLKRDSLNCLNQVFT---VSPILINAGKNCISAIGGFVFNLGIGRYRN-RTLRKCINAEDRISSSHEIR--KWVFAGG--------------------- +>SRR4051812_27032530 +---GALIERFEDCAYEPIPDTGPvGWQIGYGMNYLLDGSPVS-----------PATPPIDAVTANAWlHERLSSFYAAAIDK---LLSHAPALTAGQRGAVYSFSWNEGEGRFAH-STMERFIVDGKIADAADEFP--KWIYAG---------------------- +>GraSoiStandDraft_32_1057276.scaffolds.fasta_scaffold1551543_1 # 2 # 517 # 1 # ID=1551543_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.671 +-TISDMLKRHEGFRSKPYLDTVGVPTFGYGFTYITKAEADI----------ILSGRAVELY-NSCSAKWPWFEDLCDIRK----------------LIIMDMAYNLGITGLSR-LTSVIAA---------VSSG--RWADAAAA-------------------- +>tr|A0A1L7DS69|A0A1L7DS69_9CAUD Endolysin OS=Ralstonia 
virus phiAp1 OX=1932865 GN=phiAp1_51 PE=4 SV=1 +------RFEWTGKETRTYVDPVGVATICRGHTGPLT----------------K-KGSATLAECDDATLKDLLTAQKVVRSC-----TTVPMTPGEMNAWTSFAFNVGPGRAGVKdgfcrlkngriPTHIAYIESHQPVKACGMLM--SWTMPG---------------------- +>GraSoiStandDraft_15_1057317.scaffolds.fasta_scaffold8106439_2 # 78 # 200 # -1 # ID=8106439_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.650 +--YLEKIEAVEGCEFEMYML-EKVPHIAIGHNIVSRPLTDETMDYLGiEDESELMHVTLDRDKCEYLFFKDLDIAIADAKKVINTapdgsvDNVFDALTEVRQEVLVDMSFNLGRPRFSKFKKLIAAVRAGDYDEAAAQILDSKAARDplTANRYADLAERMKSQ-------- +>tr|A0A2T6JZS2|A0A2T6JZS2_9BURK Lysozyme OS=Paraburkholderia sp. GV068 OX=2135697 GN=C8K18_102302 PE=3 SV=1 +--GIGFIETWEAFRAHLYDNdgagGGGNTTIGFGHLVHMGPISGA-------PSETPFLNGITVVQAHQLMHQDLRDPQRIVNQ-----KINVPLFQYEYDALVDFAYNLRHHND----GLLGLVNTGQYDRVPAKFMEYTWAGGQqphglVKRRRAEGRLFASGDYNA---- +>SRR6266404_5083964 +--VFARLKREEGERQFAYDdatgqrvrAPKGFLSWGRGFNLDA---------------------CGSTGLFEVMERYLIGLCDTALR----GYQWYQALDDVRASVVLDIAYNGGVGGLLHYPHMIAALKVQDWATASRECAVE--DEKLdASRYAPLRKILLSGVD------ +>HubBroStandDraft_5_1064220.scaffolds.fasta_scaffold2722472_1 # 3 # 320 # 1 # ID=2722472_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.626 +-NLVDTIKKHEGCRLDMYKDTVGVWTIGYGHNLAEG---------------------IDQETADFILARDLEKHANELDK---HKPMWRELPEPAQVVILSMQFNMGWNRFSKFV-KFWDAIEN---------K--SWSEAGR--------------------- +>KBSMisStaDraftv2_1062788.scaffolds.fasta_scaffold11044595_1 # 1 # 63 # -1 # ID=11044595_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.698 +---RKFIEMeeeSGVPKLVAYNDGTGTWTVGYGHTTAAGpPKVY------------QGFT-lSNAAEADSILESDLSSVEIDVNHH-----VTVAINQNQFNAIGSFDFNTGA--LDR-SGLLTLINNGHIDDKAaitAAFqaW--RWARNGgvlvpelLGRRTREAALFYTPVVEA---- +>SRR5690606_41093304 +---IDRLSLHdaLPISLVTYVDPVGIPTICYGYTHGVK-----------------MGDVATPKQCDQLTRQEAAKALDRKSTR-----LN---SSHVKNSYAVFCLKKKTGSRQG-QATTETAPP----------------------------------------- +>SRR3546814_1346490 
+-------------FKQKTAYEMRI-SDWSSDVCSSD-----------------LSykAQFSRDECIAVMGASLFAHAVELDKC-----VKRPLGRNEAAALLSWSYNVGVGAACR-STLMRKLNAGQ--AWCGELH--RWVYAG---------------------- +>8_EtaG_2_1085327.scaffolds.fasta_scaffold00048_50 # 35383 # 39348 # -1 # ID=48_50;partial=00;start_type=ATG;rbs_motif=ATA;rbs_spacer=7bp;gc_cont=0.461 +-NFFDLVRAIEGLKLTAYQDQHGVWTIGMGTVYYPDGTTV------------KEGDTCTYEQACQYVNAHSAGLQSHLTA---V--LPDTVTQGMFDAVGDFCYQEGQGAFDG-STLKKVIIADptNFDGVEAAFN--MWTKERidgelvvskgiVRRRKCDFYLYQNGSN------ +>SoimicmetaTmtHPB_FD_contig_61_267642_length_552_multi_2_in_0_out_0_1 # 3 # 434 # -1 # ID=1627897_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.632 +-ATDLLIKTCEGMKLQAYLDSVGIPTIAIGTTRYPNGETV------------RLGDTCTEEQAYIYLNSHLEEHVYpYVDK--IC--AGNSVADSTYAALCSFVYNEGSCGISI----LQALHDEDLNELAIAFR--KYIYA-kgkiaqglVNRREIEIKYFMEAAX------ +>tr|A0A0P8YML2|A0A0P8YML2_9GAMM Lysozyme OS=Psychromonas sp. PRT-SC03 GN=JI57_03910 PE=3 SV=1 +--GLAHIANLEGCRNQTYQCSAGTWTNGMGHTAKVK-----------------QGDIVNNQEIADNFIQDVASSERVVTKS-----LRVDVTAAQYDVMVSFVFNLGAGNFKR-SALLKKMNKKQFKSACYEFT--RWVYVN---------------------- +>tr|A0A1Y6E5W0|A0A1Y6E5W0_9SPHN Lysozyme OS=Altererythrobacter xiamenensis OX=1316679 GN=SAMN06297468_0118 PE=3 SV=1 +-SFKQALVEEEGVREVVYRDVAGYPTVGVGHLVLPEDGL-------------YVGQRVSYARILDFLERDLAKAEAAVQRL----VGDLKLYQHEFDALVDLVYNVGEGNVSseESPQLNAAIAAGDYDAIADELDYRhaggKVAKGLAIRSERRAQIFMDASYD----- +>ERR1700753_3857323 +-----------------------------GHRLFDPNDP-------------LRTATITEDH-ADMLALNDLGRVsvyldHALGI------LIGRMSEWQWAALLDFVFNIGSGRFDG-STMIVDIRSNKWDDVPAQFP--RWVHAK---------------------- +>tagenome__1003787_1003787.scaffolds.fasta_scaffold1589885_1 # 3 # 176 # 1 # ID=1589885_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.707 +---TPVVAYFEGRNLLAYLDPVGIPTICEGWTYGVR-----------------LGDVATDAECDQKTRLALQEAADIFQRW---VPakVIARMDTKSIAAFLSLIYNTGPGKPgvkdgfvwlkngRH-STMLLHLQAGRIQPACAQLS--YWVSAG---------------------- +>tr|Q2NQW1|Q2NQW1_SODGM Phage lysozyme lysis protein OS=Sodalis 
glossinidius (strain morsitans) GN=SG2189 PE=4 SV=1 +----VLIQWHEGVLYTPYRDSGGVLSVCYGHTGAVAI----------------SS----PVSATSLLDSDQKAAMAIVDA-----NVTAPLTENQKAALASFVYNGARGARAL-WwaailagsALLYFHqKAKDARESAIGLK--EAVTETT--------------------- +>tr|Q7UD42|Q7UD42_SHIFL Lysozyme OS=Shigella flexneri GN=S0944 PE=3 SV=1 +----VLItgpggnDGLEGVSYIPYKDIIGVWTVCHGHTGKDIM----------------PGKTYTEAECKALLNKDLVTVARQINP-----YIKVDIPETTVSAPSATSCEnCPVWQQSR-PSILMDTNDEFPDNKRYSLL--PFLFA----------------------- +>SRR3989344_4594077 +---IELIRELEGIHagpaEEPYNDSRGNCTIGYGHLIHRGNCVGSQE-------EIGFKERIKREGPEPLFQEDLEqRAMRYVRE-----SVTVPLSQSQFDALTIYTYNRGSGNLQI-LVDQTQLNQGNHQQIPSRML--TEEHIPstipelEGRRCDEAELFKNGDYTIQ--- +>ERR1719499_2642919 +----------------------------YGYNLRAGSDSV-VAA---CggNRQEflNGGCAKNEGVCDCLLQGSYNTAVSGARS---VFGN---VCTCVNNVLVDLTYNLGRAGIAGFYQFDSYIKQGNYAAAASDLRGTKWCGQVGSRCTRDTAQIARGC------- +>ERR1712185_412189 +-------RKRQGSGqpppcQSRYRPDQRLRRVSLGAVVYVGRWWTQAWGCTK--GITKNMRPGTMAEVGEMLKRDLERFERGVHR---LC--PVPLTQGQYDALVSFSFNTG--SLSV-STLRKKLLRGDYEGVADEFP--RWVFAAGR-------------------- +>tr|A0A258KE98|A0A258KE98_9CAUL Lysozyme (Fragment) OS=Caulobacter sp. 
35-67-4 GN=B7Y78_12060 PE=4 SV=1 +--SLLVSLGVLGLAMFGGGVLWiltvasgpgfsQIKMFGYGASMIgaicf--------------------------VVSAYLLLRKLAGPDQDDED------------------------------------------------------------------------------------ +>SRR5579862_6738054 +--FGILLLGALGVALLALAAIWgfqpdaeqvlgVApRTAGMTIGAVgavcl--------------------------GVSVYFMLERLGMPRDRVSRX----------------------------------------------------------------------------------- +>SRR6185436_15561611 +--VLDIVLGLLGLAFFSFAVVWgataradvidGLvspWMVALLAGVAgigfv--------------------------FVAVMRLLQRVARAAEQD-------------------------------------------------------------------------------------- +>RhiMetdeSRZDD1v2_1073273.scaffolds.fasta_scaffold4061113_1 # 2 # 199 # 1 # ID=4061113_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.702 +--RVHLRYQVLGQVYAVLPAARGAPVVGDGYRLPEP-----------------VDVRQRKGYLEALLLYDLIEVAHVVNE-----WTFVPLNQNEFDALVSFAFNIGLENFQR-STTLRRLNEGRPLEAALAME--LWRRADL--------------------- +>ERR1700749_5118701 +---SRLLEPWEEYVGYVYDDKrpkvhghypewdggpvRGTLTFGFGHTDAAGGPKI------------TQGFRCTRAEADERLSLDLAPCVAAVNRL-----LHVQVTQHQFDALVDTYFNCPSA--SI-A-AIKLINAGRINDVPGKLL--QYVCSKG--------------------- +>ERR1700742_1442124 +---LAICMESEgyanrqpDDSCVAYWDDsGACWTCAYGTTGEDIV----------------QSTHWTRDEAQSQLLTRWNEAQQGVLRA---SPIL-GQAenYNRLEAVTDFAYNEGVGRYQG-SSLRAYVDRQSWLQAADEFP--KWDLAGGkvlaglvKRRARERTLFLS--------- +>SRR3990167_11559521 +--LKPFLTKAEAPIMRVYDDlrpklvlktgskVLGTLTAGIGHTGADV----------------VIGLKITPALVDRWLDMDIETAVLRLYNV-VKVDVIDTLTDNQYAAMISFVFNLGAEP--T-WTIWKVLNARQYDQVPAQLA--RFVNAG---------------------- +>SRR6267154_538932 +---------------------TNHPTIGWGHLIKdGE----------------DFTSGITQDQADAMLVADLAYAEGVVGRK-----LRDNAVQALFDAFVSLIYNLGDC---A-DHLFQVANAKDPMALAKLWM--QYDHSG---------------------- +>SRR5579863_5220754 +-------------------------------------------------------QALAQLHADTATAAAAVNLAIHVRL---G--IVASRAQARFDALCSLAYNIGAGAFTS-SSLVTAINvkgaPRDWTPLGAYWL--EWDHAD---------------------- +>tr|A0A2A9NKB1|A0A2A9NKB1_9AGAR Glycoside hydrolase family 
24 protein OS=Amanita thiersii Skay4041 OX=703135 GN=AMATHDRAFT_49724 PE=4 SV=1 +--SIDFIKGFEKFADSPYFDTGHKPTVGYGHLCTRDGCTE-----------LGIKFPMTVKQGEDLLQKDLGVSRGCITT---DTTDRVRLNANQYGALVSWGFNVGCGNVKK-STLLARLNKGEDPnkVAASELP--KFNRDsTGavvagltRRRAGEVKLFQTP-------- +>SRR6516164_3870694 +----QVLVDREGFSLTAYQDSVGVWTIGCGHAATSNVPPiP------------HEGMTITPQQAWLIFDRDTDAYEEGVTA-----FLKVMVTNYEFDACVSFAFNVGLTAFET-STFLERLNNGDRTGALEALL--WWDQPPeiIPRRQGEYVEFRDGIYL----- +>SRR6266850_8434744 +--------NGKDTSSMHTTIANdhpvapgdvvhGTLTIGYGHTGSDV----------------FPGLTWSQQEADESLRYDIAAIAGQIAP---L--ITGSLTDNSSPPSCASPTTLGC-------------------------------------------------------- +>SRR5690606_4217366 +--GLKLIREYEGLRMNAYADNFGTWTIGYGSVRWADGRLV------------MEGDKlQNQEEAERLLDYSLQSAIRVVNTH-----VKVPITQNQFDALVSLVYNIRGRNFND-SGMVELLNGGEYDRAGLVLL--QLDDPE---------------------- +>tr|E6KYC7|E6KYC7_9PAST Lysozyme OS=Aggregatibacter segnis ATCC 33393 OX=888057 GN=HMPREF9064_1159 PE=3 SV=1 +--GAQAIGDEEGCRRDPYRCSAHVLTYGIGAAVTGGTMI-------------LENKRYTDEEIAEQYAKDLKKSGDCIML---YF-NGADMNQNQIDALGSVIHNLGCGGARyyydkksgkrlK-TQLYKAALDKDFVRMCNTFT--NYVGVNG--------------------- +>tr|A0A1W5P530|A0A1W5P530_9VIRU Endolysin OS=Pectobacterium phage PP2 OX=1897743 GN=PP2_045 PE=4 SV=1 +--GLENIAQWEKYATRTYLDGVGVPTIGVGSTRwFDGKAP-------------RSSQTASVDEAARLFIRDVKEAEKCVKE---RM-SGNLMPQKVFDSAVSLVYNVGCSGVTwnpkynrQ-TNIRLQANAYNWTKVCYHLG--DFIYSGG--------------------- +>SRR5690242_4232158 +--LIPIVKQFEGCRLEPYWDAAGFPTVGWGHLLSRERNANL-----------NQWHPITQEEADNLLAADLDPALKDAAI---MS--PSLVlIPSKHAAIADFIYNCGPTNYAR-STLRECVDTGDYHGVIAELE--KWNHAGG--------------------- +>SRR5271170_7196512 +--GLQLTARWESCRLLPYLDAVGIPTQRFGATRGLDGSLVK-----------LTDPAWTPAQALMVLRRDALIAARGMES---LV--DRQLHQNQVDSLGCWVFNLGAQRLKT-STLLRCINGRLAAQVRAEWV--KWNRAGG--------------------- +>GraSoiStandDraft_41_1057321.scaffolds.fasta_scaffold397676_1 # 1 # 351 # 1 # ID=397676_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.564 
+---FDeiynwlrAQEGPPSYSAVKDGKKKdgtQLYSIGLGHQIQPNELYL-------------MTALLNDEQVLEIFKKDIEGIRLGMNR-----VIKVPINKNQQLALLSLRYNIGGPAFDK-STLLRRLNEGNFSDAAMRFA--EWRLSEGkinqglvNRRERERQLFVKPV------- +>LauGreDrversion4_2_1035121.scaffolds.fasta_scaffold830583_1 # 146 # 709 # -1 # ID=830583_1;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.693 +-----dqinYIINAEGFSPTAYADPPnqnTTYSIGYGHQIQAVENYL-------------MTATIDQAQALKFLNNDLAPLIAQINN------ANVTFTQGQFDAAIDLGYNAGPGALNG--LIFQFVNNGPDAVAAWLPNHYITINKGAtilqsliNRRNDEVNTWNNX-------- +>SRR5579862_5901577 +--------------------------------YAVTS----------------TTPEISRPDAARLLERDIGGSLGPAIN-----ALGLPLNQSQFDATCSLIYQVGANALDATSHFGAQLRASDWRGAGDAML--DYIRH----------------------- +>SRR3954454_2353319 +---ARFVAKWEGFVAEAMLDtiaSPAVWTQGFGHTHMAGPPNP------------RPGTRWSRAFALKVLAHDLHAAARTVAEK----TRGIKLSVRQRIALISGVFNCGSGILDD-SDIMGPMRRGDWKRVGREWE--DWSHAGGVVVEGLLN------------- +>SRR5215207_669505 +--TLTAIANRFGVSLAALKAAnp---------QIYdpnR---I--------------FPGQVMTIPGSSPA----AGS-AYVVQ---------------SGDTLTAIANRFGVSLA-----ALKAANPQ------------IYD---PNRI------------------ +>tr|B2ASY2|B2ASY2_PODAN Podospora anserina S mat+ genomic DNA chromosome 6, supercontig 1 OS=Podospora anserina (strain S / ATCC MYA-4624 / DSM 980 / F +--TVDLIAEFEGFEPNVCMSSWFDLAVIFKHSDMRGQTSI-----------LLEIQPSAMATSvsklaalrcrIQSRSPRLMRFEQCITA---MITG-ATLNLNQYGALISWSFNMGCGAAQT-STLVARLNKGENVntVLAQELP--RWVYGGG--------------------- +>SRR6476646_338570 +----AHLKHFEGFVGHAYYCAAGKLTIGYGHRVDRDPRY-------------TKASIITEEQGHLLLLEDIAKYTEMAIRISRPAPdkpsILTSASPRRLNAIIDFCFNVGGAAYAG-SGLRSRVLAQDWPEAAYQ--NSLWDKVTDPK------------------- +>SRR5215210_5406398 +---FPTRRSSDLFVPYAYDDPAGHCTFGVGHLVHRGATTAADRT----TWG-TKAKPKTRAEVMRVLREDLAtRYEPAVRE-----AVKKPLLPHQFDALVSLCFNIGTSGFKG-SSVVKRINAGDFRGAADAIL--MWNKPAmlTPRRGRERELFLNATYR----- +>SRR5574337_534586 +---PGTAM----GRPGSSSDGIGPAGRCYGASR-----------------------------CVGRPRHGYSRVGAMLSS---------------------LSLKIGVAALLV------ALLGISLYGMIHAKQ--EQARAEVK-------------------- 
+>AACY02.3.fsa_nt_gi|133326044|gb|AACY020394175.1|_1 # 3 # 1091 # -1 # ID=472_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.407 +---GALVAKYEPGPHKNKPFWDpygKVWTVCEGHTGG-VD----------------PEHDYSDAECKAFKDADIAIANASLKRC-----LTMPMLVQIEAALTDAVFNLGPQVVCG-STLQRKAIANDWPGACAELD--KWSRGGG--------------------- +>SRR5210317_1927282 +---------------EIYLCSSKKMTWLYGRNIEDRPITDlEWDV-LKKKLL-LRKKTGFKEWADYIFRCDVRKVRIELSNQMisNNLTNFDNLPDEVQVILLNMGYNMGTAKFNpkKWPKFFEAVSNGEWEKASEEMMDS---------------------------- +>UPI0004C6C67D status=active +---------------YIYLCSQGKMTWLFGRNVDDRPITDyEWRM-LKRLLE-LgmEPEAAFDEWADQIFRNDLWNTESGLKRIV-GRDLWNKTPQEAKLIALNMAYNMGLSRFGedKWPNFYRCFREGDWAECAIQMQWT---------------------------- +>ERR1700743_3758051 +--AIELIKRFEGYRQTAAQLPDGRWTIGHGHTLTARQ-----------------GAEGSPDDAAAPRGRAVVAGGCRS-P-----AALRPDRRRA------CAERGGVHAADP-EPVX---------------------------------------------- +>SRR5271165_850263 +-EGYRFIENLESFRPTPYRDSGGVWTWGYGQTGANP------------------PPYVSLFDAEQQLKSVLDHvA---GEI---Y--LPPTYTGAQKTAILSVAYNAGTSAVLS-SSWFKLLLARDLRA-LELWV--KWYlhDKEGhflpglaARRQKEVALFVYGFP------ +>GraSoiStandDraft_41_1057321.scaffolds.fasta_scaffold5121963_1 # 1 # 489 # 1 # ID=5121963_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.650 +-EGQELIKGFEG-FLPTAKWDYGQWSVGYGWAAEP-------------------GETVSLEQAESEFLGRLEPYAQAVRD-----RLGVPVTQSVFNALVSAAWNLGVWGGEL-DPVFHSVNNGDNLGAANKLQ--EYVHAGGevleglvNRRKAEGELIVADIPG----- +>GraSoiStandDraft_11_1057310.scaffolds.fasta_scaffold2030598_1 # 3 # 71 # -1 # ID=2030598_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.478 +-RLFNLISQVEGKYQAVPKWDYMQYSVGYGSGynwdakrpVIK-------------------TDVIDQATAKRWLLLEAQDKYNFVMS-----QVKVPVTDNQLLALASFTYNVGEGAFAN-STLLKRLNAGENKaNVATEFD--KWIYAGGkvsdglkNRRTAEKALFIAX-------- +>ERR1035437_524844 +--VISRLESFEGRVSYPYLCTGGEVTVGIGHAIQTAADALqlTwsidgrsataveTQRDYAsvaaagkgltaRSYAPLTECRMADADIDALVSSDVHNFEISLA---AALPNWSTYPAPAQAALFDMAFNLGLGGLKKFPHLLAAVDAGQWEVAAAQCHR---QGISETRNQQTADLFRQ--------- 
+>Orb8nscriptome_5_FD_contig_21_1704909_length_1853_multi_16_in_0_out_0_3 # 1612 # 1851 # -1 # ID=246396_3;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.529 +--CRLKIVEYEGVRLEAYMPTPdDRPTIGVGATHIDgKPV--------------TMGMVITMEQAMDLLDEHMRLYRTFYMKALTEESRRTRLNTPRDCAFTSWTLNIGGGAAQR-STAIKRLNAGWIEGACDAMT--WFHKQAGR-------------------- +>SRR3546814_12446088 +----GAIKGYEKFRGIVYPDQAGHLTVGYGHKLQPGEDKQ-------------FAAEIDAATAMALLEIGRASWRDRGG---------------QYGEISVvAVYLKKKKDQKK--------------------------------------------------- +>SRR5512133_2268057 +------PPSTASCSSPPTRGASTTRtatRPACGRSAT-GTRE---------------RATPRWARS-ARLAAAGSNAEAAVNA------VGLALSQSQFAALVSFVFNCGAGTLAP-SRSLGDALRQPGM-KGVPAA--MALYVR---------------------- +>SRR5258708_81692 +--TLATVKR------IQQAAATGTWPHQLARLRRP-RGISCS---------LERGaTDASWIGKGLGLEQAGQGRGACVIRR-----ATAKIIQNQFDALVDVVFNLGGASLsI--SQLLREVNSGNFSdaA--TQFL--RWDHVKgvvvqglLRRRQAEATLFNTPIX------ +>tr|A0A0L0DJ41|A0A0L0DJ41_THETB Uncharacterized protein OS=Thecamonas trahens ATCC 50062 OX=461836 GN=AMSG_08396 PE=4 SV=1 +------TAQNEGCRLCAYLDSRGIPTIGVGLNINAHPEVLeQFGLtaqeVISRGFShpevtcdetptYEPQKCLTRSQVMQIFNQDTYPIFQQCAE-----SWAPNQPQEVAGALTDMAFNMGCAGLEQFTSMRSAIMAGNYQQAATDARNSLWCSQVGNRCEIDAQCIASGGS------ +>SRR5215471_8662363 +--------------------------------------------------EQYPTRCMDKDDAAEIFRKDYkKERVRCAQK------FAPNQPPTVLAALADMAMG-GCKKLTSPPPdmeeIRKNILSCDYSAAADLILELNWCTNVGeTRCNKISNCIR---------- +>ERR1719197_1412179 +----SMLSAHEGYRECPYTDTTGHKTIGVGFNLDSGGARKIcdkLGIPFDEiykgKK--GGGRCMTPQEIDKVLDYTIANAVRESKR------VVtkyDELCCDIRNVVVDMTFN--LGSLAGWPNfvreI----STDNWSQGKwNLLHSQPWCGQVKSRCPALANIVARG-------- +>ERR1017187_9114568 +--LMQRLTSEEGEKNLPYDDATgkpvkapvGNLSWGRGFNLMKCG---------------------SAGLFAVMERYLLDALDSYLQ----SYAWYTGAGDIRASVFLDIAYNGGIHDLLGYPSMMHYAAMDDWVNCAAQCTEKDP-KLDASRYAPLRQILLTG-------- +>tr|A0A0F9SL40|A0A0F9SL40_9ZZZZ Uncharacterized protein OS=marine sediment metagenome GN=LCGC14_0438000 PE=4 SV=1 
+---IPLTKQWegtgptfkcsrsaRGICVRAYLdkiPEPDLPTICYGETsltgTRV-----------------AMGDTRTIEQCEAGLSRIMRDLYWT--KYRSG------vtikyMPPQVDAVFTDLSWNVGPFAVLK-SSALKSANRGDFADACYRHT--FYNKSGG--------------------- +>tr|A0A1M6XUT5|A0A1M6XUT5_9PROT Phage-related lysozyme (Muramidase), GH24 family OS=Nitrosospira sp. Nsp11 OX=1855338 GN=SAMN05216428_10164 PE=4 SV=1 +----MGNAIKEHYVDEAMIPVPgDRYTYGFGSTWKEDGTAV------------KKGDKTDPVRALIQLHDQLDkDYAKPVQKC-----INVPTTINEAGSMYHAARNKGAGAFCR-ERaPIWNQAktEEDYKAACESFV--GWRETVkG--------------------- +>tr|N8TZB1|N8TZB1_9GAMM Uncharacterized protein OS=Acinetobacter sp. ANC 3789 OX=1217714 GN=F975_01635 PE=4 SV=1 +----LFTTGYEGWRSKPYYD-TgRVVTQGFGSTLKPDGSKI------------KITDKpITQKQGIEYLTAHYARDAKVFNKS-----LqGIKLSQDEYDLYADFSYQFGTPAWTS-SAMLRNLKQGKYIQACKSLE--NWRFSRvggkK--------------------- +>tr|A0A2R5EFM4|A0A2R5EFM4_9RHOO Lysozyme OS=Azospira sp. I13 OX=1765050 GN=AZSI13_32430 PE=3 SV=1 +----GTLAGYEGWVGSAASPLPgDKPTYAFGSTTRPDGSPV------------QLGDKITPPVGLALMVRTVAMKEQTLKSC-----IKGKLFQYEYDAYISLAYNVGAGDVCD-SSIPGKLAREEYEAACKTIL--DFKKVQG--------------------- +>SRR5690606_5376196 +---AQQIASHEGYRLVAYPDPAtggAPWTICRGHT-KGV----------------YRGMRATHEQCDQGYAEDLRVAEQAVQRN-----VRVPLKQGQYDAMVCIVFSGGEPSLRA-STLLRKVNAGDRIGSCNQYP--RWIYANKmvldglvtRRYEEQAQCLKGGPY------ +>SRR5262249_3417943 +---VALVGGFEGCRQYAYPDPAtlgMPWTICYGET-SGV----------------KKGDRRSIEQCREGLRTGLDRYGGEVDAC-----LKVPLKDEAWIASVSWAWNLGTARyc----ADIAPIFNAGLPHQACETMS--RYKRAAGivmpglvrRRAAELALCLKGA-------- +>SaaInlV_150m_DNA_4_1039716.scaffolds.fasta_scaffold63469_1 # 1 # 549 # -1 # ID=63469_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.366 +--AIACVSTFWRLRTIAYRDPIGIPTICAGETRGVT-----------------IGDSYTVVQYKEMLGDRLVEFEMGRPAC---RKAPDAIPAGACVASLSITYNIGNSLLQV--PPCSQAQRGDIVEASNDFP--KWHTAAG--------------------- +>SRR5450432_1943143 +--ALGIIKKAEagaKVKLTAYQGEGtdTDWYIGYGHKKDVTP-----------------GMKITQAQAETFLRDDVRVCESAIVRA-----VKVPVTENEFSAMVSFCYNFNPRASQL-LSLIDLVNASKQQEAAEFLL--MYDKAMG--------------------- 
+>tr|A0A219MH17|A0A219MH17_9CAUD Putative endolysin OS=Dickeya phage BF25/12 OX=1698708 GN=BF2512_08 PE=4 SV=1 +---ISVVKHNEGYSESAYQDGAGVWTICYGDTRGVK-----------------RGMRLTMDVCDAQLRDSIAEHTKALAG------LPESTPDVVILGSIDMAYNIGVSGFST-STQKKHLLNGDYNKAGYAVLSWRYITLPN--------------------- +>tr|E1Y3X3|E1Y3X3_9CAUD Putative endolysin OS=Pantoea phage LIMElight OX=881915 PE=4 SV=1 +---TVVYHEGTGISkdglAHPYRDSASVWTVCSGDTYDVV-----------------PGRAETPAQCQARLRKSIEEHAQALSG------LPERTPDYAVLAAVDFAYHVGVYGAKN-STTFKLLEAGDPAGAAAAIGSWKYITDDS--------------------- +>SRR6266702_1069180 +-------------------------------------------------------------RT------RSRRAGASIACR-----TRMPPSSGrSATAISSKAANGTTRSTRR-RRPRSRRRSSRAGPERAAGSMWSWLNGAnarqsflPRGTTHESDRKAVG-------- +>ERR1700735_2944168 +-----------------------------------ycDGNPIS-----------ISDNEIDENKGTELFMCFIKSkIQTCFDK-----YITQELAQYQRDALGCLVDNIGVGAFEK-SALLKIVNSDpNSPDVTKHWQ--CWDKVNG--------------------- +>tr|A0A2W5R5Y6|A0A2W5R5Y6_9CAUL Glycosyl hydrolase OS=Phenylobacterium zucineum OX=284016 GN=DI570_09950 PE=4 SV=1 +---APGLVEDEGWVLKTYPDPVSIVTACGGVTGKNI----------------KAGVTYTFEECVSLTSRAMLEHGLAIRPC---LSGAAMERPQTYGAFIRFAYNTGAPGFCK-SSASRRALAGDLAGACRALQQaddgrPIWVWATK--------------------- +>HubBroStandDraft_5_1064220.scaffolds.fasta_scaffold2802128_1 # 1 # 315 # -1 # ID=2802128_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.660 +-DAVELIMEFEGCVLCPYKCPADVPTIGFGATFYQDNSKVK-----------MTDPCLTQKQAVDLLQYHLQYFWNFQE---TSIPFWNEMNDNQRGCLLSFSFNCGAAFYgnSGFNTISACLRERRWSDVPDALM--LYVNPG---------------------- +>InofroStandDraft_1065614.scaffolds.fasta_scaffold445672_1 # 3 # 170 # 1 # ID=445672_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.345 +-IVPELMNRFSGHPASSFDAQFCN---DFNALLRQTGFDTD-------------------LTAFRMlLAqmAHESANWLYMkeC---GSTAYFTAMYENR-SDLGNTQKGDGARFSgcGPIQCTGRanfsdsykylqqmeGLNDPRFMEEG---T--PYVSEK---------------------- +>SRR5690606_26153082 
+----------------------------YGFTWRNPVFRDWWMAKHGR--KMRRGDTITQTDAYAVLLTILVHDALPAVQ-----NKRGRQALNVIEAGCSAVYNLGPGALA--WKWAKSIVRGAVAAGAALLRK-TGTTAGGkrlpglvRRRGEEADIIEFNRX------ +>SRR5210317_2162172 +-EMLKGVKFFEGFRSKPYFCCAGVKTIGYGCTDKNI----------------VSRGVISENKASDILHKNLKIIQGKVLE-----EVNVPLSDSQLCALTSFAYNVGISNLKKLINGPNRLNEGNYESVENIMP--KYRIAGG--------------------- +>BarGraIncu00222A_1022003.scaffolds.fasta_scaffold170019_1 # 2 # 637 # -1 # ID=170019_1;partial=10;start_type=ATG;rbs_motif=AGGAG/GGAGG;rbs_spacer=11-12bp;gc_cont=0.593 +---IDLLKGFEGLKLNayLC---TsNVATIGFGSTFYADKT----------------PVKmgdkLKDKKAAEELLKVTLQSfESTING----LFYNITLNQNQFDALVCFVYNIGPNAFAASTLLKKAKVNPNDKAIQLEFN--KWVNSG---------------------- +>SaaInlStandDraft_4_1057021.scaffolds.fasta_scaffold695938_1 # 1 # 213 # 1 # ID=695938_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.371 +---SAFILAGEGVRQTAYPDPGtggRPWTICGGHTGPDVK----------------PGDYRTMAQCRELLRQDAELHANKMERC-----TTVELPDKRYIAFLDFTFNLGPGRYCQsi----APLVNAGRTREACDKLL--EFNRAAGivfpgltRRRERERAYCLEGIX------ +>GraSoiStandDraft_25_1057303.scaffolds.fasta_scaffold4563826_1 # 3 # 224 # -1 # ID=4563826_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.514 +---FHLVRSFEGLSLTAYADPAsplartgrgsgEPYTVGYGHTGPDVT----------------PGLHITLAKADELLNEDMAKAAQIVRDA-----IKEPLTASEFSGLTTMALNLGKIP--Psiraclnggttdk----GVKMEPGSYGSALAQFP--RNCRAGGipmrgllRRRLAEACLYSNLPWE----- +>UPI00050FAE6B status=active +---ATGIIAGLP-----IWQRAlpgDPWTQGYGLTGPNIK----------------PENGAWSeRVARINLEARIEEKLEDIQRR-----NRVVLTVGQLAALTCFLDNVGPGKTGEkgkdglfalkganrPSKLWQLCQAEDHAGAAKQFA--LWTHAGGkelpglvRRRAAEKNLYLTGAWR----- +>SRR5690242_3730931 +--GASFIEAWEGFVDHCYDdggSPGvGNCTIGVGHLvhrGPTT----------------AadvaQWGTITLDRALALLQADAKSNgADAIRqS------ITVELTQAQIDSLICIGFNAGPGCLAAGKAIPTAVNGKPADASGLDA------------------------------- +>19_taG_2_1085344.scaffolds.fasta_scaffold314862_1 # 1 # 354 # -1 # ID=314862_1;partial=10;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.514 
+--FGNYLLPFEGFDEVARKGPGeKYFTLGHGHYGPDV----------------KEGQTISRKDAALLFQKDIATR--------------IPQIQKLIPKFDSFPASAQAAMFGE--FYRGSLTNHKKKGSPKTIR--LinegkFEEAS---------------------- +>SRR6476646_5410095 +-----ELIGHEAIVQTRYRDTKNIWTIGVGHTKAAGDPDPA-----------AYTEVMPLKEVFKLFQKDMQNYADDVNA-----ALKVAVSHTEFRSISTPAAS--------------------------TKR--AWLNPstPETRRRRPSNSX----------- +>SRR5665213_941418 +------------------------------------------------------elvvvERRMDVGEDVALIAADVAWVQRDILA-----YIHPPLDHHQFDALADLVFNCGPAPLTS--TVGEFYNEKRFADAAQAMQ--ASCHSAgavvPglvRRREAEQQLILHG-------- +>tr|A0A0A0YWA6|A0A0A0YWA6_9CAUD Endolysin OS=Escherichia phage Pollock OX=1540097 GN=CPT_Pollock71 PE=4 SV=1 +--GLKFTAAWEQLRTKAYRATQneKYLTIGYGHYGPDVK----------------PGDTITADGALNLLYKDMAKAVRKASEL-----VHPSFNQAQFDAIVDLCFNVGIGVIEKDNVlgdFDDAVRLGDIPKAREIMG--QFIYQNK--------------------- +>ERR1700761_4794854 +-KGLDFIKENEGYTPRPK-DDNGHLMWGHGHDRVGFE------------------evpEFISFEDSDLLLMYDLHQRFDPHIS---ELA--PWATQNQHDALADFAYNEGLSALAT--M-LHHGQ----DQVTKQLP--AWCYEHvG--------------------- +>KBSMisStandDraft_5_1062788.scaffolds.fasta_scaffold11723368_1 # 3 # 212 # -1 # ID=11723368_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.529 +-DMVIKFEESGTFAPMPVQDPTGRWEIGYGSIWDWRTGdepssvT-------------QDTTPVDEPTARLWVGYELRRAAATLAA-----DVNVPLTDGEIAALEDFIYNLGVGNFEG-STLLRYLNAGNYQAAADQLG--RWVYAG---------------------- +>SRR5471030_1614327 +--GVNLIEKLEGFKSAATNIGPSLKVIGYAHTLSTEELSSGKIQVGS--ESIRFQDGITEGQAKRILDKDLAPCYANIHTL-----VKVPLTQGQMDAMAIFCFNIGIIRFSR-NDLIQHLNEGDYAAVPEDMK--KLTIAG---------------------- +>SRR5699024_5168237 +---ADLMKRYEGFSSAPYIDMVGVTTIGYGNTYYPDRRKVK-----------MTDKHLTEPQAEQLAMDIINLDFAPAVN--KIFKdeiASGKLNQNMFDALVSLAYNIGTSALANSNSVTGNIKKGNYQAAADGFL--LWNKGR---------------------- +>HubBroStandDraft_3_1064219.scaffolds.fasta_scaffold2349515_1 # 3 # 329 # 1 # ID=2349515_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.633 
+---LQkvqsnvnagaiapILKHAPQYGIVtkkqmcAFIATCIVESNGFNakresFAYKPERLRQV-----------FPTRIESIAAA----KALIAQGQPAVAN--HLYGnrygnrpntndgwdyRGGGLIQNTFRSNYYELQQMTGIAFGDNPKLIESMD----NAVIAAMA--FWQVKG---------------------- +>tr|A0A1E7Q831|A0A1E7Q831_9GAMM Lysozyme OS=Rheinheimera salexigens GN=BI198_12760 PE=3 SV=1 +--AGAFIAPLEndtndPAKLAVHADPIGIPTACYGSVKQ-glV-----------------IGDAFTVEQCDQQFAADLKLHDMQLRNAV-KVK----LTEQEHTAYLSFHYNVGAFNF-KQSTLLSLLNSDMRIDACIELTHacssssntcKGWIYAG---------------------- +>ERR1740117_145500 +--NPPL-PTPHAKppvSICWEGEATG---FGQKE-QLFVSLNN-FNVTSQgMDITFqaAGVSHVTCTHRS---------AVKKGQ---------------------DITIDLNG--CSK----------------GTKIDDVKYCSDTDTIEIKVTDIVTIGA------- +>SRR5690606_37050221 +---VAGFIALEGLETQAYLDVGGVPTICAGTTRGVE-----------------MGDTKTQQECVELASEEVERFTHILYKH-----TDMDIPPDTEAGLVWFIANIGETQFVR-DSLvYKRLQERDLPGVCQALI--RPGPNG---------------------- +>SRR3954463_7847234 +---GDHPAYQAQRRLRAQSLSGQRWGLDHRLRPYRARCH---------------RRPEDHPGTREDLLrQDLKTFQDGVDD---FL--EGDTSDNQFGAMVSLAFNIGLGNFRK-SSVLRDHNASNPKAASDAFL--MWDKAGG--------------------- +>SRR5262245_42479953 +--AVPVAQAFEGCLkpvpsrpgfFTTYYCPAGVLTLGYGHTNLGNvPPHI------------EPGDVWSKAEVDQAFVNDMTKFEKHVSA---LAP--GISDQGMYDALVDWSFNTGG--PAN-SSVWPYARAGDVEETCARLL--RWNKAGGrela--------------------- +>SRR4030067_35825 +--GLDLIKKWEAFYAHAYDDGEGVWTIGWGTIRWDLKTPV------------KKGDVITREEAERQLRKELQRVEDAVDSA-----VRVPLRRRPRWS---HAF---------haRGEFWQALRR--ASQSPQGRG--AFVAR----------------------- +>AP92_2_1055481.scaffolds.fasta_scaffold595246_1 # 3 # 350 # -1 # ID=595246_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.290 +--GFDIILKYEGLHdgdlkqigLQPKMDPIGIWTEGYGRAMRDSKGnFIRGTASKKLAY--ANISMHTIKEALEALKSDLSTYEAIVSK-----KIKIPLTKNQFDALVSYTYNTGGS-----STLFRYINTYQGLdKIYQWFTT-KYITADG--------------------- +>SRR5581483_2047003 +--AVAFIERHEGVRYYAYNDPSGIAacTIGAGHVL-DWRYCT----------RAQLTSRISAAQVADYLRADTNTARGCITR---A--ITAHIGQPEYEALVDLVFNAGCGSLT-YRDVGGLVNAGETARLPAAWE--NTATTAG--------------------- +>SRR5690554_5273752 
+--GKQWLKDVEQLRLKPYNDQtgqditswVEGATIGYGHLISRDDWHL-------------YQGGINEEQADQLFEKDLVPYLEVVNS-----EISAQLSQSQFDAAVIFSYNIGTNAFRN-SSAVKLINDPEATTLYSSVEEawKAFNRSQGvvnqglvNRRAAEWDMYSKGEYRR---- +>ERR1719198_970524 +---MQLIELSEGYRSCMYKDAFGVPTICWGYNLHNYNARQEVADAG-SDYDKlVNGGCTDRKVCDYLLNLYVTRSEQYTRN------IYGNLKcKWAQAVATDMVYNMGEIGMKTFEsqAFNNYLKQGNWNAAVNALHPTYWCKRTATRCVRNMNQLKQ--------- +>ERR1719198_2199636 +--------------------------------------KQQVAKAG-GNYDDiMN-gQCTTQSVCDNLLSYMVSNAESEERT------IYGSLScSAAQAVAVDMTYNLGPSGMASFNTFISLMKQGKSMRLLKMVKA--LLGAVKLEIDA-LET------------ +>tr|A0A2S4SK04|A0A2S4SK04_9ENTR Uncharacterized protein OS=Enterobacter cloacae complex sp. ECNIH11 OX=2080662 GN=C3394_23135 PE=4 SV=1 +--CVNNLKKHEGFKNKMYKDTGGNITVGVGHLLATAEMAAALPFkktaTInqghgnaedidkavsagdiKAAFNKyatdssaIPNIHLTNDAVISECIKDVQITETGLR---SLYTGYDAFSNDRKVALVDMGFNIGIPKLKsNFPNFNAAVNRGDWPTAAVESHRTGLDDSRNPRNKDTHDQLLS--------- +>SRR3712207_7527537 +------------------------XMNDHAIFLMIRR--------------PPKSTLFPYATLFRSLWEDAGTYAAGVRAA---L-PDTKLTQAQFDALVSLSYNIGNGGFRD-SSDRDVLRseEHTSELQSRQYL--VC---rlllekIKKRTVWIDL------------- +>SRR3712207_9359182 +---------------------------------------------------------CHSTVAFILFFF----------LM---I-RRPPRSTLFPYTTLFRSYNIGNGGFRG-SSVRAALAadEPDYAAVPDRML--RWVSSsgirlagLQRRRINEDRKSTRLnsshaniSY------ +>AntAceMinimDraft_9_1070365.scaffolds.fasta_scaffold773467_1 # 1 # 294 # 1 # ID=773467_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.367 +--MAQATAVfgrapstsqladLNSCLTRFnINTPARIRHflaqvghesggLKWMLELASGDAYegradlgntrA------------GDGRRFKGAGAIQLTGRYNyQRfA-DYIK---DPDVMdggaYVAVRYPFTSAGFWWHLNAINAFVdqGA-SCRQVS-A----KV------------NG---------------------- +>TergutCu122P5_1016488.scaffolds.fasta_scaffold1929144_1 # 2 # 265 # 1 # ID=1929144_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.383 
+--GTTLKTEfsnnwdgvlaaasQAGARYP--ECVAAQWAleSGWGSHVSGTNNF--------------FGLKGKG--GT-----VVntQEyI-NEQW--------lTIRDGFIDFPNLYSSVQ-----YLvDRWYKDYKGYKGVNRASNRNECA--KLLVKE---------------------- +>SRR5690606_19751008 +-------------HVA-------EYTIYFfrfSSR----RRHT------------SFSRDW-----------SSDVCSSdLYQ---AINKAAVTPSPAELGALTSLAYNIGLGAFQks---TLLRLYNAGEKAGAAAQFL--RRNK------------------------ +>SRR5262249_49179505 +-----LIQQCEGLSLTAYLCPAg-RWTIGYGHT-----DGV------------QQGDKLTRGHADTLLKTDLASY-GkAVD---VAL-GACEVNQHEFDAMVLLVQRGHRGlqGIDraALAPPGRSPRRGPCLRHvEQGHG--EWL------------------------- +>KBSSwiStaDraftv2_1062776.scaffolds.fasta_scaffold00006_315 # 353764 # 354828 # -1 # ID=6_315;partial=00;start_type=ATG;rbs_motif=AACAA;rbs_spacer=8bp;gc_cont=0.438 +--MDAAKR--AGARYPELVAAQWALESDWGRKPTTA-S-----------------------------SHNYFGLKGpGTK---VKTQEVIN------------------G---KTVTITDEFLCFRDLDDcvTYLVD--RWHR------------------------ +>GraSoiStandDraft_48_1057284.scaffolds.fasta_scaffold5591379_1 # 1 # 201 # 1 # ID=5591379_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.572 +---------------TE-----------AAINRHRGGSQT------------PP---------DVVF--------STIP---NPPPELQELRlrLREVLAKIGYCMAAGMG---TDEQVLRHDLSVEFPDEgtEVRL------------------------------- +>SRR5256885_3737265 +---LKFIGNCEGFRGKLYNDVSGNYTIGYGRSEEHTselqsPCNLVCR--------LLLEK-------------KKKFMLSVLPG-----GmDIPRLPSPPSDRTSARPX------------------------------------------------------------ +>SRR5690625_7484018 +---LEHLIKLEGGAqLVMYNDlggQKGHCTIGVGHLVHKGVCNGVV----P--SEKPYLKGISLSKAKELLKNDLRSEERRVGK-----EGRDRWPQRDEGRQSAX-------------------------------------------------------------- +>tr|A0A0D5LS60|A0A0D5LS60_9RHIZ Peptidoglycan-binding protein OS=Martelella endophytica GN=TM49_17255 PE=4 SV=1 +-PGAAFVRGHEGAPTVAYLDPTGTPTLFAGFTMNSPYCRLELAKIGi-TK--IVPgKTKISVADGDRIFRAVLNqGYAKEVVA---N--SPADRTQYQLDAATSAAYNLGGRVVSTW-RFGNLWRAGKLKAAADYLA-THYntskGKKLPglvRRRQEEALLFEQGVYTG---- +>SRR4051794_26467368 
+---MRFLMREEEFRSKRYGDLSKNCTIGYGHRC-----QA------------GDPQTVTRAQARVILARDMDAAARTVRR---VAP-GAPLLQQEFDMLVSLTFNNGAAPFVRRTKLRADLLAspPRYASMPAHMLPFSgFHDRNGK-------------------- +>GraSoiStandDraft_15_1057317.scaffolds.fasta_scaffold4202054_2 # 159 # 272 # 1 # ID=4202054_2;partial=01;start_type=ATG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.535 +--AITLIQQFESCHRtsdvddliHAYPDPLsgdAPWTIGWGTApaipMD-G----------------RGVERCDQPRCGRFDlrGAPAGRLLAAPGRT--G-GDQgqaGRTTGTT--GRGGAQLDPGVGSLLG-ADPKPL---GX--------------------------------------- +>SRR4051812_2526447 +--QLAETKSFEGFRSRPYQDAGGTWTVGYGETNAKVVNA-------------ARLVPLPEIVAARMLRRRMDrDYGVHVDL---L-G-L-PLTRAMYDALTDLAYNKGPGVLRD-EGpgsLGHALRLHKWNAAANHIL--AFdRDRNGtrlaglvRRRAANRTRFLSDLP------ +>SRR5476651_2405217 +--VIARLKLFEGCINHMYQCTGGEVTIGIGHALQTAADAArlpwqrdgasaesgQVQSDYAavaaapkgqpaTAYARLTRCRVRADDLENLVAADVQSFETQLA---AALPNWSSIPTRPGKRSSTW--------------------LSTWDSEASRS------------------------------- +>SRR6202035_5300818 +---AAARKLKEGFRSSAYQDSTGRLKIGYGFSIAD---------------------GISKAAAGALLLAQVQERAKALEE----YEWFQGLDDMRASVFVDLSLDNGIAGLLHLTKTLASAGDKNWPATRDALLDSKAAKANPARFGVLAQLLLTGA------- +>ERR1035437_299371 +---LNAIRAYEGRALRAYQDVVGVWTIGYGITAMDKGLGFK----------VGPGVTITADQAEELLYSSLRRnYMPKVEQ--VLNFDRCPNPQGAMDGGMSFHFNTGGI--GR-ASWPAQLMAGNMSSARTAFE--SWNKAGGkvvgglvRRRASEWNIISKGDYG----- +>Tabmets4t2r2_1033128.scaffolds.fasta_scaffold661249_2 # 171 # 329 # -1 # ID=661249_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.736 +--GIEFIKDKEELVLFVYDDMaqyppvpyvegtdiVGTLTIGYGHTGVLTGTqQAVI---------DLIGKEITEEEAEKLFEDDLAKAQEVVNKR--IKRhiaipkfiekYGkfevNELEDSAYDYLVITAFNRTGLN--H-ASITGNLLTGNFDEAHAKVVEL---------------------------- +>tr|A0A1R4LQ83|A0A1R4LQ83_9VIBR Lysozyme OS=Vibrio ruber DSM 16370 GN=rrrD_1 PE=3 SV=1 +-----ITGTLEGQRNQAYQDPGGVWTVCFGETAGVRQ-----------------DMSYSDQQCAMMLASSLNYHNEPLEN-----LH-YQLPPNVHIAALDFSYNLGTNALRR-STLYRKLKQRDIEGACQEFN--RWVYLN---------------------- +>SRR5690625_4462925 
+-----ASVAllvgLEGYPGKPYYDVAGILTDCYGNTKNVSP-----------------NRIRSDLECRELLEGEALRIGEFVYadV-----PAEHRIPKNTLAAVISWTYNVGDGAYRG-STLRQYLRRGDWAAACHQMS--RWVYIT---------------------- +>SRR5690625_5350568 +---------tplvLHSFPTRRSSDLAGVLTDCYGNTYNVSP-----------------TNIRTKQECTKLLNDEIYRVGYIILedN-----K---DIPVSILAAGISLVYNIGTTAYSK-STFRKYIIRNEFYNACYEIP--KWKYIT---------------------- +>SRR3990167_9480122 +--GLDLIKHFEGCPVdketglaIPYDDN-GSPAICFGHSNRSRKPPIV-----------TDSLRLSVAECNRILAADLVEYEDDVKR-----IITVPLKQHQFDALTSIAYNWGPGNLDR-SELKNMVNAGKHDQAQAEIR--TLLPPKT--------------------- +>SRR3989339_379235 +--------TLDGINefqrLFAGPLiitsgtdgehsilNEMNHATGYKLDIAPAEGL---------------GTFIRVNGTATGLVSGYpsyTIDSVDGYRYriLDENDHWDFLITRRX-------------------------------------------------------------------- +>KBSSwiStaDraftv2_1062776.scaffolds.fasta_scaffold4453107_1 # 1 # 414 # 1 # ID=4453107_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.674 +--------EKGGLIefqfASNLAkfidhaksegynIrvtslndgvhaEEGTHPIGRGADIGGLSAE---------------ESRKMGEwwinqgNNGKRYIDGYkdnHLHID-IGKPpkX---------------------------------------------------------------------------------- +>UPI0003CFDE5B status=active +--------AKCGLTeeerVG--PFrccekKLTVDTYEYKYALLPLNQS---------------CND----VCGSGFGSGW-----------------VLKDPDKC------KQSLGYX------------------------------------------------------- +>BarGraIncu00222A_1022003.scaffolds.fasta_scaffold209936_1 # 1 # 603 # 1 # ID=209936_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.723 +--------TM---------sqhvstgysYtcccdSEIGATWSFDPGI---EKQ---------------AGDASAS-----LN-Q---------------------------LLACMkqKLPTGVGRISS-------------------ISDSNYIGTPSSCYSTGDA-CKLLNPP----- +>KBSMisStandDraft_5_1062788.scaffolds.fasta_scaffold3675940_1 # 2 # 403 # 1 # ID=3675940_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.637 
+--------GIGGGG-----vgagvatgysYscccdSEIGATWSFDPGI---EKQ---------------TGDASAS-----LN-S---------------------------LLACMkqKLPAGAGRISS-------------------ISDSNYIGSPSSCYFSGDA-CKSLNPP----- +>SRR4051812_28001742 +---LIELANYESMSLKPYLDSGGVKTVGIGSTVSDIKDLPSW----------PWTKEISPQEAVSIYQDHIDVYVMAVDR-----KIKREIPQNKFDALVSLCYNIGVGGFSG-STAARLVNEGANdEDVCNAIK--RWNRDNGavvkglvNRRAKECELYTTGKYN----- +>tr|A0A1X2I864|A0A1X2I864_9FUNG Lysozyme-like domain-containing protein OS=Absidia repens GN=BCR42DRAFT_421156 PE=4 SV=1 +--GLSLIKELEGWNATFYDDGRDKFTIGYGHNCDADSTHC-----------ANIHPPISKAEGESLLKQDVSGFEKYVCKT-VVHDLKCPLNCNQFGALVSFVYNVGTSPNG-FPRSKLYEDLANNCNYKATSN--DWTS------------------------ +>Dee2metaT_18_FD_contig_31_3607753_length_254_multi_7_in_0_out_0_1 # 3 # 254 # -1 # ID=1323530_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.488 +---VAGIAVHEGYRGRAYDDGVGVRTMGFGGThHEDG-RQV------------QTGDTVTPERAVVMLARDADRIWREAAAC---I-GDTPLYPREAAAYQSLAYNIGASAFCR-STLVKRLKqtPPDYAGACREIL--RWTRAGGR-------------------- +>tr|A0A099U2N8|A0A099U2N8_9HELI Lysozyme (Fragment) OS=Helicobacter apodemus OX=135569 GN=LS72_10875 PE=3 SV=1 +--------------------------IGYGRNIESNPLSVEENKL-------LKDGKVSKSVAMRWLKEELSRSYDSLDR---NFKFFKALPLKKQGALVDMVYNLGFSKFKTFKNTLKEIELRDYEKAAQRLEASLWYKQVKNRGKVIVGFIRG--------- +>UPI00084A981F status=active +---GPLVSRFEGERRVPYQDLNGIWTVCRGETHVE-------------------MRTYSPAECEAMFVKSLTEHGQDVQQC-----LPDDLPP-rIAASAWSIGYNMGATKFCK-TNFAKRLRAGERDSACAAIS--ELTTINDG-------------------- +>JI91814CRNA_FD_contig_61_2199860_length_798_multi_1_in_0_out_0_1 # 1 # 474 # -1 # ID=639171_1;partial=10;start_type=TTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.614 +----VDYIEQVENPNLKYGMihksaEg-GNDTIAFGHKLTDKEiknnkV-----------------YGYNLNeltakNAKHILLLDLQKADEQlqADY----GEKYNKLDKKRKQMLIDFQYNMGSEGVKQFKNFKE----GLFSNDIDKMKkeyergftnke-GEFKKL---tnRNK---EFFN---------- +>ERR1700742_2055056 
+---IALIQFFEDYAEKAYRKfSHEPWTCGYGHTRGVTE-----------------ITTCTPEVALQWLKEDVAEVEAVVNA------LGYELTQHQFDALGSLGYNIGGHNLVSATGFLAAVAAKRYVDAGERFL--SFDHID---------------------- +>DeeseametMP0441B_FD_contig_123_15086_length_451_multi_10_in_1_out_1_1 # 1 # 450 # 1 # ID=32296_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.420 +-----LIQEFEGaPRLKARLCEGNSFELSYGVTFDLNGNPF------------SADSTCTPEESDALFRNALELFERGVREL-----VTVPLNSNQFSGLTAFAWNIGLANFAT-SSVLRRVNENRMDDAAACFG--MWIFAT---------------------- +>ERR1044071_4782955 +--LMNMLPVFEGNIPWLYLDTRGFVTVGIGHMVPNVSWAQqipfvvpatgapasseVIDTGFQaviaankgrtpKYYRQFTNFNLHPGWsVEDASERLENEFLPPLK---AQYPGYD--------------------------------------------------------------------------- +>ERR1719163_233137 +--DTELIKSGEGdgsihgRRFCSYRDHLGYKTICYGYNIDHNGCGPL------RAVGAScNPDCLSESQCNRLLDNTVKSARSIAKR------VMGNHCSCVQAVLTDMAFNLGEGGLSTFRNFKRLVDKGDYAGSIREVQNSQYCGEVGPRCQRNMNQLRSGC------- +>SRR5262249_51183750 +--------------------XGTgrePWTIGGGHTTAAGGERV------------MPGMKWTLGKAVQVARQDLARVDARVNK-----AITRNHTQNVHNGFGSAGFNTGSILT---GSYAKKWNSGDESGALATLG--QYVNAGGhrmqglvTRRQEEIRIIRDGVYPT---- +>SRR5260221_3890269 +------------------------------------------------------------SFAGICLTSKMVQVEHQLMTC---T--IRTPTRHQAAALLSLGYNCGVGVhdgvagdIAD-SHLIADFNAGNITSCARRFL--DWDKG----------------------- +>GraSoiStandDraft_2_1057267.scaffolds.fasta_scaffold1378252_1 # 1 # 237 # -1 # ID=1378252_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.662 +--TIAA--FFRGQGITDlkalavilgnlqqesSLNPLA--CEAYGGPAS-----------------SLYNCPIKLSGsyymtGVGLLQwSDPGNlpgRRtnFfnyCKNN---RLNpDSVATQLAFLAQESQWQKVLPCFQATG-KDmg-TVWNNAGATdnTywKCAARWT--GWGIAGNratyatqWYN--------S--------- +>SRR3546814_2170482 +-----FFKQKTAYEmliitwssdvcssDLAYPDPasgGEPWTIGLGSTGRWEGGMI------------GPGTVWTVEQCNARFERDLERFAAEVARV---I-GDAPASQAQFDALVSFHYNRSE--ERS-EGKEGVS-PCRFRGGQDR-------------------------------- +>tr|A0A1F5LNR9|A0A1F5LNR9_9EURO Uncharacterized protein OS=Penicillium arizonense GN=PENARI_c005G04088 PE=4 SV=1 
+-----IVAAAEKEKGLPYkwgGGGCNGPT-GGGfdcsGLTQYALCQA-----------LDKKIPRVAQ----------DQYNTSMGK-----------RyprsEAKEGDLLFWATGGDCKNGVS-HVGI-FIKDGLMInaAHTGTPV--ReqaiWTSY----------------------- +>SRR3546814_19471118 +----TLFPYTTLFRSNTYLDPVDIITSCYGHTGNDL----------------KQGQTFTDEQCLDQLSKDLGEANDAVNN-----VIHVPLTSWQRAALISFKYNVGQSNLKS-STLTKAFNQKQYEADRTSVV--KVKSVS---------------------- +>SRR3546814_3380201 +----FLVAPSEGYNNNTYLDPVDIITSCYGHTGNDL----------------KQGQTFTDEQCLDQLSKDLGEANDAVNN-----VIHVPLTLWQRAALIRSEEHTSELQSLMRisyaVFCLQKKNKKHYETQX---------------------------------- +>GraSoiStandDraft_49_1057285.scaffolds.fasta_scaffold661959_2 # 113 # 454 # 1 # ID=661959_2;partial=01;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.643 +----------NGLASRTQGAFGGTLTVQTAptvtynavkdtyqftvtLTGATA----------------SvtgflkagdqikFtNTYWLQQQSKQVLYNGSApisftatvlsDANSTAGG-----AVTVTLSGVPIYDTTNPQYNA----------VSRQLAAGDAVTvigtAGQTMK--PNLFYN---------------------- +>SRR3546814_11471690 +------------------------LAGCLRFEGAGV----------------VRGKTDTAAEGEALETHYIARMYDRMGRC---VPDS-RMTFNEIKAWGHFAYNVGETNFCK-STAAKLLNRGENAAACKQIP--RWRFIKG--------------------- +>SRR5690606_39232184 +-RCVDLVAHFEGCELKAYLDPVGIPTIGFGATFYPGsGRKV------------RMGDEITMRQAKEMLADLLVKFTGMVLS-----KVKRPLYQNELDALTSFCFNAGTsdksgGQWKD-YNIWKHAQART-------ITKEYWETLaVTSGGKRLAGLVRRRKA------ +>tr|A0A063C6L4|A0A063C6L4_9HYPO Lysozyme OS=Ustilaginoidea virens GN=UV8b_3074 PE=4 SV=1 +--TLDIVKEYKGFLPLPNARERepqltlfgrsekdkaKRVKVGYGHRCHNETCAE-IG---------FPF-PLTHETATVVLRKDIVTAQNCVTV---YLDGKRKLNMNQYGALVSWANDIGCAAARR-SLLLRSLYNGRDAnrTIVRELP--KWTKEHGyqiperyASRKREVMLALT--------- +>tr|A0A1I3VR79|A0A1I3VR79_9RHOB Lysozyme OS=Celeribacter halophilus GN=SAMN04488138_11672 PE=3 SV=1 +---WAYTSAWEGYENCVYLDSGGYPTVGVGHMDPTL----------------KVGDCYSDEQIEAWFWEDMAEKVDKPLS---HCITNQSIPENTVMAVRDWTFNVGGGAACN-STLVRRLNEGRLREACEQLP--RWVYVKG--------------------- +>SRR5437868_3601378 
+---XMLVPNWRAVLLCAccvrlilaiviiqaldatmpllglaLPLPegwLPRGTICDGDTK-GV----------------KPVHVETRAGCGARLVRSMGEFRAQLVVCIPGFA---GKPLSWRSMMNSLSYNIGSGAACR-STAARLGIAGRYV------------------------------------- +>SRR5690606_874822 +--------------MTTFVDIePGQWVLAWRPEY--F----------------FPG--EEMAGCAERLQYRGAGWDHMLYASRFEILLVTNVKPKTFFAIS---KADGTGASPT-R-HRvRKVNAGDIRGACNELP--KWNRAGG--------------------- +>EndMetStandDraft_9_1072997.scaffolds.fasta_scaffold5504554_1 # 1 # 207 # -1 # ID=5504554_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.662 +----VVTGMcpgPSGYICCENgsPTPsgkgakivaaarsmVGKYPYSWGGGnnngatkgtkqtsspyCDDRNivgfdCS-----------GL-AKYSVYQGTGHSLVHHAQTQYNDCTNK---V-----AIGDRQPGDLVFFGnsattihhvaIFVGSGKMVE-APGhdsqchgilVREITLRTS-gLINKACR--LWCSASP--------------------- +>tr|A0A2A7MJ58|A0A2A7MJ58_9CLOT Lysozyme OS=Clostridium neonatale GN=CQ394_07880 PE=3 SV=1 +NELCDFIGKFEGCRLNAYYCPSGILTIGIGCTRKEVTT----------------LGRITKEAAYEEFKKDAKVFLDQLTEL--SRKNNVILDKYEKEALISFAFNCGINTLEK-STLWNNIKHNikESSIITENFL--RYVKSSkGevlqglvKRRKAEAQLFLKKVY------ +>tr|U2Z361|U2Z361_9RHOB Phage-related lysozyme OS=Loktanella cinnabarina LL-001 GN=MBELCI_1554 PE=4 SV=1 +---LLEICEHEGIVPAPYYDSVGVLTYGIGHTKNAGGIDPaDLPRGMPA------DLDAAIDHAIEVFRADIASYEARVNE-----AIKVPLAQHQFDALGSFDLNTGGIYR---AILTRQINASDPK-ASEHFF--GWLRPPEirKRRTAEKRLYETGDYDH---- +>tr|A0A0R1XZY3|A0A0R1XZY3_9LACO Uncharacterized protein OS=Lactobacillus pontis DSM 8475 GN=FD34_GL000645 PE=4 SV=1 +--GVAYV--WGGNTPAGFDCSgLVQWAYGLGANYRTTYQQTT--------LGAHHYDVYNAPkGALLFFGGDSAPYHV--------------------------AISLGNGTYVHApEpgDVVKI-----------GYS--KYFKP----------------------- +>SRR5215467_10215739 +---KNLFKEWEGLVTHEYLDSGGAPTIGIGHLLTRSERTSGKITLGGQ--GVDYRNGFTEQQCWDLLDQDLLGSEAVVNG-----AVGAVEPESVR-RPRVIRLQCGRRGFPQ-QHIARALEPEAVR--PGTQP--AGTLEH---------------------- +>SRR5882724_7684042 +---VPLVAEFEGCAKLgkdglvyPYLDKlakPPVWTRAYGRTYGISKD----------------SPPISKDEAKLELQEGLARYAAECVK---LAPE-LANRPECLAAVASWAWNCGVGAFRV-SRLRRAINEGRWRDASEFIR--RPRTAGGVelrglarRRDTEALLFAKGVV------ 
+>SRR6185437_6891147 +------------------------RSICYGRTQGVY-----------------PGMTATQAQCTAWLTQEMAAMERYVVS------VVGKQPDSRIASLTDFCYNVRKRGC---RKVLDRIAAGNIRGGCDRLL--WYDKHA---------------------- +>ERR1719329_861734 +--GVDISQALSSSTASCFKSSGVTFIVPRGYKSTGSVDS-------------AACGSLNAAMFAGIATRDVYMFPcpTCSKS-----AA----TQLS--------------------------------------------------------------------- +>tr|A0A2A4R4S4|A0A2A4R4S4_9RHIZ Lysozyme OS=Rhizobiales bacterium OX=1909294 GN=COB78_05785 PE=4 SV=1 +-KGAAFIRKHEGFVSKAYRCPAGKITIGTGFTNGSKTALQWFKKSRGHK--IRMGDRIDIVENDKLLTKAMNEEYgIGVAN---A--LGTAAPKHAKDAGTSVSFNCGPGSL-KW-SWAKLYKAGNIKQSAARLR-TTAVTARGrrlrglvRRRDEEANLLQHGDYGN---- +>ETNvirenome_2_60_1030617.scaffolds.fasta_scaffold98983_1 # 1 # 588 # -1 # ID=98983_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.406 +--TLPRWQRFEGYSINFYLDGVGYITIGMGCVIPDAVAAIQlnllhkyndtkainlakiedFNVIKLSlpnrsltYYAKLAKLYLPQPDIIKLFSFRLNFIVDFLR---RQLPIYDSLPSDVKEVLKDMAFNLGITGLiIKFPKFVKAIEKRDFKAAAYECF------------------------------ +>SRR4051812_6945693 +---VGGLQYREGSAYKAYLDGVKVWTICMGDTNDVTP-----------------GMVLTKAQCRERDERNARFAWDFVDN------AvTAPMTWGQHRAYGDYAFNAGVGNFST-SSMLSYANQGDLQKSCNAFRDHMMAGR----------------------- +>SRR6266566_4028549 +----------------------------YGHLLHFGPVTQADR---------DKYPHGLTNAEALLLLQKDAEGAASAVR--QIQ--PPIRLQNRFDAIVSAVFNLGPGVLAP-NrSLGAALRRRfiFRPRDRKAAM--LCHHAGG--------------------- +>OM-RGC.v1.005059364 TARA_037_MES_0.1-0.22_scaffold328212_1_gene395968 "" "" +--LADRILSEESPTTWMYLdtADPPVVTCGIGHALFILEDCLalpwnqpkavvqrDYQTIAGKPpgyraewYQQFTTARLTEIFVREMLEQDIERQVKILR---CHFPRYREYPEPAQEALGDMAFQLGGNFPRTWPQFSQSVRSEDWSRAAACCHRK---DISDQRNQDTSALF----------- +>tr|A0A1I0WEL8|A0A1I0WEL8_9FIRM Lysozyme OS=Acetitomaculum ruminis DSM 5522 GN=SAMN05216249_10439 PE=3 SV=1 +--GINLIKSFEGCTLTAYKLAGeQYYTIGYGHSFDKSIT---------------AGTRWTEKQAENALIADLVKFENYVTNI--ALVKFPNLNSNQFSALVSYCYNRGAGGLRQ-LI------TNSYNinSLSQNIV--KFWGSA---------------------- +>SRR5690554_6326494 
+--GKQRSEEHTSeLQSRPHL--------VCRLLLEKKKYH-------------LSEHH----------------------------------------------------------------------------------------------------------- +>SRR3546814_10896724 +-----------------------VFVLCFVHVLFF-------------------FFkqkt----aYEMRISDWSSDVCSSDLVRK---CVPSLE-HRPFQWAASTSLAYNVGVGTFCK-SSIARKFNSGDWKGGCEAF---KlYVKGGGVY------------------- +>ERR1700744_2903073 +--LISLTSSQEGVSLTPYNDKLakDLQTVCFGETNVA-------------------MHAYTLPQCKDMLGNSLAGYAEAVRE---ATPGFDSLTEGQKVAAVDMAYNAGVANYKA-STRRVMYADKQFPAACEQFL--RWRFINNgktdcaipaNR------------------- +>SRR6185437_2853382 +---YGLTRLSEAFKPRLYLDSVNYCSIAYGHLVWRHPCNGKE--------PESFRKGLTEPEGATLLVADMTLARKSLIQ---TLHDNSALNDNQFAALCDFVYNVGGDHFRT-STLRQVIDQARYAEVPEQMRRWIWAGGkqQgglVTRRNREIALFFKGQA------ +>SRR5688572_11035949 +---IVRVRDFASIAsiaprglIGGCP-PGl------------AKkrhGC-------------QPPGQARRW-lfdrpDFwglrlgdGRYFYNDGylLRFGPDgrIGG---YIP-LlggAlaignvwPAYYEPFAVPVyyESYYGLGP-------------------------------------------------------- +>SRR6476659_1593286 +------IAG--REHflwrndlAPGCP-PGl------------AKknpPC-------------VPPGQAKKIYgdswfryaPWynaagaNDWRFTDGyaYRVDTrTglVNS---FMP-LlggGlypgnpwPAAYTDYAIDPyyQ--------------------------------------------------------------- +>tr|C3X1U7|C3X1U7_OXAFO Lysozyme OS=Oxalobacter formigenes HOxBLS GN=OFAG_00336 PE=3 SV=1 +--IRENLMEREGVRLKAYQDSKGLWTIGYGHTKGVK-----------------PGMTITKDQAAKLLEQDMKDHVDVALK-----MYAGS-SEKTRMLAADLAYNAGLKAIQK-GTQFAKLAEQgE-ISRSDYTK--LYNYSG---------------------- +>SRR5437870_3549539 +--GLRDLANHEGRKAFPYLDTNGYITTGVGHLIPNGDAFKklpwvntatgqpataeEVAQGLAamqqmqndkkwgKPYaaSyfqPKTNLGLTDDYMNKLY----dSDFTAHVN---GVRDalgeaKYNALPESAKEALAD--------------------------------------------------------------- +>SRR5438105_1317506 +--LMDFIRAREGDALIVYPDSKGHPTVGAGFNLDDPKAAAilakigvNYDDVmltwqqtqddWetfgkdpadlKTdaahkaAWNdfvgyEEPEEAITSDQDDQLLTLSINNSIGNAKK---AVRGFGKLTYGAQAAVTDMVFNLGIDKFRGLRRLISDLNVRNYDAAGKEILKSQRGPELAGRAKAEVQLLKSGN------- +>SRR5579885_1784137 
+---LHAVEGFEGWRNHPYTDNLGNATIGFGHLLHRGQRREEAR---------QSaaYAGAVRCIGLVH-----------IQY---R--IRWPSRLRSFERSQCAPLRASACRYAD-RKST---------RL----------------------------------- +>SRR5262245_38704460 +-DHRKKLKEFEGYAEHMYLDGGGNVTIGVGIMLGSAAAAKtagikftnrdthaaasdaEIEADYNSVKkassGMfppSKYKGYTKldgdtSSLEKEMNARIKTAEADAK---AFYKDLDSLPSSVQYALIDMAFNLGRSKLMAYKKLKTALDAKDWKTAAKESN---RKGIQSSRNKAISDWILAGE------- +>SRR3954467_10838660 +-----------------------------------------IEADYNVVSkagsGMfppSKYQKFTKldgdtASLKKELDARLKTAESDRK---AFYKDVGDLPSSVQYALLDMAFNLGRGGLMKYKKLKTALDHKAGKPAAKESN---RNGIQPSRNKAISDWILAGA------- +>SRR5262245_63571476 +---------------------------HTTQDLPSFPTRRssdlkftnrdthaaatadEIATDYDTVSkatsGMfppSKYQKFTKldgdaASLKKELDARIKTAEADRK---ARSEEHTSELQSLRHLVC---------------------------------------------------------------- +>APAga8741244001_1050109.scaffolds.fasta_scaffold17324_1 # 365 # 1519 # 1 # ID=17324_1;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.362 +-------MEFEgGFKPNVYLDTEGYPTIGFGHLLDSTKYNIpegASKDWVPEQ---YKDIVWTKEKGEKTFLDDYLRMEKDVAS--RYGkDEFSKLPTDVRDVLTDLAFNMGPSKLfGKFKGFLKDIKSGEYGEAAKELkyknpdkgnmEMSLWWDQVGGDTTEAKNLKRSG-------- +>FEC22Drversion2_1045045.scaffolds.fasta_scaffold06735_2 # 288 # 1352 # -1 # ID=6735_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.718 +-------MNFEGYEEKPYLDSSGFPTIGIGNKLESVSYK---KGQMPEK---YSNMEVSKEKAINNYINNYLEIEGIVKK--KYGk-GYDKLPEDARGVLNDLAFNLGGFKLfDEFPGFIEDFKKGQYGEAAKELkftnpdegdmNFSKWWKQIGALNTENENLKRSD-------- +>SRR5579862_4764652 +---ATVIAPFEGFARKPYVDTVgtgHPETWCYGETAADGPPP-------------PFSQVFTKEECQQLLADSLPKYDAQLAKC---LKPdvYAALPAHRHAALISTVYNIGGGAFCK-SAMARDLNAGNVQRACDDLL--GYDHADG--------------------- +>SRR5690606_37882973 +----LALVRHEGIVPGPYRDVNQVWTFGIGHTAAAGLPAPaTMPRGMP------ADLDAGIREVFRVFRSDLARYEAAVLR-----AVKVPLEPHEFDALVSFHYNTGGIAK---AALTRHLNAGNRVAAADAFV--WMPRGMQGHREPLGRVIECALV------ +>tr|A0A200J7Q8|A0A200J7Q8_9ENTE Uncharacterized protein OS=Enterococcus sp. 
9D6_DIV0238 GN=A5889_001973 PE=4 SV=1 +---DPTLYYFDGK---------GI--TGIAHPDEKGILNT-----------IYK----------ANYGKDMPtvrravGWFSRLRSV-----STRPIVK----------------------------------------------------------------------- +>SRR5437660_9693328 +----------------------------------DANAASrlsfylrdtnimatvpekvnEYNIIKNSLpnktlkyYERLCTLYMQDIMINYLFEVRISQLLDLIY---HQLPEFLKMPKSVQNVVLDMAYNLGVSGFlLKFLKFCEYLRKFDYLIVX---------------------------------- +>ERR1035441_9468339 +------------------------------------------DALPI-----STRGRLSPAAVEGLLVSDVAGVLRQLL---ARFLWFNDFPDGPASGLLDMAFTMGVEGLvLDFPKLTIDVDRQDWPSCAEECE----RLGVaPERNAIVKSLF----------- +>tr|E4SH39|E4SH39_CALK2 Lysozyme OS=Caldicellulosiruptor kronotskyensis (strain DSM 18902 / VKM B-2412 / 2002) GN=Calkro_2229 PE=3 SV=1 +-AIVEFIKiyeYKGEYSKFAYSDKDGVWTIGYGHVLRGKELEE-YVDL-K---THKPKKAIIEEKAKEFLKNDIKAAADAINE---FMEeNKIQLSQNQFDALVSFTFNVGSAWTKNkssetRNDIIKAVKSGIDSNlerkLRDDFL--SWTKVQGevweglqRRRYDEWEMFVKGDYK----- +>SRR5688500_20331306 +----------------------NSNCICYfRVST-----------------------------YSTLSIHSFpTRRSSDLRKC---V--TVPLFQHEYEAYTSLAYNIGVTAFCR-KagpgkppNLIDLINAQRYEEACERIN--AFKYGP---------------------- +>SRR5687768_5573427 +---------------EPKPPVKgDVCTNGFGSTHG-----V------------KCGDKTDPVSAVVRAAKEIeTDYANPVRAC---L-GDALVTEGNVEGLTLLAYRVGAPTVCS-RaapgkppKLIDLFIQGRNEEGCRRIL--EFNCGP---------------------- +>SRR2546428_11448442 +----PTRRDPCPVREPR---FPRHSDHRVGFNLRRPDAKDKITA-LGVSYDAVchGAASLTDGQANTRLDADIAGAMAEART---CVRGCDRLRPH---------------------------------------------------------------------- +>SRR5690625_961561 +--------------------TIMLTLSLHDALPISNGDSVS-----------MKDRPISREEATMYLKLHAS--EDAY----KIFEEFPSLNQNQLDALVSFVYNVGFEAFRK-STMFRIMKEDINDeRIANEFS--RWNKGGGK-------------------- +>SRR5271170_5523871 +--SVAKLTYFESSIPWMYRDENGYLTVGVGEMLSSGSRAQtlafvdsagnpatsgAILSDYLrvlalppamdaNSYRAPSSLLLTSATIAALLTASVQDSDSALS---GQFANYAAFPDPSKLGLLDMIYNLGSHGLfSGFPTFMRYAENTDWANAALQCH------------------------------ +>ERR1017187_2923425 
+----------------------------------------atvqEITAEWNrvkamrpgrlaSFYAIPAALQLRQEDIDAHLLGILDQTDENLQ---RDFPGFETFPDSVKMALADCDFNLGDAKLrGTYPHFDAAVDRQDWATAAAQ-------------------------------- +>SRR5210317_450493 +-EVFDHVRGLEEFVPFVYDDKagyppkpydpnsgspKGKLTIGYGTTNPEIIK--------------KYLNKISQEEADRLSAEDINGAAETVREWQSEDPENRKLTKGMYIALIDMAYNRGEGNFRQ-SGVLKQITNGNYKQAAQEILNGKGIWGHPDRLKKDYEMFCR--------- +>tr|A0A2E8CV96|A0A2E8CV96_9BACT Uncharacterized protein OS=Verrucomicrobiales bacterium OX=2026801 GN=CMO61_14240 PE=4 SV=1 +-KLAEEVKSKEGFVAKPYKDSKGYWTVGYGSLIGDGSDAAY-------KKSPYYTGKITMGKSGIADKADLsgksvteetarammmksiTDKasRAIKSD--MLGDKFFDLSPDLQDAAISSVYRGGLS---GSPKTMENIREGKFTEAAKEFLDNDEYKAakesgsgVAARMDLLANLLK---------- +>tr|A0A0H3H043|A0A0H3H043_KLEOK Lysozyme OS=Klebsiella oxytoca (strain ATCC 8724 / DSM 4798 / JCM 20051 / NBRC 3318 / NRRL B-199 / KCTC 1686) GN=KOX_05320 PE=3 +--GLEFIKRMQGLALAPYRDESGLRVIGYGHVLNDYESF---------------PHF-TREIAETLLIVDLLQCQRELKR-----RLRVTLNQAQYDALVSLAFSCGAASPAL-DTVLTHLNHQRFADALSAWENIRIG------------------------- +>SRR5690606_21833785 +--QCDALAEHELDVEYMYRDTstdtagvgcdGGCITIGIGNKFESVDDAValvgsffitdangerraateqEIRDEFNNlprqpancsnnntasCyaythWRTRTNLVLSDEARNGLCQGRLSNEFIpGLQR---IygADEWNAMPTTVQYALIDMAYNLGTEGLRrNWPRFNRAIRNQDWA------------------------------------- +>SRR3546814_13965704 +-----------------------------------------VAVR-GAramiGrvasaFRPLTGLRLDDAGIAAVFAADFDVIVGrtrRLFR---AvGGGLASYPDAVQLAVIALAFNLGPGSGKaS---CRErGCQTVLISGVAVS-------------------------------- +>tr|A0A1Y1WN49|A0A1Y1WN49_9FUNG Uncharacterized protein OS=Linderina pennispora GN=DL89DRAFT_25383 PE=4 SV=1 +---TkaHTSSKHTqlsGIEDESSADDASN--VELSDILLPSED-ESL-------TEVESsAKPSAKTsaestsakvttpgsqgSASAIGVSPAKAVAMCAA------------------AIVALAF------------------------------------------------------------ +>SRR4051794_24631995 +---NPSIEAWED-----------------------------------------------------MLRR-------YGPL-----YVDIGF-PQQTTTHAVVMTGISGNGEAD-GTSVTLIDPALG-------------------------------------- +>SRR3712207_7951115 
+----LMIRRPPRstLFPYTTLFRSGHCTVGYGTLVHKGNCDGR-------SSEQPYAGGVTKEKATELLRQEAEQFQKVIND-----SVKVELNQNQNDADRKSTRlNSSHANISY-AVF----------------------------------------------- +>SRR4051795_1807142 +---VALVRRFAPFAAR----------------------------------------------------------------------------KHANAAGHC-SIGYGTQL-----------HTGACDGRPSEPD--EITQERA--------------------- +>SRR5215469_7394364 +---VSFIQRFEAFVAKPTADACGHCSVGYGTLLHEGACDGR-------TSEQPYLSGIADDEARRLLTRDLAQVVAAIVQ-----ASGRPLRQNQLDALASLVYSIGTPRFQR-SRLARLLADGNFGPIADEIR--KWTRARV--------------------- +>SRR5262245_23681562 +---------MNE-----------------------------------------------------AVRN-------FIRT-----LP-----IPG-S--DAIASQI------D-FEKMDFVRDSAPVV----------QGQRT--------------------- +>SRR5260221_489426 +---YTVPGLVAPLQQPSP-MT----------------CWATV------I----------------AMMASWRRQQSIAPR-----DAIAPAGQEF---LQKFDAGQPLDSA----------------------------------------------------- +>SRR5690606_23333129 +-----LITPWEGSVMDndgmhvAYVDKLGkgnPITYCNGLTGKDFRGNL-----------PKAGDKYSQADCDYMMALRVKGFEQDVVTSVSEYnspiakVNNKFVSEYQKAAIISFAYNVGINNFRN-STLLRLLNAGKHGAACDELT--KWVYVNG--------------------- +>SRR5438477_1491429 +----YTTLFRSGISLTPYKDPFGVTVIGAEHVLTQNELRSGKVVIGGRS--VDFRSGITRQQADELLQQDLDPVRKRSEE---H--TSELQsHVNLVCRLL---------------------------------------------------------------- +>5B_taG_2_1085324.scaffolds.fasta_scaffold07522_5 # 3244 # 3765 # -1 # ID=7522_5;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.331 +----ENAriYLLNGKKEK-----------TNDITALRQELSTAGFAIVGA----------------RFLEDNSRPNHPEVRY---Y--NPTDK--EQSEKIAEFMRiRLNNKGLQA-SEYKDA------TARPGYIE--IWLGRX---------------------- +>tr|A0A127T3X0|A0A127T3X0_9BACT Uncharacterized protein OS=uncultured bacterium OX=77133 PE=4 SV=1 +--VLADLKRHEGYREYAYPDPHSewgrnfpswkikwgrrpasviieelglskddvakgaPWTVGYGFTKGVKYT-------------SRTTHGIST----ERLKEEVVEHVKGLD---KLVPGWrTEHSIVVQSVLANLVYNLGELRLSKFAPTLALFKTKEYAAAAARLRNTAWYKQVGVRSVELVERLETGRIRP---- +>ERR1719326_184573 
+------------------------------------------------------------------------------------------------------RTTSALAACPAFPPLSASSRAIAGARPPRTSRAPSGAARSSLAAPAM--------------- +>ERR1719329_2001773 +--DTDLIKDGEGYRQCTYKDTMGIKTVCYGFNLERGNARGEVSAA-GGDYNAlm-SGGCANQNICNKLLDVEVRSARSAAKR------QFGSvGCAAAQAVTVDLMYNLDQEPLANSRDSPPTSRPRTGTVPLLNLKTPPTADKPA--------------------- +>ERR1719329_758888 +--------------------------------------RGEVSAA-GGDYNAli-LVAAPTKTSATSSWMSKLDLQEALPKD------NSAPlVAVQPKLSPLTSCTTSDQEPLANSRDSPPPSRPRTGTVPLLNLKTPPTADKPA--------------------- +>Marorgknorr_s2lv_1036017.scaffolds.fasta_scaffold454071_1 # 2 # 307 # 1 # ID=454071_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.575 +--AAKLAMRSEGCRLSAYWdEFGKVWTVGWGATGADI----------------KEGTVWSQQHADFRLLQRLAMDFARVkaT-----WPGADRLHPKAQASLIDLAYNRGVSLTKK-AS--DALDRRRE-------------------------------------- +>tr|X6AL62|X6AL62_9RHIZ Lysozyme OS=Mesorhizobium sp. LNJC386A00 OX=1287270 GN=X748_14335 PE=3 SV=1 +---EQMIMPWEGLRTKAYLDtLPkkHVWTVCYGETLNIK-----------------KGMKFTREQCKAMLIKRVIHDYyLPLVD---GVKDYAIAPMSLQASMISGAYNFGVAGQKG-SRTAAFVTKHQYSQACDAQT--AWNKAGGK-------------------- +>SRR5512133_18078 +-----------------------NCTIGYGHLLHLGPCTGG-----------ESTSSISQAAALDLLEDELNHCGDYVPR-----YVTVPVSQNQFNILAGLVCGVGYDRFSK-YQFVKLLNEGHYYAASQKLK------------------------------ +>tr|M1WJU6|M1WJU6_PSEP2 Uncharacterized protein OS=Pseudodesulfovibrio piezophilus (strain DSM 21447 / JCM 15486 / C1TLV30) GN=BN4_11274 PE=4 SV=1 +------AKKYVG---------------------SDAWAKFKS--------KDDFGWGKTKCNK---FAYDVLKE-----N-----GTPVPLIHEGNNPLSNDMYPP---------------VAADWANKDMELD--GWVVVD---------------------- +>EndMetStandDraft_5_1072996.scaffolds.fasta_scaffold1348660_1 # 2 # 358 # 1 # ID=1348660_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.689 +-EALELLEGFEGFTPTLKFDVNGW-AIGFGTHVDpaEYRG-----------------ESITPEEARELLRSRIAKEVEPaLRK--AL--KDVKLNQNQWDALVLWTYNVGVKAMEG-STLVKLLKEgkYDE--AADEFL--RWSNVRGrfspglcARRKRERLVFLRGVFA----- 
+>APCry1669189034_1035192.scaffolds.fasta_scaffold06517_12 # 4398 # 4502 # 1 # ID=6517_12;partial=01;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.448 +-KVIEKIEEYEKRINYLYLDSKGYVTIGVGHLVKDRNSMSSIVlcktknnvpyqlatikekqDEYDnvakqpKNYKAawykqHTKLVMKKEDIDVLRNKDIDSFYKELTNIykksKGYHDNFDNLPKNVQLALFDMIYNLGANRIvNKFFNFDKAIKAGDWAKAANESN---RSKIGTERNKYVKQLLLS--------- +>ERR1700744_5941244 +---TSEICGEEGYRGLLYDDKTgkalkkgdtimGNPTIGNGWNVISK--------------------SSSKERANIITSWFVQDAYDNLQ---KALPWVLDHPIEIQEALIDLTYNLGVEKLQTFNTFLGLVQAKKYKEAGEDLDKTLWAKQVGHeRSSNVEQLIK---------- +>BogFormECP12_OM1_1039635.scaffolds.fasta_scaffold246295_1 # 1 # 390 # -1 # ID=246295_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.582 +--IEDYIKKHEDLRLDAYPDGTGakaGFSVGYGHYVRGLA---------------QKGLNITKDKANELFSNDVQPLYNKAK---THIKGWASIGKERQSAIIDWMYNVGPNVNESWPDTWSAINNHDWVKVAKNWEISKWATQVPSRVKANVDKMKTNKP------ +>tr|A0A1X7J109|A0A1X7J109_9BURK Uncharacterized protein OS=Paraburkholderia susongensis GN=SAMN06265784_10243 PE=4 SV=1 +---------VDGMILKVYVDNKGYPTVAFGHKVLPSDHL-------------HVGDTISVERARELKKNFQNVSLRLI---------VVCVFHYINTNMTL--W------------------------------------------------------------ +>SRR5579864_3889595 +---------VDGFVLSVYDDGYHIPTVGLGHRVNPQDHL-------------RIGDEISIERAKEFMRMSLHDVESAVNR---YL--RVPLFQYEYDALISVLWNTGPFHAKhdkwpetRTEHLTKSLNDGEYSTMRDVIL---GFFAerDRSRRASEVKLFTTGVYDA---- +>ERR1700704_4251283 +-RFVAWLKTVEGFLAEKTYLGDGGATIGYGHYEPAGPRAD------------ALPDTVTEAQGEDLLLKDITErAVAPVRR---Y--VNVPLTQNQFDAMLSLAYNLSSA---SFAQIADSVNRGVgLDPIVFNFVRagSQFEGGLTHRREREIAMFNAGLY------ +>SRR5699024_11282409 +---------------------------------------L------------GPHLLSNLFPYTTL---FRSVAEKCVME---RL-DGAYMPQSVFDASVSLIHNTGCAGATYNrkrkakTNLRLQAEAGNWKQVCYRIGDRSEE------------------------- +>tr|A0A1W5P530|A0A1W5P530_9VIRU Lysozyme OS=Pectobacterium phage PP2 GN=PP2_045 PE=3 SV=1 
+---LENIAQWEKYATRTYLDGVGVPTIGVGSTRWFDGKAP------------RSSQTASVDEAARLFIRDVKEAEKCVKE---RM-SGNLMPQKVFDSAVSLVYNVGCSGVTWNpkynrqTNIRLQANAYNWTKVCYHLGDFIYS------------------------- +>SRR5262245_63231902 +--PLAITPLSLHDALPIYLDTRGFVTVGVGELLASAAKAEtlafidpdgkpsaqdAILNEFNrvsglvpakvaAFYRSPTDRKSTRLNSSHL------------G---ISYAVFCF-------------------------------------------------------------------------- +>SRR5882757_4316513 +--ATEGVHKFEGNVPWMYLDINGFVTVGGGYLIRALSDALklgfldasgqpsrpdAIAKDFLrvrnmakgrlaAYYKVDSSPCLSQATIDQLIGIKLDEFERGIQ---TLFPKLPSYPVGVQAGLLLMTYAIGVAGLQKYTQLRESLIAGNWAEAGTQSG------------------------------ +>tr|J7VFN9|J7VFN9_STEMA Lysozyme OS=Stenotrophomonas maltophilia Ab55555 GN=A1OC_01904 PE=3 SV=1 +---GTNDSAHEGRRYTAYYDSAGILTVCAGITG---PAV-------------VKGKRYTDEECTKLETAYVRTMLGHMGQC-----VRGEFEFHEIKAWGHFAYNIGTPAFCA-STAAKRLNAGERQAACTEMW--KWRLVRI--------------------- +>SRR5688572_440695 +---RQLIQKWEGYHtalpdgsCEAYLDRVavpPVWTIGFGCTEGVH-----------------KGMVMSRKEAEAFLDKEIVQTEEEVAK-----VLRVELNQNQYDALISIAYNLKGGIKKA-PTLMKHINNQDWAAASKAFM--LYTGAGK--------------------- +>SRR6056297_1097136 +--CLDFVKKWEGFHQKAYYCQADVPTIGWGTTRYCEGTVITrNGREIPISPGekVQIGDRITEVMAELNLKHGLNDCAWRIKGL-----LKVKPTQYQFDAMVSLTYNIGIGGFSE-STCLRKFNEGDIEEAADAIL--LWNKTTidgkkvvsqglVNRRREEREMFLSGNT------ +>Dee2metaT_23_FD_contig_51_918237_length_385_multi_2_in_0_out_0_1 # 1 # 384 # -1 # ID=1690187_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.534 +-EVREYLKTAEGFSPTPYEDN-GRLSIGYGTLAEGP------------------DDIVTEEEAAVRMRAYLQDVSyPELR---DIYSNFDELPPHVQFGLLNQNFNMGGTNQRKFVNQIKAVEAGDFEKAAEETLASKWFEeQNPGRAQWTADMIRKA-------- +>JRYH01.1.fsa_nt_gb|JRYH01104160.1|_1 # 43 # 267 # -1 # ID=104160_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.716 +---SEFLTAGEGFRAQVYDDKdgtiissyeesVGTPTIGYGLALQTAELREKFRKYL------GGGKTLSKSKGLSLFKDTVQNYIDGVNE-----KLMAPVTQSMMNSLTSYAYNVGVNSSF-LKRAINSLNEGDYIGASDVIRSGPYTGAgigymagLERRRNKEADLFLEDGLP----- +>NGEPerStandDraft_5_1074534.scaffolds.fasta_scaffold212704_1 # 1 # 201 # -1 
# ID=212704_1;partial=10;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.647 +---RKELIASEGYRAQVYDDAngrvvssydqvSGYPTIGIGHLIT-DKERNKFSKYL------GGGKEMSESQVSRLLSKDIKRFTDHLNK-----ELKIDVTQPMFDALTAYAFNIGVNSNF-LKRTIEKINNKDFKGAASIILSGPTTSKgvvldgLVRRRQKESEQFLSGGKP----- +>tr|A0A1C0TKY5|A0A1C0TKY5_9GAMM Uncharacterized protein OS=Pseudoalteromonas luteoviolacea GN=A7985_21030 PE=4 SV=1 +--AAKHICLYEGVIAHLYLDTRGNVTLGAGFLITDPQTLAkmtlrekstkrtasrelkakEFAtiSKLPEGRRaswyePHCQLYLPHEECIKLLEKKLSEFEHELNTLFcaqNGYIPFKKMPSKVQLALLDMAYNLGTTNLSqAWPKLLSAIRAENWSAAAQECHRKH---VSEARNKATAKLFHQS-------- +>tr|A0A167CHE4|A0A167CHE4_9GAMM Uncharacterized protein OS=Pseudoalteromonas luteoviolacea H33 GN=N476_22925 PE=4 SV=1 +--AAKHICLYEGVIAHLYVDTRGNVTLGAGFHITSAKALSklplrekstqkaasraakiqEFEriAKLPSGRLaswyeSHCKLYLPQQACVQLLEKKIAEFETELSTLFsakNGYVPFRRMPSNVQLALLDMAYNLGTPNLSrAWPNLLHAIRHENWQLAATECRRKH---VSAARNQATARLFAQS-------- +>SRR5574343_1559691 +--TKELLIPFEGYHkklsngdCIAYPDPAtGgePWTIGYGSTRHHNGNPV------------QEGEVWTREYAVECKQKVLQVCLVQLLSL------SPtlVtESPRRVAAVLSWVYNCGVGNYR-ISTFRKKVNAKEWEEAGEQCK--KWDKANgkvmKgltCRRLAESFMLLNP-------- +>RifCSP16_2_1023846.scaffolds.fasta_scaffold969775_1 # 1 # 288 # 1 # ID=969775_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.451 +--AICIIKKYEGFNERACPDPKtGaePYTIGYGSEFYPDGSPV------------LKGQLVSKKKALEYLLSEIDVIDGEIERL------NLDLDPYMHCALLSFVHSVGWEAFL-YSSIVDAIEREDFAEVIHEFN--RWIFDDehkviGgllERRREEANLFLTN-------- +>ERR1719453_1155256 +---ITRFALGVTPAQCALSKCGTEASACladagCKTKVTCALACTtkTCIDGCAGASPD--------AATSALVSCAVAQGCVTSS---VPVAAD-------------------------------------ATACTNDADSAIWAS------------------------ +>ERR1700723_2906109 +--MKDKLKQSEGESGGkpmlvVKDIGDGHHTVGWGHVDDSL----------------KLGDKIDQKQAQAYFDKDVSTMESKVADAL-TSNGGHQFSQGEFNALVDLSFNGGPAVLSttASPNLMKDMNSGDYQGMSEQLR------------------------------ +>tr|A0A2T3MJE1|A0A2T3MJE1_9GAMM Lysozyme OS=Photobacterium iliopiscarium OX=56192 GN=C9I88_13540 PE=3 SV=1 
+--GFALMGNAEGCRLDPYKCPAGLVTNGIGNTHGVP------------------ERPIDITQVATDWAVNVEQAEQCLIN---TAPKDNPMSQGQHDAFTSFVFNTGCTRFLKNkdgtsTQIARLIKQGEYVQACGQLK--RWVYGGG--------------------- +>tr|A0A1E5H472|A0A1E5H472_9ENTE Uncharacterized protein OS=Enterococcus termitis GN=BCR25_14675 PE=4 SV=1 +---IGRAYDF--------DGAFGIQCFDLINQ-YAHD---------------LFGISFRGAVAK----DLMQTGNVG--G---F---RVIPNTANFYPLPgdIFVYTNGSAGHTG-IVLGSVTttgfigvdqNGRSNNEPSTQ-R--AFNYA----------------------- +>SRR5438105_4855480 +---QRWLALLeGGHRLKAYQDTNGTWTISVGVTFYEPGIKV------------KKGDTlPDVATSEALYQKRLRTYEAEVDT-----MTRDDINAFEFDAFTSACYNIGPTAMRT-STFAKRFNQRTSIESVCEAL--SWFR------------------------ +>GraSoiStandDraft_55_1057291.scaffolds.fasta_scaffold74304_2 # 897 # 1424 # -1 # ID=74304_2;partial=00;start_type=GTG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.509 +-----------GDRDYIYLDTStvvdkvtgktiknPQPTTGIGFNLNAAHNKKflKEnnIDirsvrVVDSnGITVRGGRNLTQNEKLLMYNHSLRQAFKDAR---LYDPRFDRRPESVKMGLVDMAFNMGLTKLKKFVKMKKGLDANDYTTVAKEAKKAIGSNRLKavvhaqlL-CSRKLSILKVX-------- +>SRR5688572_4711008 +-ELQNYVIQHEGLTQHVAKESKRPPTVGAGFNLKRRGARAlveslglDYRQLLREAKTPDAESSVSADQAKQLLREDLAIAIADVQR---VVTNFPELNHRQQVGLVDLVYNLGFGRFKSHKGAVKALNAGDAALAASVITASKYVQRefSAERIADDLALLQS--------- +>tr|A0A0H2YYA2|A0A0H2YYA2_ECOK1 Lysozyme OS=Escherichia coli O1:K1 / APEC GN=APECO1_392 PE=3 SV=1 +--MKKRVTTLRHTAMVP-----ATGPSVGGATMVDGKPV-------------FPGMKLSKEKCDQVNAIERDKALAWVEKN-----IKVPLSEPQKAGIASFCPyNIGPGKCFP-STFYKRINAGDRRGACEAIR--WWIKDGG--------------------- +>SRR6516225_863928 +--T----HPFEGNVPHTYPDV-vYGWrvtTACRGHTGagPDGKPM-------------RPGETFTPAECDELEHADLTKTFDALRPCFG-DAALLKLNSNQLGAILSLGYnAGAGTVCR--SSIPGKVKAGQLGSACATIG--AFVYAGG--------------------- +>SRR5690349_13361209 +---------------------------------------------------------------DRLEARYLQRMHATLERCVS-KPVLDDTSVGEFLAYGDGDYnVGDTAMCK--SPMVAKLKAGDRQGACAAIL--DYRVHTG--------------------- +>tr|A0A0B5DZ45|A0A0B5DZ45_9RHOB Lysozyme OS=Celeribacter indicus OX=1208324 GN=P73_1291 PE=3 SV=1 
+---VPFGMKWEGTVLTPYWDRFAKiWTVCTGQTGVE-------------------MRTYTLPECMEMHETRIAEGYARMIR---AYPKLQTAPVEVQAMAVDLEYNAGLGTIRAARNTNAALRDGRWRDFCNILP--QWNKSGGRFV------------------ +>SRR4051812_17101312 +---CAGLRRRVDAVMNYCNDIANDCTCGGGTLAHLGFCTPD-----------EFQRPISVAQVDALRATKFRLTEQIVRRN---V-RGRESApitQAKAGRATRF-------------------------------------------------------------- +>SRR4051812_1532565 +---------HDGLFHVYWDPNGNVYTIGYGHTGREVH---------------TGMHPWTKGQALRQLRRDAKASLLAAVA---NV-RRLKPNRYELAALTSAGFNLGTGIFDRSHTLGAAMHHGDRHAIADAFL--LYDR------------------------ +>SRR5271165_6210941 +--ACKFIAQFEGCKLQAYTPlegLANGYAIGYGFTDRSI----------------QSNTVWTQAQCDQALQDKVQKIINSLES-----VIFTNLNQNQMTAVIDLCYNLGVGAFDRtSPSLPELLNQHAFGEFCVHLM------------------------------ +>SRR5208337_1498800 +--ASQLIHYYESCSLTSYPDPKGIPTIGWGNTFWQDGRPVT-----------LQDLPITQDEADALFLYWLHSFSADL------APLVPGAAPNELASFTSLAYNIGTANFSC-STALRQFRVGNKCAAGDGIE--MWNRCG---------------------- +>tr|R5KQ58|R5KQ58_9CLOT Lipoprotein OS=Clostridium sp. CAG:967 OX=1262849 GN=BN819_01351 PE=4 SV=1 +---LKSLEKMEEKHNKIYTDKNGVKTIGIGHALDAGETGK------------YSGKTLTDTQIYTMLAQDILDREQNIRAI-IGDDAYKKMPQAMKDSVMDFVFNRGETVFENHPGFVSALKSGDYSSAIAKMN------------------------------ +>ERR1719305_73277 +----------------------------------------------------snGNAARLSLQNTGGTNPYdapegsiIVVRAGTPGT---R-NPTAGDIAVKgsGDHFYNGGEMGYG--GSGNFPSGNNF----------------VLGIYVSTKC---------S-------- +>ERR1719247_1313179 +----GVAGKIVRQPQCCFCGNGGDGS-GAC----------------TtVDYCAskgGGYRCTQPGSGG--CEWVKKSALQPNG-----------GCVX---------------------------------------------------------------------- +>KNS2DCM_AmetaT_FD_k123_25042_1 # 2 # 343 # 1 # ID=21598_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.567 +ESLVKNMKYEEGSitdkgepVLTAYDIGDGAKTIGYGHAIFSSESRGstggDYPFL-P-KYNKiIVGKtKITKKQAEKLLRDDLDKSKKELNK---ILDKWEedgitpDIDQNMYNAMVSMIFNMGVGNFRT-SEFIQYVKRNQMDKAEEQIKKEssRSFRkfpGLKDRREREADMFGGE-------- +>SRR5579863_9352389 
+--LEAELSVEEGNRSLIYDDATGkpfkkgdtlkgNLSAGIGLNLMI---------------------PFAPEELDFMEQFRLAKGQAAL----APYLWYSTQDEVRQVALADIAYNIGIGGLLHWVHFLSYMAKQDYAAAVAEIRsDALWISQVGqSRAGRLETMIETGKWPS---- +>SRR6185436_2654935 +----------EGRVLHRYLDSAGVPTGCVGHVIRPGDAAI------------LADGV-SEADCDFFLHHDLDSVEREIQRV-----VLPEIYvqltAEQLGALGA--FEFNTGGLAH-STLLRELNASHWESAADHFA--EWNKRRDPR------------------- +>SRR5262249_7732576 +-----------GFAATPYTDPAGNAVIGYGHLLHNPCSA-------------HLPEPVIFKGlvAKRFFTFGRANWHRYLNR-----R-------NRLRREIRTRCAESPSQGKG-RCSRP-CE------------------------------------------ +>ERR1700694_246644 +---GVLVSHWEGMNlvaKHLPFDPPGVITVCGGITTYDWPWL-------------KVGMKFSEADCQKALSEAAERYADEVVK---CVPSLPSMPPHRQAAIASFAVNLGPAKVCG-TSIGRDLNAGHVKDAGNAMV--KYVNANGK-------------------- +>SRR5437868_13130397 +-----FFQAEDGirdRNvtgvqtCALPI-SPNVITVCGGITNHDLPWL-------------KVGMKFTADQCNKFIMDALPRYAAPITA---CVPSFPQMPGHRQRSEER-----RVGKECR-SRWSTESX------------------------------------------ +>SRR5574337_2001255 +--------------------------ILSG----DWS----------------SDVCSSDLECDKYLTADMTRAVELVDAC------VPNAPDSVLIAFSDAAYNIGRKIACNtqASTAARLLKAGRWADACRQLP--RWNRAT---------------------- +>SRR5574337_1566153 +-----------------------------TRLSGDWS----------------SDVCSS--DLDKYLTADMTRAVELVDAC------VPNAPDSVLIAFSDAAYNIGRKIACNtqASTAARWLKAGRWAYSCRQLP--RWNRAT---------------------- +>ERR1035437_10221116 +----------------------------IGTIAYPNGVRV------------SITDpAITEEQALEYLNFELKNKCEQITK--WAVLNNVVLSDNKMSSLLTFSYNEGCDPIVsKGKALNMAILSGNEKAIRSAFN--IYNKG----------------------- +>WorMetDrversion2_7_1045234.scaffolds.fasta_scaffold816839_1 # 3 # 245 # -1 # ID=816839_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.346 +--GVNFLVGQESSRSTLYD-DVGHPAIGYGHDLLPVEIAGNYVNILGTK--RSLSaGPLSADEINGLFRTDAAPREQDIRR-----WCKVPISQTQFDMLFSLVYNVGYPKS-----VFAKLNTGDYNVG----------------------------------- +>tr|A0A2V1M117|A0A2V1M117_9VIBR Uncharacterized protein OS=Vibrio sp. 
T21 OX=2007197 GN=CCD93_01240 PE=4 SV=1 +-KLKIKLEQYEGRVEHMYLDTRGYITVGVGNMLSDVTAATKlpfvhsstnepatheqikeeFLRVKARPFGEsepasrfkpFTVLKLTESVMNEQVAHHIQSFEKELKVI-YGDEAFTSYPDNVKLALFDMIFNLGMPKLKdTYPKFNGHIRNGNYQQAALESKR---NGVQAERNAYVANLLRS--------- +>SRR5580692_8313843 +-----------GLELKAYQNKlkdgkLDKLTVGIGHNVEASPVPGV----------TQVGDSITMNQAYSLFATDQANAESQVSSNFdswsstsSNPVMWNDLTPGQQQAMTDLAFNMGPS-FNGWPNFKADMKSGNFQAASDELAKgtkpgttSQYVKDVgPTRSGNVRQEIL---------- +>APIni6443716594_1056825.scaffolds.fasta_scaffold11169514_1 # 2 # 214 # -1 # ID=11169514_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.709 +--PNDI-------------APagsPPGAPVTWESMI-------------------------E-------LARVAGARYPELVA--A---QGALESDWYRKPS---GTHNYFGLKGsgTTKST-QEFIDGKWITIKDGFL--NFGS------------------------ +>APAra7269097080_1048540.scaffolds.fasta_scaffold00431_6 # 5325 # 6260 # -1 # ID=431_6;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.644 +-------------------------------------------------------------------------------L--VRMPSWQGQWSAPRPMTipmSAERFADRFRFYKgqPQQQRGV-------------LE--LHAV------------------------ +>tr|A0A2V5DK07|A0A2V5DK07_9SPHN GH24 family phage-related lysozyme (Muramidase) OS=Novosphingobium sp. 
B3058 49 OX=2183925 GN=DFS19_103137 PE=4 SV=1 +-----TVRHVSGkQYLQAYLDIVGVATICDGLTSIEGRRV-------------TAKDKLTENRCAVLLEKELVTHAEGVMQC---TPGLaltIPRRDYVRFAAVSLAYNVGVANWCG-STARRLINAGDVRGSCNALL--AWNKGR---------------------- +>tr|A0A1G2QFY9|A0A1G2QFY9_9BACT Lysozyme OS=Candidatus Vogelbacteria bacterium RIFOXYD1_FULL_44_32 OX=1802438 GN=A2571_00670 PE=3 SV=1 +---KQYIIANEGWHTTSYPDA-GGRSVGVGHFIVPGDGV-------------DPNATLTNDEVGDLFERDYPGYEHQARNAAaANGVHFESLTEGRQIVLIDMAYNMGAQgssGLGGFHEMWGAIKDENWERAAYEVTDTapgGYADDTGGRAVRNEQMMREGTT------ +>tr|A0A1X3AFH8|A0A1X3AFH8_ENTFL Lysozyme OS=Enterococcus faecalis OX=1351 GN=ELS84_2621 PE=3 SV=1 +-KAIDLCKKYSNFSLKAVAGRNGILSIGYGHFTNEKHPI-------------KPGMVITESQATQILRDDLNEHAALISK---L--LAIKATQNQFDALVSFSHSKGLGFLPS-SDIMHFTNNKEFNSAAREMK--LYVYD----------------------- +>tr|G8NQN9|G8NQN9_GRAMM Uncharacterized protein OS=Granulicella mallensis (strain ATCC BAA-1857 / DSM 23137 / MP5ACTX8) GN=AciX8_2962 PE=4 SV=1 +--ECIQIPVWEGNENFLYLDSddPPNATTGQGHLVANLHNSQqlpwympdgtpatsdQIADDWQrvkampgdrdaQFYKSLTGLHLLQFDIDALTCTSVQALDEPLH---VLYPAFDSFPQCAQVGIADMAYGLGIGepatakrpatGLHSYRAFNAAANStpPNFLRMAQECHR----------------------------- +>ERR1700684_446482 +--------------PWMYLDTRKNVTCAEGLMLPDVKSALalpwrspdfrrpatqgEVASEFArvsklsagfkpsGYRIDAISPLLLGSDVDDMMAAETGGFIAELA---KSFAVFWSWPLPAQ-------------------------------------------------------------------- +>SRR5689334_21578792 +---AGLLIAKEGLEVETYVDVAGVNTICIGHTGPEVQ----------------LGRRATRAICEDLLTADLDPVWAAED---QYSRDVGALPVVTKARAGPQGRLV----------VLRGLINRRVAERQlCLGE--AWSX------------------------ +>MEHZ01.3.fsa_nt_MEHZ010996174.1_1 # 3 # 155 # 1 # ID=317056_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.333 +---LGLIAILEGYgpqvrpgVYASYVDVAGVVTICNGHTGTDVK----------------LGQTVARAICDDIANKDLSAAFTIED---RYIRRPEELEPWVRAAAALFIINVGEGGFSQ-SSFLRLLNQRKITDACNAML--LWNKA----------------------- +>ERR1719262_568040 
+---------------------GGLDTIGYGIKCQSQKEQDH-----------YRKHGLTDTEARALYDKKWDEARSKAE---qNLGrENFAKATRTQQDLATELAYNKGN--LKGYNNFQKEFKQGNTQGLMEQNN--RGFFKdgeykkLITRQKEVNEAARKQ-------- +>tr|U9VXB2|U9VXB2_9CYAN Phage-related lysozyme OS=Leptolyngbya sp. Heron Island J GN=N836_03845 PE=4 SV=1 +-AAIDLIKKFEGLRLDAYPDPGsadGlPITIGYGSTRKQNGSPF------------KLGESLTQAEAEALLSWDLENRFLPPQ---TKIPTWDTMDDDQRGAILSFAYNLGAAFYgnSGFQTITRVLKDAAWDEIEAALV--LYRNPNTnveagllKRRLAEAEVFLTSE------- +>SRR5699024_544050 +-QGFRFIKGFEGFAPSKYQDSGGYWTIAYGVTAHGEPSIYNE---------LVSESPITEERAAKVSYDlKNSNYGAKILN---AVKNLGCTTQYQFDALCSLAYNCGTDVIIK-SNSLtNAIAKDptdEETIRPIWES--FYITSNGvqlpgliARRKQECNMFFNKEF------ +>SRR6185312_11957514 +-QLIKLIENSEGWSATPYADKlaHDLPTIGFGFTHYPDGTPVTL-----------NDAPIGRERGAEILEQLLAQFIHGVEA---LV---PDCSQNQKDSLTDFAYNFGLGALKS-STLLKTILINpndVSGI-AFQFT--RWVHgEGGvtlpglvTRRNNEVSLYFSPIN------ +>SRR5690606_34912435 +--LRAFIPLVEGFSPVPIWDY-SQWSWGYGTAAGyDPNV--------------KPQGTITREKAWIDAKRVIDDNKDYLQR---Y--LNKSLSGNQWAALLSFTYNCGPAAG---KKIVDTINAGSTADVVTRMS--KYVYAN---------------------- +>SRR5260363_117120 +---------------------PGVPTICYGHTRGVA-----------------LGQTVDHPACERLLRQDLTEALAAVDRR-----VSVPYPKPARPRPPLSCITFEKRKSAA-LRSYAASMRAKARAHAANSP--AGFTA----------------------- +>SRR5260363_55371 +-----VITYFEGDRHRAYLGGYTVGARWRGPVGGRT-----------------RGSVGstrp----------------------------GSPPPPRPlgGPAGRAAVVR------------------------------------------------------------ +>SRR5260364_321348 +--------------------FSRLPDLRLGQHRHLD-----------------LHAHAarsfhlekagasvvaktppangS-VRCTSK--QI----------G-----ARPRCGTATVAALASFVYHFGETKFAR-STLLRRINVGEGARACSELS--RWIYG----------------------- +>SRR6185369_15207429 +-------------RYYVYPDYKKIPTIGMGHAVRKGEDW---------------SRGLTLEQVVELFHKDLASTYNAIGT-----GVRFNYNQSQFDSMVISGHNLGVGFFDEtRSHPIYALNRNDVDGYASVW------------------------------- +>ERR1700761_5333075 
+--------------YGEYTDEAGVQTIGFGHRIKAGETF---------------SPNFSLQDASALLAKDLGESLKVVLGALG--PKVSGATSGELKGLTDFEFNVGEGAFRN-SSVLRDYLAGNKEAAMADLA--KWDKIHKggvlvtsqalqARREAEAQLFKTG-------- +>SaaInlV_150m_DNA_2_1039686.scaffolds.fasta_scaffold06100_7 # 2253 # 3122 # 1 # ID=6100_7;partial=01;start_type=ATG;rbs_motif=TAAA;rbs_spacer=8bp;gc_cont=0.377 +-RLIPWTGENEGKVLRAYRCPAGKITIGFGFTWGSSRFRE----WWMTERGQklKLGDRISEADAIYLLKEAIDAEYaPPVLK------GAPDATPHAKAAAIDMLFNCGIGAAR-W-SWFKLLAAGKVSASAERFKVTGTtaKgrrlPGLVRRRAEGAAIMEFNRWPAH--- +>tr|A0A0E3F412|A0A0E3F412_9CAUD Antenna protein OS=Synechococcus phage ACG-2014f GN=Syn7803C10_205 PE=4 SV=1 +----------------------------------------------------------------------------------------------------MIALDIFCDW-----------FTGRWNNRAQAHsnprgqaY--VMAR------------------------ +>SRR3954469_22660444 +---------------------------------VERGGCD------------NLHAPITREEGTTLFKSDVAEFESCVCA----MDNARDMNGNQYGALVSFACNSGCGGVRR--WWHGAMAKKNFQGICEALPTT---------------------------- +>SRR5947207_791778 +----XMV-IFAGRK-------GNPLIMGTTIDFLK---------------------PPLR------ARIRAATATTSDS---CNDKW--G----------LTPLYFVLPPlhhGRQFRRAFSDlARPAA--------------------------------------- +>SRR5437773_10765724 +-----------------------------------------------------------RPT----VSNLI-VSRCADR---RDLH------SFPTRRSSDLVFNLGLPGFRRFKKTIASISAGD--------------------------------------- +>ERR1044072_19978 +---AQMIQEFEGFKENAYHRKGDVPTVGFGSTYNYTAKRK-----------VKLGDVVTLQEATNWMQFDFQETIRLANV---Y--IHKPLNKFKATAICDYIYNRGIGNFLK-TQLDEMINENPSNpKILDEIRGTGlkdklgnLLRGLVRRRKSQAHLYSTGVI------ +>SoimicmetaTmtHAB_FD_contig_41_623359_length_370_multi_1_in_0_out_0_1 # 2 # 370 # 1 # ID=1147609_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.648 +---AKMIREFEGFRNKAYRDSGLVWTIGLGSTYNYTAKRK-----------VQAGDYITLDEATTWMRIEFQEVVRLANV---Y--IKQPLNPAQSTAVCDYIYNRGIGNFLK-TQLDELINANPNDlRIQKEMSGTGltdrlghVLNGLKRRRSCQANLYFTGQL------ +>UPI0005BD0512 status=active 
+-PIFDYIRQWEGdikgnHIPYVYDDQVkwvynsdlkmklpprytggtkyGTLTVGWGTTDPNVISE-------------YMDKDMDVNTAKSLSKPDIQTAANCIKRWQTRAKegevNERKLTLGMYTAMSDIVYNMGCSGFNN-TKTIGHIEKGNYKVAKNYIKN-KLEWGHDKRREKASEVF----------- +>ERR1700731_2466687 +--------------------------------------------qkraEWTmmkaKEPGripnyyqQFATLTLDKGVARGYLVDELEEAIGYLA---TALPNLDSYPAGPQDALLDMMFNMGPGEfgPAHWPSLFAAVTKKDWATAAAQSTR----------------------------- +>ERR1035441_6681109 +---VPKRErSiTSTTMWRIT----ARWEPGTWSTW-----VRS-------PAPRPKhHTCMAVRLPKSWLSSSMTWGTPSTRS-----TSTX--------------------------------------------------------------------------- +>tr|B4EMI1|B4EMI1_BURCJ Putative phage lysozyme OS=Burkholderia cenocepacia (strain ATCC BAA-245 / DSM 16553 / LMG 16656 / NCTC 13227 / J2315 / CF5610) +--AAPFVTGWEGWRNTVYKDQGGVSTVCAGHTDRIGTEN-------------ITKQTYTNEECGRILIKDLNKDEAQLRA---SIGYDVPLTQGQEVILIDFVHNLGIGALNA-GSLRPLLLRGDVNKACAKILEYKYARVGPG-------------------- +>SRR4051812_9329020 +-SLCAELLRWEPCRPHMYVDRRGFLATGAGHALRSIEAAIalpwchrasglpatiaEIARAFvrvralgrrqkTLSYRLASDLVLPPGIAGDLAIGRVErDLLPALR---RLCSDFDRYPLPARRALVDMAFDVGVAGLARFRNLIAACAQRDFATAANHCRRR---------------------------- +>SoimicmetaTmtLPC_FD_contig_31_12938192_length_208_multi_1_in_0_out_0_1 # 1 # 207 # 1 # ID=693771_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.681 +--------------TSKYL--ALHRTLGIGAVFEVI-------------------NLMNNKNAYVrvvgrlpnigLNQNVMLRLTKSVFQNLGV---------LDQQVRMKMVYYKX--------------------------------------------------------- +>SRR5215204_261717 +-SLIAKTKKDEDFCPSPYLCPAKVHTIGYGTTRYFDTGKK---------V-TMKDAPITAKEADRLLRGWFAKYVSPLVD--KL--CRDDLYQDEFDAIADFIYNAGATYVgkdgkIKYFNLFEKVNKK---IPEAELTA-YWEN------------------------ +>ERR1719197_1466613 +----TQVERNEGYRQTVH-SVNGVKHVCYGFNLEKSGASSALSAhgySLSSV---LGGQTVDGNTCAALLSTDMTTATSCAS---RFVNNYNSLSSSRKSALVDMAYNLGCGGLGNFRRLKAAVERSDWSSAADEIQDSSYCSQVGCRCRANMYCMDYC-------- +>ERR1700761_4936524 
+-AYLEALTGFERAVPHMYLDSQHVVTVGIGAQVFSENDAAKmkfvhrrngsladdtaKKGEFNKlskdpkyrllrdaqAFRSVTELDLAPGEAVRIRNIRLQAAEDDAR---RL-------------------------------------------------------------------------------- +>SRR5262249_24973513 +---VPAVQKYEGYAPTVVPDKLanGLPTGGYGETVGV-----------------KLGETHDRKYWSDRLAKRLAeDYDAGIGSI-----ClpacaRWRFPSPT------------TPALPR-SASRRRWRSGTLATSRAVRTEeX---------------------------- +>tr|A0A1C3V9H0|A0A1C3V9H0_9BURK Uncharacterized protein OS=Cupriavidus alkaliphilus GN=GA0116996_10774 PE=4 SV=1 +-----ITPQLEGKVLPGYREPIGIVTACSPLCDPGV-----------------VADAIR-----------------EWFDC------FDDAAGrgCM--------------------------------------------------------------------- +>AntAceMinimDraft_14_1070370.scaffolds.fasta_scaffold404943_1 # 1 # 447 # 1 # ID=404943_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.367 +-ETLEFIKRWEGFSSHAYWDV-SRWSIGHGTVSHE-------------------FETISKDEAAERLKAALQHFADELSE---A--IVFTPTEGQATALLSAAYNLGTGALRY--EITGLCNEGKFREAADALR--GYDHANGavltaltARREAEATLLE---------- +>ETNmetMinimDraft_8_1059916.scaffolds.fasta_scaffold1588794_2 # 93 # 212 # -1 # ID=1588794_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.400 +-AFSDYLLKFEGFDEVARRGaGEKYFTIGHGHYGPDV----------------KEGQRISRKDAGLLLKKDINKRIPRIQK---LMPEFNSFPASAQTAIFG-EFYRGSLDggKKGSPITVGLINerkfdqasKEflRNNEYRNRVK--LNRAGIGERMERVSGELMKmS-------- +>UPI00074AC986 status=active +-FYKKIIRKDEDFRGEAYKLpNEKHFTIGYGFYDPSI----------------KETDTITKPQAEKRLDLEVKQRLKSINK---LIDNFDQYPAYLRGPVFS-EHYRGSIQq---SPQTRKLMNegkfseaaDEflDNDQYRNADK--LKIPGIKARMERVSDALKKyG-------- +>tr|A0A2E9SWV3|A0A2E9SWV3_9GAMM Uncharacterized protein OS=Pseudoalteromonadaceae bacterium GN=CMK63_00130 PE=4 SV=1 +MNLIEQLKRHAGFYECVWHSN-SKCFIGYGHDLNAEPLPDYLHRDF-------ENNPITEDEAEQLLASDLMNLRDLVYQ----CLDLNQFNYVRQQTILSIAYWLTFPRFCREEKLITAFKQNNFEAAASIVLT----FKPEFRAEELAKQILTGEY------ +>APAra7269097635_1048570.scaffolds.fasta_scaffold32048_2 # 78 # 755 # 1 # 
ID=32048_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.671 +MNLIEQLKRHVAFTALVCKTKHNQRLIGYGHDINKKPLPDYLQRNF-------DYQPMSEDEAINLLASDILDIYDPLMA----YVDFTQWSMPRQRAILALVYMLGLTNFGRNRILVHVLNYGDFDHAAECVLS----ISNSGIYKEIAEQFECGGE------ +>SRR5665648_349182 +--CTDLTKHFESLHdgdltiigLQPKMCPVGIWTEGYGNAIIDPatGKFL---TGSGnKGRASLLSDIYDEAGASTQLEKNLVRYSDKALRI--VNMLAMKFDDDKFSSLVSFIYNLGAGALKN---MLTSYKNGMDI--NKVFL--LYRFAkvdgVktelpglVRRRKAEAWLFTTGKV------ +>SRR5574337_107116 +---LGLVKTYEGLGQPSKAVqtayadpylGWKKATICYGHTAGVR-----------------QGQTASMQQCENWLKTDAAQHCKLVYD--ALVPHSIWLTQGEQDAYCSFAFNLGKFKGTD-SVYGRLLKGDDWGACMGLLK------------------------------ +>SRR3546814_6078193 +---GMRISDWSS-------------DVCSsDLTRVK-------------------MRRYTDEECVNMLKEAVTeDFMKPVAKM---TPPI-ANEPYQLAAATSLAYNIGLGAYKN-STVRKKFLVGDFIGACKAFQ--SWNKVLR--------------------- +>tr|A0A1G4RIK7|A0A1G4RIK7_9SPHN Lysozyme OS=Sphingobium faniae GN=SAMN02927924_01382 PE=4 SV=1 +--LTADIARWEGKKNVGYLDLAKIPTKCFGDTSDV-----------------IVGKFYSDAECAAGLDRQATVHVEGVLRC-----APGLASQPqLLRASGSMAYNIGVAGWCG-STAASRFKVGDWVGGCLAIG--PYFTVLRKD------------------- +>ERR1700677_2576541 +-----------------------------ATFYDNGTPV-------------EEGDTISQEDADALLHEAIDSVASRLS----VTPIPEKVTQNEFDAVVSLAYNIGVSAFKT-SQTGNMFYLD--QDISKKFI--EWNRSGGkvvpgltLRRMWETNIYINGDY------ +>SRR5687767_4660252 +--------------------------------------V-------------KMGDVITIERAESLLRHTVDTIYAAAV----DRLVTKPLTQNQFDSIVSLSNNSGAAGLAK-SGLLKRVN-kDPnDPAITQEFG--KWRLgtvgGKKvvlpglvNRRKQEAEHYFKKDX------ +>RhiMetStandDraft_4_1073278.scaffolds.fasta_scaffold6797441_1 # 142 # 213 # 1 # ID=6797441_1;partial=01;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.625 +-ALLDIVKHFEGCELKAYWDRGHY-SIGYGTRAKSS------------------TEIISKEEAEKRLRDELQSSVNNVLSY----VN-RPLNDNQLAALASLNFNTGNIKQF--P-KLMAAlNSGNFNEAANQFLDINKDITGNvllglsrRRKAERDLFLGKSSGVNY--- +>ERR1041385_4850564 
+----------------------------------------EIVRDFHrvmalakgmptAPDRApnEPRVELRDAEINSMTADKLSrEYLPGIV---RMFPAFEDFPFPAQQAVVDWCWNAGVA--amRNTQHLRPAIERRDWKAAAAACHR----------------------------- +>SRR5262245_60251114 +----------------FYKDKPGTVLVGLGHNPNDAEGAKlipfvnrhtnkpatpaEIEADYKavddqpkgrrhERYKSSTKLDITEATGLDLFKKDVARMEKLLN---RIFVAWPAFPAAAQIAILDVFYTTGE-GTfr-TFTSFQRAVRHGWWMEAGRESLR----------------------------- +>ERR1039458_7426324 +------------------------------------------------------CLAMKREDVHAMVKTFLDeKAWPAIK---RSFPNYKDFPKCARRAMIDILYNCGPGFLdagtpdapPKAPKMRAAILATDWKSAAKEVPA----------------------------- +>APCry1669193181_1035450.scaffolds.fasta_scaffold103550_1 # 1 # 1056 # -1 # ID=103550_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.558 +-EIVDIIIGHEDFRSTPYPDH-KQWSIGYGSRVSDKsgSFIdksqhsklrPKYNrlyrryqrsgssTDlkrllrwtnknYSgnwykdlhGSKNNtKKieASPLTRAQGKVMLVDSLKFELQRLQSNAnFK---LDEMPENVQIALSDMAYNMGGGFIIKFKKLHDCLVLidkiqrknnitkldqdvlaDLFTDASQEIQNSLYYEELPNRAKSNILLVKNAI------- +>SRR5665213_721763 +---LAFLAREEALVTVGFLDD-GTPAIGYSHHGLGI----------------VPGAVWTQDQALTQMITDCEVCATVLSR-----AINQPVMQTCWDALISLAYNLGPGTVSR-SPLAVALAKGDLAMASDLFVNSGWRN--PARRQREQTLFWHGDYT----- +>ERR1019366_6917736 +---LALIAGFEACCLLAFMDG-SNPAIGFSHHGKDI----------------VAGkTIWTVEQSVQKMIDDCHVCAVVLER-----AITATLTQKQWNAIISLAYNVGPGTIRD-SRFVRDLNALTgletVAGTGSFF--FDYEH--PGRRARENDLFVN--------- +>tr|A0A1Y1V803|A0A1Y1V803_9FUNG Lysozyme-like protein OS=Anaeromyces robustus OX=1754192 GN=BCR32DRAFT_330985 PE=4 SV=1 +-------------KLQPYCDEtSQLWHIGYGHNCNTGCKTNT-KCTKDKKVNnVKVNNKFTASSAQKLLDNDVQSHVKCVTD-----MKFTNLNNYRKSVLVAMSYQLGCDALNkNWPNFIKNVKNSNWKQAVYEMKhnskggKSKWYEQTPGRVDRLGCIMEK--------- +>tr|A0A1N7TAU7|A0A1N7TAU7_9BIVA Phage lysozyme 2 OS=Veneroida sp. 
wenbei OX=1739612 PE=2 SV=1 +-AVKAQLKIDEGYETKICNDNQGNLTLGIGHHIKASDPE----------YGKPVGTSVSADRINEAFTGDFNASVAILG---HYYPLCFSWPSEVKLILVNIAFNLGNRQR-KYpnPGFNFAMEEMDWDRVANEMELSVWFNQTGgTRAADLVSRMRTV-------- +>tr|A0A1G3M4E2|A0A1G3M4E2_9SPIR Uncharacterized protein OS=Spirochaetes bacterium GWB1_59_5 OX=1802176 GN=A2Y38_16235 PE=4 SV=1 +-RMMNTLKQDEGFRAKAYWDL-KQWTFGFGCKAPSE------------------GATISKKAASVLLEAHLDNAVIGFKK---MFKgHESKFNECREECFIQLIFNMGTGrpdgneGLYSFKNTLSFIFKnkeVPWASVAKGLERSLWFRQVGksgdpdgpgpkeGRGERIVRQVSTGL------- +>Cm1ome_4_1110797.scaffolds.fasta_scaffold32957_1 # 3 # 440 # 1 # ID=32957_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.596 +---STVIAKEEGVATKAYWDPPGqntLVSVGYGHQIQPEEYNQGFIQAGDERIpikGnRGIDTTITKDQAQKLLKVDTPKYEKRASD--PLGSSWNKLNEQQKSALISYAYNTGSTASLVKQGLKDAIDNGDMKLAASIIRDKgiktakgKYLKQLDERRHREADLFLSGPI------ +>SRR5690625_3146593 +---GAFVGLHEGEVRRVYSDIGGVSTYCFGGTTRID------------------KQEYSAEDCAEQLLRDTELAYRHVQR---E--VQREMPWSVHAAMTSATYNAGVGAFS-RSPMLPLLREGRWEAACAALVA-PYTT------------------------ +>UPI0004429758 status=active +-NIIEHIKKWEGFVGFTYDDAvypskpvkvgercKGRCTIGYGITDKKKA---------------KPGATVTKEQAEQWLKDVVNQtCIPCIERWQ--KRNKIEISKPIFDALIDVVYNKGCDGFTT-SRIAKKLKNKDIEGAGEELKNW---------------------------- +>LauGreDrversion2_5_1035112.scaffolds.fasta_scaffold1182805_1 # 3 # 221 # 1 # ID=1182805_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.763 +--NAMFtigkpkpafldvMKTAKNTGDPHPEITASQWA------------LE-----------SGWGKHMSGKNNpfgqkakkrngqpiepatlrrtrevingKDIYINDYFKDF-----------------PSIDDAIG--------DHVVKWTKRKTTPGSspveaAQAIKSAgyatDPNYVRSLSGIVASNGIDPKTPLKA--------- +>SRR6185503_8969666 +--YAERMAAHEGAKKHVYSDNSpqHYKTVGIGFNMEKKGAKGEFDALLGLPSSStlfddvlKGKANLTQDQMVTLVEQGKPSFEAGARAA-VSAPVFDALSPERQAALTELQANTK-NGVGGFSKMVTAIKNDDLPKASYELLHGksglpsVLVGTgpggvGPGRANKYAAALRW--------- +>SRR5690348_2283530 
+--------------------------------------KARFNRILNQPANStffddvrNGTKDLTQDQMVTLSTAEEPTFEQGARSG-VSAHTFDALDTEQQAVLTEMQANVA-GGVGSFEDMVDGLKANDLSAASRNVLYNspgvptLFATTkpggvGSGRANNYAASLRW--------- +>SRR6185503_7140265 +--ALqSDTKADEGEKHVAYRDLGGVWTLCRGTTRGV-----------------KPGQSATTEDCDAMTAADLLVAVKGAQQC---APILK-APerFNQLRAAVRM--NNNTGAFCR-GwwkhrpSPAQLMQGGNLKAGCLEML--RYDLVKG--------------------- +>SwirhisoilCB2_FD_contig_51_14534087_length_373_multi_3_in_0_out_0_1 # 3 # 320 # -1 # ID=2452507_1;partial=10;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.588 +---IDHISKDEGFSAMPYDDIAGNCTVGIGILLSYSPCTEK-----------QKKTKYDLAKLNATFHDRLEEAQKYVR----FYVRETELSQAQFDSLTSFVFNTGVGNA---RGVLALANKGEHKLVAEEMNRFVYITQ----------------------- +>SRR5574344_256336 +-QAIRFIMLNEGMKLTVYKDfgg---NLTVGFGHSVIskdnl-K-----------------FAD-kISEDEATDFLIKDLHTAAEAIDS---L---RLDINQNQYDTLLDFTLNCGVNN------LYRLTKNgCRSPkEIYAALP--LYDKCGkNhlkslhHRRIREQQLWNGV-------- +>SRR6185437_2569399 +---LHLIEQFEGy-QRCAYWDPfGRVYTAGFGQTK--GV---------------YRGFCFAGrAAAEANLKRSVQIEYEWAVH-----AIGYPFNQHEVDALDSFSYNLGSGIFVG--A-LRsDLERGQVYAASRIML--AYDHAGGvvlpglkTRRELEVRLLLLPVA------ +>APWor7970452882_1049286.scaffolds.fasta_scaffold90132_2 # 1002 # 1199 # -1 # ID=90132_2;partial=00;start_type=GTG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.530 +--ASILIKKNEGFLSTPKWDN-KQWTWGYGTKAPYGGKPDE--------APPLPYLTITRKDAQEELINYMKTEVldniEKFQE---EFPRYKNWTDNQIDGLTSFLYNGKPNWLR---QVTDNGTR--tNEEIAEAML------------------------------ +>tr|A0A178CBR1|A0A178CBR1_9EURO Uncharacterized protein OS=Fonsecaea nubica GN=AYO20_10214 PE=4 SV=1 +---VDRTKAAEQFKAAPYSDGRTGQSIGYGFHVNAWPEKAK-----------DIRVPITKAQADELFDYVDSWNANYMAK-KLGIDVWNSLDQRQIDALLSVAYNTGAPGLW--GAIESDLRTKNFEKVAQKIETHATSGnQgvvkvdLSPRRRRDAELFRAGS------- +>SRR6187402_1020469 +--------------------------------------KEV-----------EDQIPLTKAQADKLFEYVDSWNAEYLAK-KLGVGVWNALDQRQIDALLAVAYNAGAPGVW--RAVGVELQARNFESVASKLETHATTGhIgdkkvdLALKRTKEAELFRAGS------- +>SRR6266576_616566 
+---FEHIRRAEEYTELPKIDRRKGHSVGYGHHIESHGEKVE-----------KfkNRV--TRQEAEGLMKKDAKQILQSFKN-SWGCEVWNQLPENCQIGLVSLAYNAGAKNTR--EA------------------------------------------------ +>SRR5262249_28749842 +--TVTFLKQKEGFRPNAYGDF-KQISIGYGTKALPGE------------------KSISPQDAEARLRQEAAKVDAWINQ-----NVKLPLTQGQRTALTDFGYNLGTGK-GGLSDLLPNIEAGNWQGIAKQMS--AYIHSGghinagliKRRDAEMALLFRSDGQQ----- +>HubBroStandDraft_4_1064222.scaffolds.fasta_scaffold4966709_1 # 2 # 247 # 1 # ID=4966709_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.675 +---IKMLTDDEGFRIKPYFleykDLNGkmvnetFRTVGNGHKITEAEEKSGKIHG-------FNIDTLTKEELKIIFKKDLKEVIKNVDN---LV-TNKNINPTAYSILVQMGFQLGNTGLSKFEKTIKAINELEYDLASKHMLNnyedkdykniskeigkTKWHIQTKTRAKKLSKLMASLE------- +>JI9StandDraft_1071089.scaffolds.fasta_scaffold548280_2 # 424 # 636 # -1 # ID=548280_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.695 +--ATALAQDALARKEKnWLVNFIYN-eATGYKDMd--DGQD---------------MVGDTPVNEDGKQINEEEAD--------------------------CYKDYKAGS-------------------------------------------------------- +>APWor7970451725_1049214.scaffolds.fasta_scaffold79291_1 # 3 # 236 # -1 # ID=79291_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.419 +--RVTRVRDNTGDA-GYYTDTEYVTpYGGYGVDaegnPItgrafrpFGDD---------------FKSGTPMTTVRSMHHRDNADRKVgqhlpvdtplatmgstggstnphshiemrpdsGFGE----TEGFDIDDAIAHRSVVANA---G--------------ADQPFQTSDQARANLD-RAIAARKAPAVASatpskpttpaptVASATTPR----- +>SoimicmetaTmtLAA_FD_contig_31_9461165_length_218_multi_1_in_0_out_0_1 # 1 # 216 # 1 # ID=548149_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.676 +--KTRRARDRRPP--SrVRSESrsrcgirrrhrPSRSpTGGWDHPhaAIaqaaarpEGGI---------------IRRGA------------------GAPE----EPP-----------AV--------------ASATLEPADAGWEAGDPGAqVAQRRRGITVKPRDTMAAmakranvdlrafmdanpdVNPRKIEP----- +>UPI0002C4C65B status=active 
+--SKE----------SrWATAKrgereidarraaFEANpEAG-AEFdaAIaqaaarpEGGI---------------IRRGA------------------GAPE----EPP-----------AV--------------ASATLEPADAGWEAGDPGAqVAQRRRGITVKPRDTMAAmakranvdlrafmdanpdVNPRKIEP----- +>tr|A0A258UVD9|A0A258UVD9_9SPHN Uncharacterized protein OS=Novosphingobium sp. 28-62-57 OX=1970409 GN=B7Y36_19055 PE=4 SV=1 +--IVAAVAHEEGLVLEAYKDSVGVWTWALGVAETGGHNVRQ-----------YIDKPSTVEAAVAASIDIMRrKYLPAVQR---AFD-GHRMKEHEIAAALSFHWNTGAIGKA---SWVKAWRDGDIAAARTGYL--AWNKPasIIGRRRRDAALFFDAVWPS---- +>tr|A0A1E4N757|A0A1E4N757_9SPHN Uncharacterized protein OS=Novosphingobium sp. SCN 63-17 OX=1660120 GN=ABT10_03055 PE=4 SV=1 +--AAAFLCSREAIGLSAYLCPANVWTWAGGIAESGGNQVRQ-----------YKDNPQPLDKCLRATVDLIRaRYLPDVVK---AFG-GRDLAEHQLAAALSFEWRNGSISRA---QWVRDFLSGKLDAARANIM--QWTNFgqQIARAQAERDLFFDARWPA---- +>ERR1051326_2638826 +--GLRQLKAFEGLSQRTYTLS-GQKYIGYGHLLPDSST----------------TTYVSRDEADSLLAGDVAAAVSLVKS-----VINVQLTQGQFDALVDFAFTISPEKFKN-SSVVQKINSGDIPGAATVLAQWVYATQNG--------------------- +>3_EtaG_2_1085321.scaffolds.fasta_scaffold261547_1 # 1 # 567 # -1 # ID=261547_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.464 +-SLAQQFIPKEGYSSKIYPDKDrlGkirGYAVGFGHKLTESEIAK---------Y--SKqgtvnivGVTVPEQQITEWFHKDFKRTKQATKE---QSAEVPNITPELEKSLFNINFQLGSDWHTKFPHAWKNLKAGEWDRAIEEFQfkrqgsniPSQWSQQTPERTQEMVSAIETQ-------- +>tr|U5MRW0|U5MRW0_CLOSA Lysozyme OS=Clostridium saccharobutylicum DSM 13864 GN=CLSA_c22880 PE=3 SV=1 +-KGAEFVSSWEGFSSTWEDVGDGYWTIGIGTATSgtLGKQLY------A----SGI-TSCTKEQAYKWLEQECSSCYEAIKG--KLDANNITLAQNQIDALISMAYNIGACGLVD-STLFkNILNgVTDEATIRSNFE--AWDKCNGvvwdglkKRRDSEADLYLNADYI----- +>SRR5580704_17927216 +--IRSELDRWQRPIRWMYRDSQGRVTVGCGTMLPSAAAAKRVsfvhdtnfqvataaeiEAAWNslhsgaatqkaaapnKKfsakhYEDKTNLRITEASVQALRDGHINADYVELK---KIYLQFDSFPDDAKIALFDMIYNVGPGqnkprqhrasGLRNYAAMNAPINAGKWSLAADRCT------------------------------ +>ERR1711871_724031 
+------------------------------------------------------QKPLNNEEVLTIFDKDLSNAIEDTKK----FIDPATIEPEAFEVCVHLCFWIGLPRLQGFKRCKQALLDKDYVLASEELLDSKMGKSdvrgLVNRITELSARMRD--------- +>ERR1700743_2506204 +---VVFTPMWEGTDYVAKPDMVGTghpITWCHGQTNVDRDAAH----------KVAAGTRLHKQECDAELAKSLPTYLNPVCKC---V--KVPVPVKSMASIVDAAYNAGPARVCA-SPMVAAFNRGNIRDGCNAFD--GWIVR----------------------- +>SRR5581483_4501545 +--LKRQLTSDEARKSYLYDDATgkritkgttviGHPTWGIGFDCDA--------------------LDFTDDAIDAQFQAVRTRTINEIV---AALLWVAKLPSGPFRAIVDVAYHTGLNGLLEFHLMLGYAR------------------------------------------ +>tr|A0A2S5T942|A0A2S5T942_9BURK Uncharacterized protein OS=Schlegelella thermodepolymerans OX=215580 GN=C1702_00295 PE=4 SV=1 +--AKPFVAEHEGNELRAYHGVlsshrkagkkyaepghnsIEEVSVGYGFNLQRKDARHVFKTELGlsdKEFDEvfNGQRMLTPAQAEKLLMFGLYEANAYLDR---SLGPGVPLRDHERAALVSLIYNAGYSAVSR-SGLLAAVKSGDRAEVARRILKFR-TAGGalTERRRGEAALF----------- +>tr|A0A228QX24|A0A228QX24_9BURK Uncharacterized protein OS=Burkholderia sp. HI2714 OX=2015359 GN=CFB82_20035 PE=4 SV=1 +--ADKAIGGTEGFEARAYHGVysplrkpgdifvkpgqvsdatKSEVSIGYGYNLTGnADSRQVFQKVLGidsAGFDAirDGRQGITPEQGLKLRQYMIYQVNAQLDH---LV-GNKPLPDYQRAALVSMLYNFGYGNFQK-TGIPDLVKKGADPQVvAQKIRGASS-SQKalQVRRNAEANLY----------- +>tr|J3CQA4|J3CQA4_9FLAO Lysozyme OS=Chryseobacterium sp. 
CF314 GN=PMI13_00025 PE=3 SV=1 +-KGKDFIKDWEKYYKMPYDDSEGYATVGYGYLIAYKSYKNLTQSEVEkTDItWKEFQEGISETRALELFNAKVKKYENAVKR-----DIKVNLYQQEFDALVSLLFNTGSEFLNTggakggETKIKQNINNQMYEQGADEMSDVTNDGTlgLVKRRKAEINMFKNNIYDS---- +>SRR3972149_4805384 +--------------------------------RDAG-------------------LPVGSTGRR-------------PR----GSPRQadGDRRMGVTEASGTNGFKLWIARVADVIEVTGKISGGRRPV------------------------------------ +>SRR5438105_15205902 +---LHRR---------------------RRPLDPSRPVYQRGL---------QEVRSYTPQQRLNIFGRDIAEAQRAVNT-----TLKVPLHQYESDALVDFTFDIGTGGFAT-STCARAFSTKSA-------------------------------------- +>tr|A0A2X8DFP0|A0A2X8DFP0_ECOLX Prophage lysozyme (Endolysin) OS=Escherichia coli OX=562 GN=SAMEA3752265_03503 PE=4 SV=1 +--LDQFLDEKEGNHTTAYRDGSGIWTICRGATMVDGKPV-------------FPGMKLSKEKCDQVNAIERDKALAWVER-----NIKVPLTEPQKAGIASFCpYNIGPGKCFP-STFYKRLNAGDRKGACESIR---WCTSKQSVSQCV--------------- +>SRR5580698_9093551 +---YSLLQKLEGYSPALYSLHDGGYTIGFGFFVPINESY-------------KWRNGITWEGAQKMIQQKMPAYEDQVKE---Y--INVPLTQNEFDALTMLPYNLG--GFSKATSIINDVNN----QVDFDVLQRDWKRFIHSKA------------------ +>SRR5438045_3469669 +--GHRSDRTAEGVRYTPYQDPSGipACTVGAGHVLAWKYCTAA-----------QLATTFTRAQVAAFLRSDVATAERCVQA-------SGKLSQPTFDALVDLTFNAGCGSLGwheppYYRTLTSLAGAGELELLAAEVRH-TATTAGGk--------------------- +>SRR5687767_13490105 +--GVAFMQRWEGTRLCPYNDPVGHCTVCTGILLHFGNCSSA-----------ELRDCRSEKECIQEDKRKAKPYSEAVDK-------AGRWQQPEFDALTSFVYNNGPGAIQsgdtgA------AVRSGDRQRIANqml-----RWVNAGGsplpgliARRKAEVALFLRADYS----- +>Hof3ISUMetaT_24_FD_contig_21_1003446_length_269_multi_2_in_0_out_0_1 # 3 # 254 # 1 # ID=31531_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.671 +---EQLIKSFEEYRQHEWTDPnTGKKFIGHGHHVDSKEYEGnsvteisehadqitvrdtndlqpfgkmkfgehsEWISWanktstkllgcmrgLDGTIpqlhkkgvqwkfhGESYPEGITRQQANALFDNDVKKSISSVRK-----NIKMPINQNQSDALISLAQSLGPSTFAN-STVAKAINQKEFAKATTEFM--RYNR------------------------ +>tr|A0A255YQD7|A0A255YQD7_9PROT Lysozyme OS=Niveispirillum sp. 
1-14 GN=CHU95_19955 PE=3 SV=1 +--LQQILRRFEGWHdgdkrtphSDPVLCPTGYVTVGWGRVLIDPQTGQQLKgkAGLARA-KELWPQGFSRAECEQMLREDMQRFMSGVID-----LLAQPVTAQQLAAMTSLAYNVGLgkKGFAG-SSVLRLHNAGDTAGAGRAFG--MWTKGT---------------------- +>SRR3954447_10495189 +---VTLTKIYEGLVGvAPYNNANSNCTVGYGHLIHMGPCTTA-------------DRSK-TYDVDALFAADVTEHERRLAS--S--LGDLPVSQREYDTLWDYVYGRGSLTASTSPNMYAALtdDPIRYADVPGILRANGDIqSLRglCDRRYDEAEVFAGGPYDRTYTC +>ERR1700736_4068612 +---HDLIKTYEGLRRKLYdndGKTDGNCTIGWGYFIHSGKCACKKpKVPCTNKAEKPFAKGITKKEAARLFDKRSSETEELVRRLA--SVSPNGLNQCQFDALTDFFYNLGSNALyKSKksktpSTIVTDLRTGNYGEIFPDIL--QYDKHDPVRRRRIADAMMFEKA------ +>SRR3990167_2836201 +---AAYTARFEGRRNRVYDpnpnDGRAEPTIGVGHYMDRGDSRETFARVLPSvDFDSayTGRTSLTNDQVGRLFNHDIRIYVERTK---KLVPKFDELDVE---------------------------------------------------------------------- +>SRR5687767_15031308 +---------------------------------------------------------------DTLYAYDLKKHYQRAR---ERVSGFDAFPLSAKIAVCDMAYRT---DLFKSPIVVSHLEANDFTAAAKEYIDGsPEYRR----------------------- +>SRR6185437_2097947 +------TGPLEGNVLWPYLDGPGNVTIGRGHLLYVSAPPSspALRaaivLGmplevVEPQwntlrrqkpgmrpeyYQGATSLRITPEKSDELFLGDIRSHILNCI---DYVPGFWDIPHEVQIACQDIDFNVK-GGIRTFPEMLTAIESRDW-------------------------------------- +>tr|D5GPX4|D5GPX4_TUBMM Uncharacterized protein OS=Tuber melanosporum (strain Mel28) GN=GSTUM_00012061001 PE=4 SV=1 +--AIKMIKKLEGFRGDIYKDQVGVDTIGYGHNCVTAPGTCE-----------ALNPPITEKEAEDLMMKDMEQFEKCVCD----LPNSEELTSNQFCSMVRYVCTFFFFAFPC-------TRIQPYSGLLD--------------------------------- +>ERR1700690_3732633 +----------------------------------------atkeEVAHAFqketemSgpdpaSRYRLSPSIEIPEEKTKELALRRLRTeFITENK---KLFRRFDDFPVPAREAIIDIAYNAEFGRaeshahgrankatgLHRFHWLKIAIEDADWMAAARSSH------------------------------ +>SoimicmetaTmtHMC_FD_contig_91_16769_length_213_multi_2_in_0_out_0_1 # 3 # 80 # -1 # ID=1812513_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.731 
+---RAGTLADTGFTGAIILDDPLKPEDAFSKTARNkanrkilntvNSRKAK-----------SSTPIILIMQ-----RLHVE-DPTNFVMT-GNVP-G-NWHQISIPALIDDAYintlpehirrkvprdverdeKGRQSYWPL-KESLqsllqlekggqdkdgatvsRYTFSSQYQQAPKKlggdlvkaewfgryiklpVL--KWRAI----------------------- +>SRR5690348_13937768 +----FYIKKSEGLSTRLYNDV-GHPAIGYGHDLTPAELRSGAIHTDTGDI--STRGRITKAQADAIFAKDYNQRYQQVL---KKVPALASLNENQREAIMSYYYNTGR--LP--RGLAENLGRGDLGAVDESIL------------------------------ +>tr|J9EYC0|J9EYC0_9SPIT Uncharacterized protein OS=Oxytricha trifallax OX=1172189 GN=OXYTRI_08131 PE=4 SV=1 +---LEYLIDVEGgVFAEIYDSQLgAFQTIGIGHKLQNNDSN---------------LTDITVEQCFKLLENDIANAEKTVIQkLKDKNQDYYSLTQIQKEMLIDFAFNLGPTFYLKNHEFVEAVVNNNVEQMKQN-------------------------------- +>SRR5690606_5365436 +--LIPHLEQeealGDGKGRLVYVDPVGVTTWCYGDTGPVP------------------NTPLNQAVCEAQLNKRVKEECLPVAQ-----AIKVPVYVHEMAAICSWAYNIGWPSAAN-STAIKRLNACDYDRVPEAML--WFKYAQ---------------------- +>GraSoiStandDraft_11_1057310.scaffolds.fasta_scaffold688098_1 # 1 # 126 # 1 # ID=688098_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.444 +--AAPFIASEEGLSLVAYTDSVGVVTVCHGLTNGVN------------------LRkIYTRDECDKELLLRISRDVEAIRP-----YIKVEITDTTRAAIASFAHNVGVPSVKS-STMIKVLNTGDISGGCYQMY--DWHKGG---------------------- +>GraSoiStandDraft_16_1057320.scaffolds.fasta_scaffold2916510_1 # 1 # 630 # 1 # ID=2916510_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.540 +---------------------VGILTIGYGTLCSEKVL--------------PCPGPVSEPEAARVMGQELARKYGPCVS-----NLKAKFNNNQYSALTSFAYNAGCGSLANVANTC-GLNTAkpNYGCVPGRMK--LYNKGtVNGRLVTLPGLV----------- +>ERR1700722_373675 +---AAFIASFEGFGATPYDApSpVRDCVIGYGHVIHSGLCTSADF---------ETWGRITAQQGQALLQSDIDATFVPAIR--AG-IPGTPLTQPEFDALVDWVYNEGPVYITGRSSVRSALRAtpPHYSSVAQDLM--RYVFASgrklcglYRRRVSEVDLWSTGSYA----- +>SRR5260370_9545914 +---IAWIKLCEGFRERPYDNdgsaNnGRNCSIGYGQLIHPGACTSSDN---------QR---VTEPDAAALVNANADRATA--WI--NS-HITASLNQAQFDSVGDLLYNMGPEKFIRHDVWRDL---saNKLNLVPDDIM--TLTAGGggiAVRRAGEAGMFGSGTYS----- +>SRR6478736_642139 
+--GLIALQNHEGFgkqlangMVQCYPDPGTgsePFTVGWGTTSAVLNGvTL------------TPNTVVSKAQCQTWLNYSITSIFVPSIK----RYIKVELTQGMVDAVISFVYNVGATNFKK-SGFLACINKQDWCGAYAALQ--TWNKANK--------------------- +>SRR6185312_15858291 +--GLAALQQRESLakirsdgM-vQCYPDQIGvviSIQT--HTHSFLLPVgrcpllalpPF------------NKENLLQKFDPMEWCNVTLIRSEL---------------------------------------------------------------------------------------- +>Dee2metaT_16_FD_contig_31_1534896_length_548_multi_2_in_0_out_0_1 # 2 # 547 # 1 # ID=893267_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.474 +---KIKDQLGAGFdleqlkgQLgdiQNVSKTLDgatKFKEA----SDQLKKvSD------------ATKELGNIRNLDKVLDtvnklaeIPEVADKIPNLS----KFNtivdnltdiknnstdiqtlagspatskVLELSQKINDLPTD-LSKIGGfkdgiaglNGFSdnL-SGLSNEISKVsdiakdlknfsefkaiggglanleknmgeikslstSLSGAAADVG--AIKDITK--------------------- +>GraSoiStandDraft_4_1057263.scaffolds.fasta_scaffold1149636_2 # 100 # 735 # -1 # ID=1149636_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.766 +--LLPQ-KLTE--kiqtV-lgkRIYPLTESisaVDQQI-------------------------------------------TNEIDRTLN----RLIx----------------------------------------------------------------------------- +>APCry4251928382_1046606.scaffolds.fasta_scaffold60208_1 # 3 # 1589 # 1 # ID=60208_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.432 +--AQPYVNNPQGL--aniVYAGRMGNgntASGDGWTYAGKGLIQlTG------------KANYVGFANSLQMSLTdaaaYLQTTDGAVESA----FWYwmsnklstl-------------------------------------adagnisavssKINGAK---------------------------------- +>SRR5208282_1037291 +----FKLKtaSFEKTVPYMYLDSQGHLMVGIGHNIDLRGDLLDLpfvttdrfdreaakdgekgtpimekkvpnrpatpqeiENDasfLKEHLGlenyppddlaKYTTLELAPDAIEDLFLTDLNLAYDSLLN---EFGfaQFLAFPVPCQAGLIDLAFSTGD--IRGFPNLVRDVKgrgefagksvRDRWTEAARQSN---RAIASNERKNTVLQW------------ +>SRR5687768_39063 +-----MLAASEGLRLTTYLDTVGVPTIGYGETENVKP-----------------GDKTTPQKALvRLLESAENKHAAGMKK---CIGDNVEMYQKEYDWHVHFGYNIGVAGYCR-SETVKLLNQGKNKEACNAMM--GWLKNPE--------------------- +>6_EtaG_2_1085325.scaffolds.fasta_scaffold589146_1 
# 1 # 171 # -1 # ID=589146_1;partial=10;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.421 +-NLVNFVKLKEGFRAELYKDSGGVPTIGYGTTSGPLL----------------QKGIVTKEEATAALLEELNQKALEVKK--QLDADGITLSQNEFDALVSFAYNAGTNALINKTQIYSKIKEGLRGEELRTIITNTYIKDNkgnvleglIKRRKEEASIFIDGEAPGYA-- +>ETN01SMinimDraft_1059929.scaffolds.fasta_scaffold766024_1 # 237 # 329 # 1 # ID=766024_1;partial=01;start_type=ATG;rbs_motif=GGGGT;rbs_spacer=4bp;gc_cont=0.452 +-SLAETIKKNEGgLVLEPYKLEyttrdgtnvkENVFTVGYGTQITDKEYND--------------YIKLSEEEKinfaNKKFIEKYEQAKIDANT---YMEsyGITNAPQNVKNVIVEMAYNMGRGsakdkkGLMSFKGFASAIKKGDYERAANELRyidpsvpekgETNYYKQVGP-------------------- +>1186.fasta_scaffold525110_1 # 1 # 255 # -1 # ID=525110_1;partial=10;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.725 +--LRALLRDREGYKNKMYPDGKddkGrqLYSAGIGHQLSFEEVER------------WKGKRIPDEIIEAWFEKDYNNALDDANV--LMVKEKIPYSENVFAAMTAASYQMGRTRFGKFKKTFGYLREGDIANAMEEAKNSEWFRGnkeketkgTPKRVHDFNSLL----------- +>UPI0001E63D3D status=active +---KQFISNLEgGFREKPYRVknddgTLGNWTIGHGFEYINGQPV-------------TPQTTITEEQSLQILEDKITEIDSHFL---ENYPIYGDLSPNQKGAIVSFAFNTGTNVVDvpENRILRKAIAGGDPNKIINAMG--LYFNSGgkpnqglKNRRNKEAQLFLNN-------- +>RhiMetStandDraft_4_1073278.scaffolds.fasta_scaffold1771873_2 # 219 # 329 # 1 # ID=1771873_2;partial=01;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.703 +---KQFVSNLEgGFRANPYRVenddgTLGNWTIGHGFEYINGKPV-------------TPDMTITEDESNQILDQKITEIDSHFM----QYPFYENALPHQKGAIVSFAYNTGTNVVDvaENRILRKAIASNDPTKIINAMN--LYINSNgkpnkglENRRNIEAQLFLNN-------- +>SRR2546425_12328605 +---KAETKGFEAVKNYMYADRLGFVTIGVGQMLPNAQAAAALNLintttgapatsaektsafntvtsAYgtaisqgsRapraEFYEPLTGVRMSNSDIESRLESGLRSTQNELR---KIFKGFDDFPAEAQMALIDLG------------------------------------------------------------- +>MudIll2142460700_1097286.scaffolds.fasta_scaffold2646518_1 # 3 # 434 # -1 # ID=2646518_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.544 
+-TLLPFLRNNEGFISKITtavEDkeiANPTKDIGYGHKLSAKELKDK------KVYGIDVSKEISKDQADFILSKDLEKKYEQVS---KKIPNFKNLTPLEQALVTDFEFNVR-GGIKAFPKMLKALQARDVQTLNKEYKRYM--------------------------- +>SaaInlStandDraft_6_1057023.scaffolds.fasta_scaffold973758_1 # 3 # 149 # -1 # ID=973758_1;partial=10;start_type=ATG;rbs_motif=ATA;rbs_spacer=4bp;gc_cont=0.476 +-ILVPFAKANEGFISKMDtaiEDkgkSNPSMDIGHGHKLTPEELKTK------KVHGIDISEGITKDEADIIYRKDLKQKYNLAS---EQVPGFKNLTPLEQALITDFQFNVK-GGAKTYKEMLKAIKEKDFETLNKEYKRNM--------------------------- +>UPI00072D3DE5 status=active +---GSKIQDHEGFRPTSYPDRRqdGsmGKSIGYGFNLDKKGAANILKaAGITASLEDlkSGKASINKQQAQALMNAELPFFAKKAEEW-LGKTNWDKLADNQKMALTDMSYNMGGK-FTgdgMWPGLRQAIIDGNKTNMAKEMQDSNYWKQVKGRAMNNLSQLQ---------- +>tr|Q0ANV5|Q0ANV5_MARMM Glycoside hydrolase, family 24 OS=Maricaulis maris (strain MCS10) OX=394221 GN=Mmar10_1740 PE=4 SV=1 +--ARELIKRFEPFRPQAVKGDDGRWVVGYGHRAAAKP-----------------GVRVNEDEAALLLIYDVMRAEEVVDD-----SITGPLSRGQRDALTSFVHDVGVDSFRG-SEVARYLFEGRARAAGEALA--AFGDGVSSRREAESRLFLDALL------ +>SRR4051812_27131140 +---FAALRTSEGAVLHYYNDLANNCTYGIGTLAHSGPCTPE-----------ELQRQVTSQQVNGQLEGRVRTAEAAVRSG---VP-TFQLSQSQFDALVSFVYNVGAAGA---ATALAAANREQTGDVATQMNARVYVHP----------------------- +>JI9StandDraft_1071089.scaffolds.fasta_scaffold2841462_1 # 1 # 225 # 1 # ID=2841462_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.640 +-----KTREHEDFSAKPYKDG-GGWSIGHGFHLDKESITKaDLQgSGIPDVVkkallaGdhTNKNLKLTNPQSEKIFQNIYNRRRRETA---RTYPWVRDLPEQAKNVVYDMAFNLGRTKFKKFEGMLEALEQGDYKKAALHVKfvdgpkrgkLTPYWGQTKSRAEDNFDI------------ +>SRR5690348_16036458 +---------------------------GQPTLMIN-----SA-------TNQITNPG-YTYDNADLLQSDSVKFENIVRD-----NVHVALYPEQFDMLVDFTFNTGA--LPG-TKLQRALNAGNYDQVPDLMS--EWVHSGG--------------------- +>HubBroStandDraft_6_1064221.scaffolds.fasta_scaffold6995994_1 # 3 # 263 # -1 # ID=6995994_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.582 
+-NLIAIMKSESSLNPqaiNPSSGASGLIQFMptTAKSLGTTV-----------------EEIRKMTAVQQL---PYVEKYFKSV----------RVQPgSSAGRLYAYVFLPGRANRE-------------VLTQ----AGENYYEsnkglDIDRDGKITIadldaRLAKYGG------- +>SRR5579859_516931 +----TLTKGAEKCVLVGYHDQGGVPTIGWGHTGPDV----------------KVGQTITRDQADFLLANDYAISDHRLTTELTKPGPFAALDLHEKAALLDFVFNTGGGKSlgdpKC-WEIWGDVDAGRLADIPDQFDRFIYIHVN---------------------- +>ERR1700741_7643 +----AITKRAEKLMLIGYADIGGVPSWGWGHTGPEV----------------RVGQPITADQAEHDFRRDQAVADAELKAHV-PPEAFAGLAEHEKAALLDFVFNAGAGPQgkSE-WNIWRVVRAGDLAAVPPELKRFVYVHVD---------------------- +>tr|A0A2R2V0U9|A0A2R2V0U9_9CAUD Lysozyme domain-containing protein OS=Pectobacterium phage POP72 OX=1965269 GN=POP72_042 PE=4 SV=1 +--------------YEPYDSDDktpGANTIGFGHKITEEEKRNGYIVIDGNKVPYKpGASKLTADMAMKLLEQDAKNHIPSTS---GWSVPFDMMHPGVQRGLQDLGYNLGKQGISRVPKADAAFKSGNFTDGFIYML------------------------------ +>tr|A0A2K9V4Z1|A0A2K9V4Z1_9CAUD Hypotheticla protein OS=Erwinia phage vB_EamP-S2 OX=2070198 PE=4 SV=1 +--------------FTPYDSDTgteGTDTVAYGHKLTPDERKNGYIMIDNNPVPYKaGESQLTEQQAQRLLQQDMKSHVPSTP---GWKTDFDGLPGNIRRALIDTSFNMGKGFLNKNPTANAWFKQGDYQAGFIQLL------------------------------ +>tr|C4IXG9|C4IXG9_CLOBO Uncharacterized protein OS=Clostridium botulinum PE=4 SV=1 +-HYTNFRNKTPQYSSNTSVRTAGIILIGALVFGACIAlapA----------------AAEIGTAIV-T-------------------------FASR-YNLAS---------------------------------------------------------------- +>SRR3712207_7420324 +----------------------GIRD---------------------------------------IGVTGVQTC----AL--PIYSKGVNLNQHQFDALCSFAYNCGVTALLYESILYKRICSGVRDSsLKSNFE--AYKKVgntvyqgLLNRSEERRVGkecrSRWSPY------ +>SRR3712207_8950822 +-YTTLFRSKYGAWYGLDGNSWCA-------MFVSWCAN----------------EAGILNRTVPK-----YASCANRSEE------HTSELQSR-QYLVCRL--------LLEKKK----LSATHRAH----LP--AH---aVSARR------------------ +>SRR3712207_2955169 +-HYTNFRNKTPQYSSNTSVRTAGIILIGALVFGACIAladR----------------KSTRL--------------------------------------------------------------------------------------------------------- +>SRR5881628_1583064 
+----ALLAPIVGYPAY-----srqqsggfvrtFSVQDRVLLVLIENGGVDLG-------IPEL---------A-DKII--DLVPGSSVLPS-----GVR--------------------------------------DRFVSFLR--DK-------------------------- +>SRR2546426_4208801 +---ADIVVRWEGNAYGEIGTVmierdlanssewsrsSANIVMTWVHQIPTDGIDPR-------AW---------------------------------------PIVYSYSGTY----DPLGNGYFEF-EGEFQInafgeIKFNKHKVYSRSLS--DW-------------------------- +>ERR1039458_2055105 +-----------------QRDRreggp-----------------------eaaRCGHQNRR-----------------------KALCA-----GSGASAQPVSARRADGLLLEHRPPRplpanrkkkgkgnpepqegepkngygFHL-SGLAKDLEDGNYGNIPTQIL--SWDT------------------------ +>SRR5688572_20214540 +--GSQSLAGLESI---DYSAPNEFSSLTlsdittafLQDALMSGDDM---------------SAEGS---IMRVAGSDEE-QRrPGViDR------L----------------------------------------------------------------------------- +>SRR5438067_1000894 +--NVQRLAGFGMFAGASSLAAGGLAVA-dvtatvGGRagAVTAADAI---------------DAGLV-------SRSGNMSRLlPEI---------EAQLAK--VNPSS---------------------------------------------------------------- +>GraSoiStandDraft_12_1057312.scaffolds.fasta_scaffold826984_1 # 1 # 549 # 1 # ID=826984_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.552 +--TVQQLAA------VLYNEVGSLSGPGledaliaiAHVvenv-------Y---------------TAGQPGAVAPNVLSAQETIAIqNGVpS-------ANVAYGNAVvaaSSVlggyvpdptNGSLQFNLRYSDSLE---ARRNPITGNFITpLTPVLQ------------------------------ +>JRYF01.1.fsa_nt_gb|JRYF01096425.1|_1 # 1 # 585 # 1 # ID=96425_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.602 +--NVAALAR------VLMSEASV----GsdteqiavGSTvinrMLRNSTTF---------------VEDAWGAYAHNQ-----------EpT-------DAI-----RtraSNLlsge------------------------------------------------------------------ +>APFre7841882724_1041349.scaffolds.fasta_scaffold557419_1 # 2 # 385 # 1 # ID=557419_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.612 
+--TDGIIVHHTGGRGLQSAIsTLKARGLGYHYMVDQDGSVTEFV------------PGDQKAW-HAGK---TDKQPGLTN---SNSVSISlvaKDDSDVSTAQLKSGFDLGKSLMSKFGASM-VYGHGETSSHKQA-----------TEGKTLAEALRSGKI------ +>SoiMetStandDraft_5_1073268.scaffolds.fasta_scaffold1510625_1 # 1 # 342 # -1 # ID=1510625_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.705 +--------------YGLYYDSEGKLTSGIGHLVKQGEIEA--------------HYKLKEKAAKAMFDKDFEKHKKALERVSKNiGVDLNRLNSSQEDSLISMMFNTNLGKRkedgsLSWKGMWGALKKatsPDtspeekekyLSLAGYEMLNSKRFTQVKSRAVEEADVFF---------- +>LakMenEpi07Sep12_1017427.scaffolds.fasta_scaffold02312_3 # 762 # 899 # -1 # ID=2312_3;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.428 +-LERAVIMAHEGFRENAYSdplRGDDVPTIGFGFTGGVQP-----------------DDTMDKETALARLDQEIEKHREILfaelalNTaslrkGSsdqdyinrfgdgwqydfisseadgeeyaplpsgatepyveldqyshvtiygfenreellaAGQERYQSLDANTQAAVMSITYNYGSAGgPDFVKEINKAVDTGDVLALADYIDNTeyGLAANdggaLYGRRNDEANLIRTGRS------ +>DeetaT_4_FD_contig_123_2014_length_245_multi_5_in_1_out_1_1 # 1 # 84 # 1 # ID=9442_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.655 +-------------------dDAKGIKTIGFGHRLSDdeikngsitidgktyniaGP-----------------NNGIPDAAAYKLFEQDFETHSEIFyaelafGEsaqtwYKreglpdkrenyvnlspnkdlkwnwntawslgddpaegtpinyeggdgpvrldvksngdttvygfnnreeviaAGRQIFMSMPETVQASTLSLVFNAGSSGPETIRKVLAAAESGDYEPLAQHWAGTtgTKLAGeeaiLETRRKQEGQYMVDPEG------ +>KBSMisStaDraftv2_1062788.scaffolds.fasta_scaffold446377_1 # 1 # 330 # -1 # ID=446377_1;partial=10;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.715 +-RMRQNLIDNEGFVNKANRiGNKGEIDIGVGHAMLNPNNGKtkakqtivsrrIFAKLFGNTVNfdavLNGKQTLTNPQIEKLLKHDINEHVNRAKRLFRDTSHFDSLPPYLQDAIFDGVYRGDVGT-RATPKTLRYMNANNWAAAAKEYLNHQGYKKAkKRRLGGITLRMEAN-------- +>ABMV01.1.fsa_nt_gi|175921436|gb|ABMV01127803.1|_1 # 3 # 107 # 1 # ID=127803_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.362 
+---TGYLKEVEGNvmvggKHVPYKDSGGNWTIGYGKLIGKSQLGEgsigKVKKygSTIEIW--KGKnaLSWDEKQAEESLNKEALSSLKYAEIYAkeKGF-NWESIPERQKHGLADFMYNLGPTKMNT--------------------------------------------------- +>DeetaT_10_FD_contig_21_2312189_length_216_multi_3_in_0_out_0_1 # 3 # 215 # -1 # ID=113074_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.540 +---------------KAYYDKiGKKWTIGYGHTDGVEP-----------------EDTATINQADAFLEEDGKRLIHETDNLLkeKYQISIFDLPESGQDLVGDMGFNIGPDKFTtVWSNMLNALKNKDWVTVSKEYT------------------------------ +>ERR1700722_14295849 +--------------PHMYLDIKGLVTFAVGNPVDPVELAQAqpFRfknipviatpespanpgriaLAWQRlkddpDLatrGyggceTITQLELGDDSFDSLILDMLTRNEGFRKR-QQWSQGYDS-------------------------------------------------------------------------- +>SRR5215218_5374928 +--GILSIAGREALVLVAYQDGkfddgTPRYSIGFGDNSAK------------------AGDKITPAEAWKRLVHNIRERERIVNQ---H--LKQPVTQHQYDAIMSAYYQGGTRNLL---PLVALVNSGQADQIPEALPH-LDTNLAgehlkglRMRREMEAKIAKDGDY------ +>SRR5829696_5076356 +--GILFIAGREALVLVAYEDGenpdgTPRYSDGFGNNDAK------------------KGDRTTPKEAWKRLVENVRKREKIVNA---M--LLKPVTQYQYDAIISAYYQGGTRNLL---PLAAVVNAGQANQIPNVLPA-LDTNRKgehkpglKIRREAEAKIAKDGDY------ +>tr|R6H6H5|R6H6H5_9FIRM Lysozyme OS=Firmicutes bacterium CAG:137 GN=BN490_00388 PE=3 SV=1 +-RIIDFIKEREGFSATPYWDH-SQYTIGYGTYCGSSRDEV----------PASYWDGITRDEAEVLLRESIASNyEASVIR---YESgLGRRFTQGQFDALVSFTFNLGSGWMYDDCRLTRWL-ENPSTDlqLVWAMG--VWCRAGavnthlCKRRIQESNIFLYGDY------ +>SRR6266702_4502823 +-----------------AAGF-VTCSPVSGSNSAAA------------------SEGIDQAEGERRIRLEIANADALVKN---VYP---NLPKGIHQALLDLTYNAGPGWEH--ASLGAAVKAEKWDTVKADIL--QYNHAG---------------------- +>ERR1700688_2531516 +-----QTASTEQFSAIPYADGTdsnSnqVYSIAYGHQIQPGESF---------------PDPVTPEQGRQLMLTDSANVVDVINN------SGLQLSQSQFDGIWDFGYSAGIVPLQ---KVLATFQNNGLNAAASEMMAYVYWHPVP--------------------- +>SRR5262245_40700765 +---IRNIQDEEGCILHPYHgsaDPPGVMTIYTGHVIRPGEIF-----------------NNTQAEADAVLIKDLGWVQAWLWR---QCPWttnGTTGRQHNFDALCSLVFNAGHVF----GDLLAEVNTDNRPDEVRRI------------------------------- +>SRR5260221_9441026 
+--------------------LPGVMTIYCGHVIHPDDQI-----------------EPPytQAKSDYWTNHDGKWAQAWLVR---QCPWvtdGSVGRQHQFDALVSLVLNAGHVF----FDLLQEVNGQNRPEEIQKL------------------------------- +>SoiMethySBSTD1v2_1073268.scaffolds.fasta_scaffold6720493_2 # 310 # 375 # 1 # ID=6720493_2;partial=01;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.606 +--IGK----------TSTGDQllfdvvaefdklAEAFKVGTGVSLSGGGYRSyenQVKVY------DNPEKWRLR--------PDLKEKCTNP-------------------------------------------------------------------------------------- +>RhiMetdeSRZDD1v2_1073273.scaffolds.fasta_scaffold2151246_1 # 1 # 672 # 1 # ID=2151246_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.632 +----RIIKTEEGFEPDPYPDGNGY-AVGYGFNFGKENVLPldstkGFKStnertkgmRFDEKY--FYRGDEGKSRALTKLRKIIKERYDRLSA----NSVFANLagegSAVRKAVMLSLAYQVGTDGVLnktdGYVKMLEAISKGDFDAAGYEIINSDTYEQSPERQEKASKMMKSGILL----- +>YNPNPStandDraft_1061719.scaffolds.fasta_scaffold640317_1 # 3 # 170 # -1 # ID=640317_1;partial=10;start_type=ATG;rbs_motif=AGGA;rbs_spacer=5-10bp;gc_cont=0.613 +--AAALIELFEGIEVEAYLDPLGVPTLCTGMTKYSNGERV------------RMGDVCYESICTEYTKEQIERDvLPEV----SKIPGWDNLGSNRQSALISFAWNMGFNFFevEGFEDMQEALKEGVdHPEAYEDI------------------------------- +>Laugresu1bdmlbdd_1035124.scaffolds.fasta_scaffold116666_1 # 1 # 561 # -1 # ID=116666_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.624 +------------------------------MTKYPNGEPV------------RMGDVCRTSICYEYTIDQINRElFPLT----QRIPGWDSLGSKRQASLLSFAWNTGFNFDrsTDFEDIEKILEAGHkDPSRYEEL------------------------------- +>UPI00013D0631 status=active +---MYKHNSWEGIHYLAEKEGTRYPELvaaqwqlESGGGEHAsgkhnyfglkgngtlketeeetEQGRI------------SElaefMDFNTIANSVKYLITR-------------WYKDWDKYEGVDRASTIEEAA-------------EQLQtQGYAtDSEYAAKL------------------------------- +>tr|A0A1Y1IDB3|A0A1Y1IDB3_KLENI Uncharacterized protein OS=Klebsormidium nitens GN=KFL_003050080 PE=4 SV=1 
+----SWVEKHEGRHRCVYENEQGDKAIGVNYNLEDDRSARksElktVLADYEKVL--KGEQCLNDVQISALLLADTKRKLDEVAE---TVHKLDDLCCDIQAVLADLDWSMGQKGLDDSDSFLKETENGNWEDAATALRATIWCVSHKKRCDADVAKIEKGCS------ +>ERR1017187_6404165 +----EYIAKWEGYISLAAFDV-NAWRLGYGSDTEGPDQAR-----------VVQGMSTTKERALQNLALRIPQFEKTAVRA-VGADAWEKLADNQRTALLSLVYNYGR--LP--NSVARSILTGDPANVAASIRTLQGANGGGNPKRRLggAPFYPTR-------- +>APDOM4702015191_1054821.scaffolds.fasta_scaffold86010_1 # 3 # 1235 # -1 # ID=86010_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.731 +---------------------GGAPTFGYGHKIKKGEDFS--------------QPKFTDAQVEALLMKDLKDADNIVKKK-LGIDAYNKLLKEypaGVQMFIDLAFNIGPAFavknhpdYLNYPKFTQGVLTMDMNLMRAEYH------------------------------ +>tr|A0A2X0V8I6|A0A2X0V8I6_9GAMM Uncharacterized protein OS=Anaerobiospirillum thomasii OX=179995 GN=NCTC13093_01488 PE=4 SV=1 +---ARFQTYFYKVSRTAFTDpADNVLKIGLNHTNQDIEPFT-------------QDSTWDDKKIFKVWRADLYYECFRLG---KWLQGYKLEQRHydaLIDLLTDDVFKlaDFRP-----HSLIELIKYGSFDAAADQFL--RWVV------------------------ +>RhiMethySRZTD1v2_1073278.scaffolds.fasta_scaffold1875888_1 # 2 # 694 # 1 # ID=1875888_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.657 +----------------------------------------------------------------EIMAHQKNVNPP-----------------------IHAAGRYQIIGDTLAAL-----IRGNYGPTGVNINDkftpdvqdklgialikyrlkTGATPENFineWRGLKFVD------------- +>SRR5271157_294275 +------------------------------------------------------------------------------------------VYPAGGYD-----G----------AGGVY--HPAGARGYRCG-------------------------------- +>SRR5579885_190605 +---RRRQSMHCKMRTWLGSTRRSPTCCRASLNFPGPL---rRRCSTWrsnwalagwcrnfrtSRWRQRHATG--RPAPISATGResrsggirrpqtcsggrrrCRMRRLKIALT---C--ALLAALTGVAVQAILLL----HAAT--VATRALPGAVTVELQAARTA-------------------------------- +>SRR5579883_2689376 +---------------------------------------------------------------------XMKTIQGVLS---C--ILLAAMSALVGYTILLVRT----------ATAVAAAIPAQIESTRTD-------------------------------- +>SRR5690554_2452655 
+--SLQICKEY-GLSsersIaIAFDRTvnqgLGGAKRLYRKLVDKGNKKTvDEElEISI-KIrDNWSEGHFINTRLTKIIESK-----------------NLSDEQYEFX------------------------------------------------------------------- +>SRR4051812_14868910 +----------------------------------------iasgkaatvkEIEAEWLslkNnpNrlvLirsGasasaRVTDLELSPASRQRLFDCVT--NAHEVQLT-TYFAGFPNWPGDAQLAVMAMAWGLGMYFPPKFPKFTAACQAQDFDAAAKECNISSWR------------------------- +>SRR4028119_1530252 +---LSDL---yEYEIiDKQLTKVLKKRLIGYGFDMNRPQACAEWTEAFk-GElsFDAaiKKEIRLNQDQAHSLVVIRVSQCQKELAE--IYKPYWKRFKPNEQHAIVD--------------------------------------------------------------- +>SRR2546427_5851912 +------------------------------------V----------------VTLYFDL--CVFF---FFFFQAEDGI---RDLTVTGVQTCALPICFIDMAYNVPSALSPD-TTVMRLANAGQLNAACEQMP--RWVYGT---------------------- +>tr|A0A096DP34|A0A096DP34_COMTE Uncharacterized protein OS=Comamonas testosteroni GN=P368_21395 PE=4 SV=1 +------------------------------------V----------------QKNQYIqavVADPDMPDGMRIAMSAVNTR---SHLPPMVLYLPsvtrpl-------------KPASI-S-PSHRRK-------------------------------------------- +>APCry1669188970_1035186.scaffolds.fasta_scaffold326692_2 # 255 # 452 # 1 # ID=326692_2;partial=01;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.606 +---KNKIS--------EFKneDGTDRAAISGNDEVPDPK----------------IGSTETLILLERLDDQGVDTTDPELR---KSIMDYIKG---ADN----TYYDEL---L--NPPEVEIIPEEKVQP----LMGKDEVX------------------------ +>SRR3954468_16411972 +-----------------------------GMhTWLDRPV------------TKYYPVYVTQGMADLEFGRQLAKYQEIVEK-----TVCAALNQSAYDSLVSVAYNLGHIN----PDIVMRIHAQQKPT------------------------------------ +>GraSoiStandDraft_45_1057281.scaffolds.fasta_scaffold1606294_1 # 3 # 407 # 1 # ID=1606294_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.662 +--GKAFIRQHERLTLQPYYDTNG-YAVGYGMhTWQGAPV------------TRRYPSFVTESDVEEEFDAQLNIFAITVLN-----AVCAPLTQPMFDALVSVAWNVGRVN----TSIVHKVDLGRPVV------------------------------------ +>SRR5690606_7048084 
+--VMDFVGKWESGSKRVlvvYadKLAGGLPTVCNGLTKHVTTTPI------------VVGERWTDAKCAAEERRAMvEKVQTPLLQC-----LPRDVPQSVFDALSSHGWNFGVNRTCG-SVAAQYARAGDYARACDRIAHdvdgrPVWSTASGKF------------------- +>SRR5690625_5642767 +----------------------PPTAHCArPLPDAPPI------------------SEYTEAECAAMLATGVGQFYDGLTKC-----IHKPLTQGQAVAVTSWAYNVGLSAACN-STLVRKLNAGAPADEDRK---stRLNSS------------------------ +>ETNmetMinimDraft_4_1059912.scaffolds.fasta_scaffold15148_2 # 336 # 1148 # -1 # ID=15148_2;partial=00;start_type=ATG;rbs_motif=AATAA;rbs_spacer=9bp;gc_cont=0.419 +--LYQFLGEWEGESQ--ytvYadKLANGIPTVCRGITHWVTDTPI------------IVGEVWSREKCEAEEKAAIvAV-QSNLILCF----NSRYPPQSVFDMATSHAWNVGVRNTCA-SQAMKAWQAGDWELGCRRLQVsdggkLVWVYANGKF------------------- +>RhiMetStandDraft_8_1073273.scaffolds.fasta_scaffold1148705_1 # 1 # 216 # 1 # ID=1148705_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.722 +DQLFKYLALGWEkFIPKAKWDYAQ-YSIGYGEGYNWDKNRK-----------VIASDVIDEPTARRWFIKSAEKYFDYVNN-----SVKVPVTENQMVAMTSLTYNIGPGSatsgtgFRG-SQLLKDLNAGKpLTEVAKGFD--RYTTAK---------------------- +>SRR5262249_25163728 +---VSYLLTPQGQQNVVSIGQaVGDYFTGSVSPLSPGDI------------------GAAREEA---VAQAV----GGTIS-----RMKVX-------------------------------------------------------------------------- +>tr|R5PSA4|R5PSA4_9BACT Lysozyme OS=Prevotella sp. 
CAG:1092 OX=1262919 GN=BN465_02038 PE=3 SV=1 +--GKKFLKENEGLSRQGYWDTNGI-TLGWGHKIKSDDPKWL-----R---EKNVGDWISKKDADKLFEKDMETFINPALRRicVELNDNNVHVNQHMIDAMASLIYNCGEHGFKS-TDFYALLKQGKVKQASRILIHTKVA------------------------- +>tr|N9Z065|N9Z065_CLOBU Uncharacterized protein OS=Clostridium butyricum 60E.3 OX=997898 GN=HMPREF1084_01754 PE=4 SV=1 +-KYIELLKDLEGFTPTW-DNSSKYGAIGYGTDASGNVGKR---------LKAEGVTVCNKQQATEWLKEEVNYWAKQVRN--KCADMGVTLPQQYFDVMTDVCYQWGNQQWALLDLMA--------qtgnkAEVKSYIL----GLGYPRRDKARVNI-LDGRYE----- +>tr|A0A1S8RGG8|A0A1S8RGG8_CLOBE Gamma-D-glutamyl-L-lysine endopeptidase OS=Clostridium beijerinckii OX=1520 GN=ykfC PE=4 SV=1 +-SQKEFAAAVAPYAKtlfnkyhifpgviiscmiqeSWTGSGFTKLATKY---------Y------------NFGGVKCSASSENA-----IQDY----K----PPSSEGNMLYRKFDSLKDFIIYWCELISGSSYNYKSAIA----dkNTPKEQIF----GFDntpYAGDKTKGSQMWniySSDGFS----- +>tr|A0A059DRI6|A0A059DRI6_9RHOB Uncharacterized protein OS=Hyphomonas sp. CY54-11-8 GN=HY17_04740 PE=4 SV=1 +-NLASFLLVREGFRKHAYDDAqpnvtltedteiKGTLTIGVGTTHYPDGQPV------------RWNDSVSKQEALDYLQHYIQTVIEPALE--NL--IHVPLEPHQYDALGSLLYQYGEPEVSGW-RLINRINRGdDWRNIILEWV------------------------------ +>tr|A0A1N6U162|A0A1N6U162_9FLAO Predicted Peptidoglycan domain-containing protein OS=Chryseobacterium sp. 
RU33C OX=1907398 GN=SAMN05880573_107106 PE=4 SV=1 +--YVNhaadkggptnkgiTLATWKQYAKE--------------------DLDIDnP--------TLEKLKKITNEQATIIYRKRywePKKFCEINDE-----RVSLMIYDWTITsggaakqvqkllvnefgKDIEIDGGIGSQTIKAinnvenQDKLLNRlaeirkqyYTNLTYTDGKKNDQ----------------DVFLEGWHN----- +>ABLZ01.1.fsa_nt_gi|174572400|gb|ABLZ01220378.1|_1 # 1 # 102 # -1 # ID=220378_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.471 +-RARSLIIEKEGFREMPYWDE-NAYRAGYGSDTYtTESGEV---------KRVVEGQKVSRADADRDIDRRIKTEfMPGARNA-IGPEIFDKLPIDAQAALTSITYNYGAGA--MMPGGgaasvaqAAKSSNGNLESIAKAVEGLKDHNEGinAGRRQHEADLIRNSRK------ +>SRR6478609_1461791 +----RFNKPLEGEVLWPYQDILGLVTVGIGCLIENTTGDraalalsvpwvggptpRTIREEFakvdayPkalhfNRYRDACSLRLTEDGVSDLLQNRAEQFERVLK---AGFPGWDDWPADAQLGVLGIAWACGPGFWHTFTNFQRAANARNWLTAMRCAGiRSAGNPGVVPRNAQVALCF----------- +>SRR6266536_1710727 +---------------YKIKGEKgSNVTIGYGHLARSaedKAKY-------------LEGTKITDEEANTLFTKDYNDRK---------VTL-PGLTEEQNNAVTDARFKFSDTRAKI--------------------------------------------------- +>SRR5690606_7963153 +--GIDMIKGFEGSKDQGW--dV-RQFSGPYGVKRG-A------------------NERLTLEEAERRMKGEISMIAGRLAK------SIkKPLTQNQHDALVSFFYNLGTGkgRLNK---IAGMVNSGNAAAVPGYMR--QFTHA----------------------- +>SRR5688572_5262200 +------------PKDQGW--dY-AQYSGPYGVKRG-K------------------DEKLTLEQAEVRMKEEVAKVERDMAT------KItGPMTQNQHNALVSFFYNLGTGkgRLDK---VADMINNGQADKVPAYMS--QFNR------------------------ +>RhiMetdeSRZDD1v2_1073273.scaffolds.fasta_scaffold5518858_1 # 2 # 235 # 1 # ID=5518858_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.521 +--GVEFICKYESYQPYCYHDN-TQQSIGYGSKCGDGSLHT-------------EGAhYISKSDALQTMISKVNSeYAPHVRW----ATQGLTMTQNQFDALCSMCYNTW----QVeKSPLVRYLKGEISEATArSETAEFRINKGTkyenglRNRRKAEADLFFSGSSP----- +>APTNR8051073442_1049403.scaffolds.fasta_scaffold111866_1 # 1 # 180 # -1 # ID=111866_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.406 
+--LRDKIFKNEGFREDMYLDQGGTPTIGYGYTKYSLTgknGIPNWREYWKA-DGTPTGKKMTREEADEIAQLVTQEYIDQVNK---DI-KNENLTEEQYNSIVDLYYRNGRGNVKR-SGVVELINEGKIDEAADLIEKggnnGRLAKVggkvvqegdegytgITNRNKRAADGLRSGSGA----- +>ERR1700722_969827 +-----IAAaqmSHLKWgvpASVSLA--QFILESNWGRSMPGGPSSN-----------NQFGIKARAGEPS--V---AAQTHEVEHG---A---TI-TIVARFRRFSSLTeaFDAHGALLAR-AKPYKlaMMHKDDPDRFADALT--GHYATD---------------------- +>tr|A0A0Q9ZNA1|A0A0Q9ZNA1_9GAMM Uncharacterized protein OS=Psychrobacter sp. P11F6 GN=AK822_04835 PE=4 SV=1 +-TILKNLKAKNSTFKADYNRrisggynldsvPivpNGAKLVSFNKLLKLKAGTSdnmfyksfdgfagnetNFHKTIdilegDRYlphsyYYVYAHgsnrsilwvnTRDLDPVQRALNItksknqyVTISGKTESqKR---DAINKFYEFISlngyTDSQTLVLLSCNVGNGLtktlgeyiaahpnigtvyaptsltwyssaDPSVSSYARWIHPDTG--------M----KTLpEKKLMGHFRIFN---------- +>SRR5579863_2057521 +-DLEDPVR-GEGFSPSPYADdkggitvgaglhpqnKaefRTYALVYKGTTLLASDHDKdlawdnikaFfnahKQPGSvkPVNlpagaYENVTNLEMDRTESRERAAal---LSTLDRqLR---ARFSDFRSYPREAQRAVLDMAYNLGMYGlATKFPNLDAAVRRGDWLTASRESKR----EGIaSARNEHVRNLLL---------- +>GraSoiStandDraft_1057264.scaffolds.fasta_scaffold483181_1 # 167 # 628 # 1 # ID=483181_1;partial=01;start_type=GTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.671 +-KVFEAVVEEEGFYDHMYLDSavpKSLVTVGYGKMLNSPEEAAkypfqigdtpasetQirqaWNTVYnsvqgRNNytasyFASMTTIRMTEEDARAITIAHIQGNLNSAR---EAFAEFDECPVPA--------------------------------------------------------------------- +>SRR3954466_8818294 +-KLIAYMLRREGIVLVPYRDNKG-WSVLAGHYLGGENQPHP---------PPTGLQELTISKAISLMVKDVNERAKLVNK-----YIKVPIKQEQFDTLVTLFYQGGTDWLD---AVCNIINQRDVinhdsvltsnREAAKELL--NWDTDNsGkhlegllMRRGREVAMFTAAEYG----- +>Go1ome_4_1110791.scaffolds.fasta_scaffold71265_1 # 2 # 463 # 1 # ID=71265_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.444 +DDMIPFLIEQESLHDKSGKDVegEGKLTYGYGHLDSDGSLGKN-------------IAKLNDTEykqwSDSTLKSDLNEAYRTGRQSfsnnfsgatlnkdgyTTDYSVYDSLPDDAKAIITDFQFNLGN--IKSYPKLMNALKDGDWETAEDEYVRKLNGIPLGKRNDDT--------------- +>SRR5690625_7648074 
+------------------------------------------------------------------NEKVMLAIQAQLAMC-----ISHPITQSTFEALTSHAWNLGWPKTCN-TQSVCLINQSKIKEGCNLLSNK---------------------------- +>GraSoiStandDraft_2_1057267.scaffolds.fasta_scaffold4696803_1 # 2 # 244 # -1 # ID=4696803_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.551 +-TLLNFLKRneNDGYKEQGIHIVDGEEHIGYGHRIQSAEEKKIFEKII-----NENGGIFPEAMASQLLVDDIRNHKGDARKVYNNfvksksqdkynninlkTRNFDSVSPNVRDMLTALAFNIGGKenklfgedqsGLAEFDDFMIAAANGDYPTMSKE-------------------------------- +>OM-RGC.v1.032023033 TARA_070_MES_0.22-3_scaffold88075_1_gene82853 "" "" +-TLITFLKKneNDGYKKQGIHLDhKGNKTVGFGHKIKNKEEEVYFEKII-----KENGGIFPEELANRLLIQDITLAKNGARKVYNNyiksimtdkynsknvkVKTALGNKSHKDHKKAK----------SLYQRFMDKFKKKKDKP--KK-------------------------------- +>DeetaT_15_FD_contig_21_1511974_length_278_multi_3_in_0_out_0_1 # 2 # 277 # 1 # ID=114883_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.529 +-KLLNFLIKQEGFTSDMNY-DrNGKYTQGYGIQLLPEEVKAH----Q-----SDRGNEIDEQTAKKQLGKEILRREDIARNVYNYytkgkdRKSFDKLPENIQMMLIEMSFNMGSNtkqgrGLAQFTKFIDGASLEDYSIMKN--------------------------------- +>tr|A0A2D7VI23|A0A2D7VI23_ACISP Uncharacterized protein OS=Acinetobacter sp. 
OX=472 GN=CL490_18190 PE=4 SV=1 +-----------GNRFMPYYDPRGeYLMIGYGHKLDGDLS--------------NYVQGITIEEAESLLISDLKEAYNKASK---NYYNFEDLKPKAKEALVDMYFSLGDD-VNKLNDFNRELSNGNIVSAAKYLQ------------------------------ +>SRR6185503_4717253 +--LETHLKDVEGYSPEVYADQKGIPTVGTGINLRSPSSAKAMDdlGIVQKNA-QEAQELLGEDDLERIKTNVIGQKKDLLNTIKSqSF-PQKELKENQEAALLSLAYNS--PQLIG-PNLRQRLNENDDLGAMREII------------------------------ +>SRR6478736_631865 +--FDPFIKKVEGEAQDSYMDTKGNPTVGTGLNLNDETVQGLMNlrGIDPEQV-KTGERKLASEELDDIHNQYVDKREALVRDkMGKd---LYDMLKPHEKAAVMSMGYQS--LNNLG-PVLTGRIATDDKIGAMREMI------------------------------ +>SRR6218665_3611180 +-----------------------------GFNLQDADIQGIMAdkDISHEDV-AAGTRDLSDPEIDAIQDSYLDKREPLVRNqIGGd---MYDLLPAHEKAAIMSMGYQS--LNNLG-PTLKGYLANGDKIGALREVM------------------------------ +>GraSoiStandDraft_40_1057318.scaffolds.fasta_scaffold606115_1 # 1 # 699 # -1 # ID=606115_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.627 +----EFTKDKEAFRPNFYDDG-DFDAIGYGFNLGNPLTAKT----LPQEV-IDRERPLTRAEADRAFPVFYADAEKKMRNF-AGSKAFEQLNDNTQNALIDLAYTMAPDRLATFEKLKQEIKKGDTEGIVREIKDSHWYRTHKIRGNRLFVKIR---------- +>SRR5215472_6897462 +-------VDAPAIEVPSSALSAsARPLIGAGFSLDLPEREhvQrdplnphlflepssaDlWLaAGLDpaklqrileefyarrDEWSRrtfrrqiyKLDPQITDDEADALLRIGIVQSIDNAK---AYCRNFDRLSASQQMAMTQLVYQMGV-NLEEFSSFLTLLNQDNGNgadtaalaNADAeywrnvqlSLVHSQWARLYRARATAVIAMLDP--------- +>SRR5215213_9471725 +----NLIMSFEGFVGHVYDDRslaqiitrddcvlqggvyvvtstGGTATIGCGETSPEVID-------------LHWQTPMSQDEASTILSGRVRGFTDAVASH-----CHRPLSPRKHAAFTSFAYNVGMGSFAG-STAVARYEAGDDRGAVEALQ--LWNKVDG--------------------- +>tr|A0A239A0L2|A0A239A0L2_9PROT Phage Mu protein F like protein OS=Azospirillum sp. 
RU38E OX=1907313 GN=SAMN05880556_101377 PE=4 SV=1 +-EAVEMLVRHEDAYEYPYGDSLGFITVGVGANVNQWDQFKslpwrmetedgRFAtltevtdgyeklrKFIDdaraeaekngekkinylaKKYKKITKLRLSSADYKKMLKDSVQQFEKDLR---KKFSGFSCFPTPAKIVLMDMIYNIGTTKFNaeKWKNLFAAIAIRDWGAAAQQS------------------------------- +>Dee2metaT_28_FD_contig_51_125094_length_427_multi_4_in_0_out_0_1 # 3 # 425 # -1 # ID=1137928_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.574 +----ASIYSDEGIRTKAYLDTNDNPSVGVGFNLKDPVVKTSLmqmigedkyKLLMKeAKYddESKKTVELTEPQMDAVFEAVIAEKESHVST---WYK-DIDLTEEQRATIVNLAYQGGGTFVGPTTEFYKAVKKGDWDSAIYEIKHRSNAKDiqgIQNRMDRHSDVLSNN-------- +>SRR6516225_5896998 +-----------HPNLGAYLDSKGIPTIGIGLNLNALDSSTksqlaaDVRSFYStqaannvpgytnidnltdtqvvnmlkTQaqqagAGGQGQDALTKADAQDLFNTVYAKHEQAAAAA-IGQSVWDSLASNVQWVLTDIDFNT-GSV-ANFKGLLADLQAGDFVRAGFDFMDSKRSNDVQyARTLAGFEFLLQGH------- +>SRR5438046_2104667 +-------------------------------------------------FPQRVPKAITAVDAQSLFEPVYASSIQAAITA-IGQAAWNTMPTGVQWVLADIQYNT-GAV-ATFSTMFTYLRNGDFVGAAFDFADSLRTQQVQyDRTLVGLQ------------- +>LFFM01.1.fsa_nt_gi|998555788|gb|LFFM01000561.1|_32 # 25425 # 25646 # -1 # ID=561_32;partial=00;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.680 +--------------LGTYSDA-NQPSTGWGHRIVESDSLGvsRYgPTYIPRKYGNyreqqdkndKWFRDITLEEAETFFVDDYRDKSEKARN--MLLPgKFDLLPINAQAALVDMVYNMGENEVGnRFGRMLTALNEGNVDLAAENIMYvnadrpnvrhykvlSDYYSKGGDRPRYIMDLMRGN-------- +>tr|N8SAG2|N8SAG2_ACIGI Uncharacterized protein OS=Acinetobacter guillouiae CIP 63.46 GN=F981_00706 PE=4 SV=1 +--LRQAMIKAEGAIPREYPSPEgGNNTIGIGHKIKDSEIKSgRFK------AGGEYPQPLSMEQMLKLKDEDVGLNGGNTLN----QIVNVPLHQYEMDAIIDLCFNGGKGALSGtastlydengnkqpagtnKVALVDLLNSGKYSEVPKYLRnhfntsNRMWSRGVQNRRNMDARMFVNA-------- +>KBSMisStaDraftv2_1062788.scaffolds.fasta_scaffold11384081_1 # 2 # 217 # 1 # ID=11384081_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.667 
+--TANMIATQEGFSNTPYKDG-KDRSVGYGFYLPALEADEkALIKD---------VNNVTKEEGAAVLRLKVQKIGNYLD---QEIQGFRNIPEKAQSAIISMGYQLGVTNIPkTWKKFTAAIKEaAQYTEgsveqaqalakAKFEMLynvaedgtisLNKWATQTKKRAFEMAEAVGED-------- +>JI102314DRNA_FD_contig_81_646107_length_302_multi_2_in_0_out_0_1 # 3 # 302 # -1 # ID=1079606_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.313 +--AATTIMKDEGFEYTPYDDN-GKQSVGHGLQIESLEPDEkALIKD---------VNNVQPEESTAVVALKVQKTSDYFT---DVVDGFKNLPESAQSGMIQMGYQLGRFNVTkQWPKFMESIKEaAQYAEgsieqatalanAKFNMLynvaedgkvtATKWATQTKDRAMRVADEVGAD-------- +>SRR5262245_11631570 +---LAKIKVGESFVPYPYVALEgektkkGGCTIGYGHVITLNDGRVCeHdpndpqeNDPKKKALkrcicTPPWHMDKDSQEAEELLKRDIAVHAKWIKE-----HVLVDLDQGQFDALVDLSLHVGSLP----QSLLDVLHAKictDDEAVRKEYLQtSLYIKDNPAR------------------- +>SRR6266404_5678039 +----------------------------------------gplatmqEIQSGWLavkhGgmdKAGggkqsGLSDLRLDDAGIQQVIGQKLTNNESILR---KRITNWDVLPADVQLACLLMAWAMGAN--FAYPKFFSLISSASP-------------------------------------- +>SRR3954465_1799160 +-----------------------------------PAALAalplmhpggvpataaEKIAAYHavkdDphaaAAGgtyaaKLTTLRLTREAMTALALAKYDSNNRELV---ALLADFEDFNACGQMALHSWAWAVGAP--SPYPKMMAALRERDFTTAAVEIFIEEWTKnrkgeriK----------------------- +>SRR2546422_10568260 +---VNLYLQQAGVGsttV----EQAGTQTIT--QNEFA--------------------LYLQVQRELVLRDRLrtrllhrsraHTGLlQVEVH----RTDWHASRT------RSGAGRVARSEER----------------------------------------------------- +>HubBroStandDraft_6_1064221.scaffolds.fasta_scaffold8991201_1 # 2 # 220 # 1 # ID=8991201_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.703 +--TASFTMHHEGFKTRSY-TLDGNRVIGVGHNLSAPASPKIIREVFGGKISHgdlvSGNRVLTEKEVEKLFAHDLQSKLDDAH---RMFGEekFNGFPRDLQRILIDGIFM-GA--HKSGHTTIKAIRNGDWAKAADNINR----------------------------- +>ETNmetMinimDraft_31_1059906.scaffolds.fasta_scaffold223768_1 # 3 # 206 # 1 # ID=223768_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.676 
+----NFLWGLTANALAE---SgltsnaAGDPESVIGKRSFTPVkKFCSFG-YWQLNL--CSKDgeganllkSFGQTLTRDLYDTDTE--------------------EMYFDFIV----NENQQ-FSWVSKRMKELFPDDWNNNSTSAE------------------------------ +>GraSoiStandDraft_28_1057319.scaffolds.fasta_scaffold870398_1 # 1 # 438 # -1 # ID=870398_1;partial=10;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=3-4bp;gc_cont=0.680 +-----AIR---------AKVGgpsnanaimaaypavaaqQGLAQKivqvasnlqiqdpgwLANLINFETGgTFDAAmqntngsD-Cwgl--IQF--CRDSgaknvgqsweagskppdeLIGRgaVHQMDYVEKYLAQFKGK----------LNSQVDLYAAVFYPASLSWGPD-FNIYEH----IYNRT------DKG------------------------------ +>SRR6516225_2363442 +-QLKNRVKKYEGEVLHPYLDKVGIITIGCGFNMEKHAVdhpgdPYTYNSmqradpsgdevwqlaDIPENFqevlasvhaPVNQRRTISPQSAEKLLEISLDQAIHNAG---AYFRHFNAMTDRQQEAAVYLVYNMGTH-LENHPKFLEVSNGqegPDWRKMGDLLL------------------------------ +>ERR1035437_130284 +-------------KWFPHKSVEGgMPTIAYGHKIKNDIELS------------HYSSGTTDEEANRILIEDLEIAKQKVYSYIkSKYKVNIKLSLKQEEMLSEFAFNLGG--LEKFPKFVNSILRNDLEVTNNEYKRHTGGKELTGRNTAFYNRY----------- +>ERR1035437_6188634 +-------------------------------LFRSDIELS------------HYSSGITDGEANRILIEDLGIAKQKAYSYIkSKYKVNIKLSLKQEEMLTEFSFNLGG--LEKFPKFVNSILRNDLEVTNNEYKRHTGGKELTGRNTAFYNKY----------- +>AntRauMinimDraft_3_1070383.scaffolds.fasta_scaffold00335_18 # 12995 # 13162 # 1 # ID=335_18;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.500 +-------------------------------------------------------------------ADASSMPNQIA--KVrERFKTLVMRQST-----------QGQ--KENLPISGKDLLDMGVPegkaigRIKNVIKKAVL-knpSLSKEEAMKIAGRV----------- +>KBSSwiStaDraftv2_1062776.scaffolds.fasta_scaffold5076654_1 # 1 # 183 # -1 # ID=5076654_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.645 +-------------RWMPHKSVEGgTRTLAYGHKFPSAAAQKQW--------YKDNPTGITNDEAEQLLTNDINEHIVRAERVVdSKHGKgtWDKLPFESQLMLTDYEFNVG---LTKFPTFTKAVISNDWDTAKKEYKRYTGKKELTRRNNAFFNLF----------- +>GraSoiStandDraft_5_1057265.scaffolds.fasta_scaffold5428306_1 # 1 # 219 # 1 # 
ID=5428306_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.639 +--------GKEGNIYKAYADSgkDGYLTIGIGHLVDPRKKGSvprtarMlrqINPLLDVQAIIRKEQGLTKSEVFQLFEMDVQEKIDTTR---RVFPHLHTYPEYVQIALIDATY-WG--MLGKSPTTRKLIKLGRINEAKEEWLDNKTYRRG---------------------- +>tr|D8PEP5|D8PEP5_9BACT Lysozyme OS=Nitrospira defluvii OX=330214 GN=NIDE1980 PE=3 SV=1 +---VKRIAEHEGTIDGLYNDPSKYCTYGVGHLVRKSECFMlagansdeQLKKsiqkqwpgksyettYVPRtivtsenfakikeaatrnaqEYfaqrqhKKSFvnlasadqerikthaeaaikeETDLLPFVATDQFKKDLQSYETTVNS--G--VTGVALTQGMFDALVSFVYNVGKGAFNS-SQLLKKINENIFMSgddmkkreeaikeIEEEFL--KWNKSGGsvlkgltTRRQDEADRFLS--------- +>tr|A0A1H0SAN6|A0A1H0SAN6_9RHIZ Uncharacterized protein OS=Phyllobacterium sp. OV277 GN=SAMN05443582_104418 PE=4 SV=1 +---EDFTASREALRRNEYDSdgaATGNVTIGIGKLVHEKAFVMdqtkvdqtvQYLRDngqpvtVDnivSAVGpatglkgkdltnameeMKYVNGITDQDAIDLFKNKTYPEHrKNVEK-----VIHVPLYKNEYDALSDVAYNRGPGIIDKtgkkkngqewnpGGNYAEWLNRGRYHYTGDDRIRNLARTAVPNRRDDEADVFLNGNYAPQK-- +>tr|A0A146H0Q5|A0A146H0Q5_9AGAR Glycoside hydrolase family protein OS=Mycena chlorophos OX=658473 GN=MCHLO_00029 PE=4 SV=1 +----AGLKPDEGYRSKTYLDVANIPTYCWGHADRSSK----------------VGTYHSPAECEAIFSKDVNAKLEAVKKC---TPILANM-PNQLAASTRLTFNIGEGNYCH-STIAKNFNAGKFKEASAKK-RSAWKD------------------------ +>tr|A0A2D6MGH9|A0A2D6MGH9_9ARCH Uncharacterized protein OS=Candidatus Pacearchaeota archaeon OX=2026773 GN=CMI47_18280 PE=4 SV=1 +--VSSFIQKWEGFLGSPKMlPNEEYYTIGIGHRLDGSKRSRsAFSKALPhKNYDQfyRGQGSVTKKEAQTLFQADLPNYVDRARNL--TGDNFDSYSSNLQKNIISATYR-GSWGY--SPKTRRLLAEGKFEEAADEFLNSNEYRDAiklnrrgiRKRMEAVAEAIRQEG------- +>SRR5580698_6380186 +-------HRHEGWVLAAYPDPNtGRPLIGAGFSLDVQATphVQrdplnphPfvepssaelWQaaglapdrlqQIlaqFDHnvaAWSPktyrrkiihhMLTPQLNEEGNARELgRLDGFMETYGEHWKtvqgtlidSQWARRYSSR-ASSVIAMFDPEYSQGP-NAAEERIEATLRPPAppkprihstatlraasYSKRSSRAHGKKAHNPQTKRKLTX---------------- +>ERR1035441_3365658 
+-------GHQ-----------QmNASPIEIGFWRRRTHRdmNSadleggeElnqpvlergRHaigvrveQEageFT-id----rgr------RRTFIGQHDGIAAIQAIYNAR---AYCRNFDQLSDSQQMALSQLVYQMGV-NLEEFSTFLGLMNDDnvdssqlplpaegpeYWRTVQLSLVQSQWARLYRTRAISVIAMLD---------- +>GraSoiStandDraft_59_1057299.scaffolds.fasta_scaffold4550551_1 # 3 # 212 # 1 # ID=4550551_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.576 +----------------------DAEFIGYSHRCTQAGECA------------KLPDPITEDIASNLLKEDIKPLERCVTAL-----VNAELNDDQYSALISFVFSSGCSAFQK-TDILALINQNKLAEAGKLFC--SYTK------------------------ +>JI9StandDraft_1071089.scaffolds.fasta_scaffold683045_2 # 243 # 557 # -1 # ID=683045_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.654 +------ATPAAPapsasGPCAEFADTLSFFSPGWDVARMQQI----------------MY-RESRCQPGARNASGA--SGLlQI----------MPLHVPNL--APCG-------------------------------------------------------------- +>tr|H6RA96|H6RA96_NOCCG Putative peptidoglycan binding protein OS=Nocardia cyriacigeorgica (strain GUH-2) GN=NOCYR_2935 PE=4 SV=1 +---IPFNTPLEGRVHYMYCDWKGWVSTGVGNLIDATKNAMaapsaeereaslalanqyrwttpagdlagpdLVANDWDAvkaqlglaaqghrAYKQFAKLELTDEEIDRMVFVKLDQMETYLKS-RDEFKGFEEWPADAQLALLSMSWGMGPA-F-KFPRFQNYVANADWTGAASECKFQPDQGTIKIRNLLNAQSFRNA-------- +>AntAceMinimDraft_10_1070366.scaffolds.fasta_scaffold545967_1 # 3 # 224 # -1 # ID=545967_1;partial=10;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.559 +--PrfDTELSGFKALLNdeafmreleelkkefPTIQNN-ELFNVifkesSFNPTAKSA------------------ANAAGLLQMMPKVLGEMGLTTEEVlsmdpadqLIVYK--GYLKRWGYDGSSSLGVLQAAPAYRNAD---PST-VIYRKGSK-QANMNP--GWQDANGN-------------------- +>LauGreDrversion4_2_1035121.scaffolds.fasta_scaffold3625676_1 # 2 # 322 # 1 # ID=3625676_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.374 +--TdlVGLIKTAEGLRTESYWDF-KQYSVGYGSKGK-K------------------GEVIDEAEAEKRLAKDISKFRAIVVKAKE--THGYDWNSDQIDALTSFTHNLGATNFN---KLIDGGKRGDEE-IIEFLP--QYNKARVngklT-------------------- +>SRR3546814_15418900 
+---------------------TLLPFTTSFRSGLEVL----------------LGMVGTDERCETVAEVVRLHAAAGGRGC-----SARPMSNVVLNAYADFAYNVGVAKFCG-STLVRRYNAGYDDEACAQLS--RWNKAGG--------------------- +>SRR5580692_1785990 +---ADFTAGWEGFSPSPYYDVNGY-AIGYGNHYYEDGSAVG-----------ADDDPITQDRALQILTFYITQNANALIP-----QITAPINNNQLAALVDLRYNCGTI----TTTLLDLINSGaDAATVAAQIL--KTCTTSGgvpdpdltPRIQARAALYQSGA------- +>ERR1700685_981974 +---APFTASQESFQALPYYDVNGY-AIGYGNHYYSDGTAVD-----------AGDPAISQSDAPYLMVFYLTQGGNTILS-----QITVPLANNQLAALMDAKYNEGSL----GSDLVNLINGGaDPATIAAALL--QTDVAT-NRAQAESNLYLNTS------- +>SRR5438067_1805756 +------------------------------------------------------GTPIDEPTAQKLYQLGYDEAVTTAR---GIFNAFDNIPQAQRAALLSLVYNLGDQVSEVFPKLVEYVNKGQFALAGWELVAAIRTDQVG--------------------- +>ERR1039458_7193647 +------------------------------------------------------RSNVTTDEAAGLMANDLIPVEKTINN---NFN--WNLTQSHYDSLCDFGYNSGSGHLQT---AINFLLNNDVTGFTNYVE------------------------------ +>tr|A0A2I5ARC6|A0A2I5ARC6_9CAUD Lysozyme OS=Synechococcus phage S-LBS1 OX=2008320 GN=SLBS1_A30 PE=4 SV=1 +--LAPALRRSSrRSRVASWRPTQtldlevcqnYHWL---GHNERAWCSSA------------RKGDTISQALADELLRAEILRIAAELH---EIIPAAAKWGGNQQAAFISWAYNVGLGAVK-DSTLRRRINAGesAQVVIPQELP--KWDKAN---------------------- +>MesohylFT_1024984.scaffolds.fasta_scaffold111564_1 # 2 # 532 # 1 # ID=111564_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.650 +---SQIIREREGFEPTAYNK-DGTWTVGLGTTRWSSGKPV------------KQGDTITREQAHKEHEHHIHNVViPKME---KTVPHWDKMNDHQKAAVISFSYNVGENFMKnkNYEKIQDALSHpDNWNKVPGTMA--LYNK------------------------ +>GraSoiStandDraft_34_1057297.scaffolds.fasta_scaffold2907887_1 # 1 # 276 # 1 # ID=2907887_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.529 +---LALIRKYEGFSARPTQLPDGNWVVGHGHVRIGE-----------------AGGPVSKAEAGHLLALDLAPIERLVNA-----QVXCAMDAWRKSEVSGepeivaalvarravekALFLKGL-------------------------------------------------------- +>ERR1711977_84512 
+--FENLLKKLENDKKTGYDKKiqtwkshgsleGGTATIGYGHKFKKSDKD----------------RIYSDKEINDLLRKDISDARKDAKDVLeKKNIDWNNASEVSKNVATEIVFNTGKGTFEKYKKFHNEFENQNLDG------------------------------------ +>ERR1711977_144587 +--------------------------------------------------------------SVEMFSYSSSFSGGVSKWTLeKKNIDWNNASEVSKNVATEIVFNTGKGTFEKYKKFHNEFKNQNLDG------------------------------------ +>KNS7Surf_BmetaT_FD_contig_71_1404037_length_500_multi_2_in_0_out_0_1 # 3 # 500 # -1 # ID=341043_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.365 +------YKAFEDGKYYPYKDRKGKITIGFGRTNSAVKGLD---------IENDYKDGISVEQANDFLKQDIKSYMNNLNENyDTKYgsGEFNKLTDTEKYMLLDFEYNIGD-AVGIYKNFTDAIRTGDYERAAKEYKR----------------------------- +>GraSoiStandDraft_41_1057321.scaffolds.fasta_scaffold12874413_1 # 2 # 211 # 1 # ID=12874413_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.714 +----WIIKKAENYRADSYRCQAGKKTIGWGFTNVSK--------------------VKNIHHADEIFRDIIEPLYEEVNR------SYPKLTYLQKAVIVSLYYNSGSLTKIKRSDFAKALVKNDIKKAVKNFK--QWNKV----------------------- +>GraSoiStandDraft_56_1057294.scaffolds.fasta_scaffold3068817_1 # 1 # 246 # 1 # ID=3068817_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.707 +-----YIGGPEGFSGTVYEDPEGVKTIGYGFTQDAEGLQEAL-NYINETYGTNytiaglfdGSQTLTRAHAGIILALISTRYYNMVVEGMNE-AGFhtqtRGL-EMIFQIIFSLVYQMGAGFFDKFPNMIQALVEGRFMDAWKQLIykrgtkgsgTTDWYNQTGPNSPRF--------------- +>ETNvirenome_6_85_1030632.scaffolds.fasta_scaffold190730_1 # 2 # 307 # -1 # ID=190730_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.363 +---------------------------------------------------------------GIILALISVRYYSITKEGMEA-LGFnletRGI-NMIFQIMFSLVYQMGGGFFDKFPSMIQALLEGRFMDAWKQLIykrgtkesgTTKWFDETGPNSPRF--------------- +>LauGreSuBDMM15SN_2_FD.fasta_scaffold3498885_1 # 3 # 215 # 1 # ID=3498885_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.441 
+-------------RYAAYKDYgEDLWRIGYGSKKLGKRWL-------------KANDIATEEEIDTQLVEDLKIFSDLVSQ-----YVFVPLNRNRKAAILSFSYSIGISSLKT-CRLLELINS----------------------------------------- +>tr|A0A2R3Z0K0|A0A2R3Z0K0_9FLAO Lysozyme OS=Gramella sp. SH35 OX=2126553 GN=C7S20_00050 PE=3 SV=1 +-LLVDFVAEYEQKhdgsRemigLQPKPDAVGNWTEGFGHAMIDEYGD--FRTVRDYpTLesILPFSQVHTDEEAWALLKWDLRNKAAGANM-----RLRVELPQNKFDAILSHSFNCGYS-----QKLYHLVNTK----APDRLIK-EWFTKhyvtaNgiklqglVLRRHDEWEIFSQGEYK----- +>SRR3954454_694712 +-------------HLVPYEDAnptknadgtttkHGHCTIGWGHLIHHHPCDGQAsetggAADISG-HKFNYADGLTKAQAEALFQQDIAERVAVMRG---L-INF-DLNQNQYDALFDLFFNHGYGCKSdscwDPNDLIEAVNCGDLPHVLRLML------------------------------ +>APCry1669192587_1035420.scaffolds.fasta_scaffold84603_1 # 86 # 298 # -1 # ID=84603_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.469 +--FIEHLKKNEGYAdlvlsrkhsktkssHGAYYDPKGYLTTGYGSLISKAAKGSeqEkkDIAAFKEKHG-IDPFTLTKKQAHSFLTTKILPkYELGLNKF-LTENNLNNLSPTQKEGLLDMSFNMGGAkgkvtkgksGLRGFVSMFKALQEGNIENIEKEAKNSKWFKKDVGRSRSL--------------- +>ERR1017187_39494 +--LVDILrdRRHEGWVLAAYPDPRtGQPLIGGGFSLDLPEREhtqtdplnphqflepssAdLWRAaGFDpgrlddvlEvfyerkrHWSKrtwrrqlySLPAQISDEDAIQLVRVGGIQAIYNAK---AYCRNFDQLTGPQQMAMAQLVYQMGV-NLQHFTEFLALINGESGSAhgeagsvsepqvaamdltassldvdaalvfggqapeywqvVQKSLMGSQWAHKYRTRAVAVIAML----------- +>WetSurMetagenome_2_1015567.scaffolds.fasta_scaffold206733_2 # 773 # 1048 # -1 # ID=206733_2;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.496 +--LVDILrdRRHEGWVLAAYPDPRtAQPLIGAGFSLDLPEREhpqidannphpflepssAdLWQAaGFDparlddilKvfyerrhTWSKrtwrkklfSLPAQISDDDAIQLVRVGAIQAIYNAK---AYCRNFDQLNGPQQMAMAQLVYQMGV-NLEHFNAFLTTINPGAAPRgqkivsadagaevqtaadspmfaqamqdatatpapavmgqmdqspeywlgVQQSLMGSQWAHKYRTRAISVIAML----------- +>GraSoiStandDraft_45_1057281.scaffolds.fasta_scaffold4560761_1 # 3 # 158 # -1 # ID=4560761_1;partial=10;start_type=GTG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.705 
+------------------------------------------------------TKVYSPEEVRKLFVQKVNESKRDVAS-----TYGTDLPPDINKALVGMRFNLGAPRLGGFDAMNKAIKSKDFANAALNVIQtgkelTPYAEQVGDRANYYAALML---------- +>SRR4051812_8452615 +-----DVQQHESggkLHPVAHLDSVGIPTICDGIIRWPGGRPV------------KLGDTATAEQCQALMINEVLPRARALVAC---APQLY-GRGNQIRALIDLSYNAGFSGICN-GAIGRGVRAGDWTAASVAIL--PWDRGTFP-------------------- +>GraSoiStandDraft_58_1057296.scaffolds.fasta_scaffold69807_2 # 1356 # 2300 # 1 # ID=69807_2;partial=01;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.499 +--------------------PKHWPTIGFGHVVQRGEPYR-------------KGIQLTERQADALLRKDLRKFVSL----------YKHMGERNAILLGTLAYNIGPGAVNR-STVYKKLKRGDTNIYkeyTAHChYKGKFHKQLHQRR------------------ +>GraSoiStandDraft_32_1057276.scaffolds.fasta_scaffold935996_1 # 1 # 60 # -1 # ID=935996_1;partial=10;start_type=ATG;rbs_motif=AGGA;rbs_spacer=5-10bp;gc_cont=0.667 +--------------------PKHWPTICYGHVVQRGEHFT-------------RR-QYSESEADALLRRDYAKFCEL----------YKEYGRDK-YILAALAYNIGPGAVNK-SSVLKKLKRGDRNIFkayTSHChYKGKWHSGLHKRR------------------ +>ERR1700684_3083456 +----------------------------------------------DnlrtApKgqkqggpaangGQNfggYTTMRLTPAAINTICSKQLTAHEAIVR---GYYAGWDKFPADAQCCIMSMCWAMGAGALKAFTQFNVKLNASDFGGAVAFA------------------------------- +>UPI000523532E status=active +---------------------------------------------------------KTKKQIDNQFEKDISIAMNAAKRN-IGSKNWKKLSASQQWGLTNMAYQLGQGNQAQFKSMFKAIKEGDFKKAAANAQynfkkdspneiynETSWYKKTPTRVLDFQKFM----------- +>GraSoiStandDraft_27_1057306.scaffolds.fasta_scaffold1852080_1 # 59 # 334 # 1 # ID=1852080_1;partial=01;start_type=GTG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.649 +-VLRSVREKEGGYQDFKA-DPGNYdPTTnnLVGTNYGISARKLaEW---YGRPVTRNEMINLDESVANKIYKEEYYD-KFNIEKLPDnlqnivmnatvlnegggvetlqkllRQKGAKNISGQNKGKLLDIDGRMGPNT-------KAAMKNAKFtKEQFKNMYllylrnnnkkktktgFTGWQKFGKGWTNRFEDLSK---------- +>tr|A0A1W1XZS6|A0A1W1XZS6_9NEIS Lysozyme OS=Andreprevotia lacus DSM 23236 GN=SAMN02745857_03794 PE=3 SV=1 
+---------MASSRLPPyETRKgNGDWTVGYGHKIKKGEPYY--------------PYGNVRSITeqqaNSLFDADVKrEGEDKVNSW-----VNVSLTQLQFDALVSMAFNAP-GLCKSL--VIPLVNQGKHIEAAATIKtaqSSsapfSVYPGLKKRREKEAAIYLQGSYE----- +>LauGreDrversion4_1035100.scaffolds.fasta_scaffold110102_1 # 49 # 582 # 1 # ID=110102_1;partial=00;start_type=ATG;rbs_motif=ATA;rbs_spacer=3bp;gc_cont=0.502 +--FRKVLNLVTDQRLItQRLpacPvTRWRYQNGQfptqassmataTSQYVEQDIPI------------TLFTSPYVEQADQMLREDLDTRLKEIK---KAYPNFDTYPQDLQLQLTQSYYR-GTLTPKASPKTRRLINKGKFQEAAKEFLNNEEYKN----------------------- +>UPI000427448D status=active +---FDLSIKE-------------VdvkfwpeaDNDFY---------------------------------------HQMHVYLEELAP-----AEEPKGV---QQRLANLGYLDS--KIDG-SIgpltesNIKALQYvaql-DLSGE----------------------------------- +>GraSoiStandDraft_59_1057299.scaffolds.fasta_scaffold2407934_1 # 3 # 302 # -1 # ID=2407934_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.710 +----DLLKPS-------------Deleklakr---------------------------------------------QVSAYR-QGNH-----SDEVKLL---QEALIKLGFDLGKAGADG-DFggktktAIEQFQKsyqpshqthpsysigpvdgivgkGTLLAL----------------------------------- +>AACY02.4.fsa_nt_gi|132942579|gb|AACY020777393.1|_1 # 2 # 589 # -1 # ID=192804_1;partial=10;start_type=ATG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.672 +---KHVIAAGNG----ISAESQGQtitmtpsgqqatvgvAAKGFGTsatpelrLLESAPWY---------------QKSLKSQ---------FASLTSAEN------LDDKELAANVFAYLTsiylktaelAKKFGIYINEWDP-MS--EQITPNANGLT----------------------------------- +>ERR1700733_3971163 +--FISAIKQTEGYDARPRWDV-HQWTVGYGTRASGP------------------DERITPAQAEARFNTEIAKAARIVDG------VNPNLDAGTRAALTSLTFNTGDAWTH--SGLGDKIRSGDLNGARDSFL--QYNKVDgetndavAARRAREASWFGRGDIS----- +>SRR5437016_11027577 +--------------------------------------------------------NLTSEQERKLLKIVVPSYENTVRT-----LVRVSLNQNQYRSEEHTSELQSLTN------LV-----CRL-------------------------------------- +>ERR1041385_2389849 
+--MFDAIGPKEGYVSGLYDDKapgpnmakpargldwGGSHAIGYGTDLGKTYDPEnltsdqLFKRRLFNQIGYsnddfdkfmKQELFRRDGEGEKLFGLTITDPQDGYiKVAKGMFPNLNSFSLEQQTGVIDLMYNYGSGNLKKYgQSVISAIKGGDFKKAGDEVLSlpskgTTYGVANPTRANAIKSSLQS--------- +>SRR5689334_20794922 +--------------------------------IGRTFDPNhltpkqKFVERLLTQMGYkdpltpparsllqrfvDHDLFLNDGDEARLFELTFSDPQEGYlRLAHSMFPQLGTYSIDQQIGIVDLVYNYGSGHLHSDgQAVIQAIRAGDWTRAGDAVPHlpmpngr----------------------------- +>DeeseametMP0441B_FD_contig_31_955089_length_229_multi_3_in_0_out_0_1 # 1 # 228 # 1 # ID=201808_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.491 +--VSDFIISNEGFTSKPHSDV-KQVSIGHGTSVSDGTTQipKNWKQTLYKRYDVpeekqklDPKDGISRDTAKYIFDIKYNKNKRILDE----ISYIQYFPEHIQTAIFDLAFNMGPYFFEKFANFNEQLklaaeelkggmiTPENIDnantflaAAKTELTgnydktgkytgPTKYRKDLPNRSGKIKDILDAGVS------ +>SRR5665213_514408 +---EQFIKSWEQKRLKLYTDATGNATLGYGHLVEPDHEE-------------EFKNGITDSRANGLFYEDWMNKAHiPMT-----SALTVSLSQSEYDSWASLYFNIGEAGIAE-TRTLRYFNAGQKDQAAQEWL--GFSHgmvdgQSvtlsglLARRQAEVNILLRGTYV----- +>tr|A0A2V2GI43|A0A2V2GI43_9BACT Uncharacterized protein OS=Lentisphaerae bacterium OX=1932692 GN=DBX90_16130 PE=4 SV=1 +----KHLSYREGRKEKVYRDGKSWPTLGVGANLAEEHILATFktlkkfpkevIEDLQGlkKLSdserekkakdiekQFANITFSNQEIDKLFEASFQVAEKDVKEAFNKgvweerrlkngetVkvwngqkvdsKAWEKQPELVRAVCLDLSFNTGGPGLRKYKSFLRAVKAEDYRRAALELLNSKDYKDN---------------------- +>UPI00031AEAB0 status=active +-------------------------LLGYESKSLETNIPF------------TQDVVDKESPSVGVFQINVDTAAPQIFQ------------------------------------------------------------------------------------ +>SRR5262245_45365342 +----SVLKDrEGGFYAEPYKDS-GGLAIGYGMQKWKGKKV-------------TPDLRVTQEEAdAEFDRQVGEKYGKDILE-----RLTVPTTQHQRDALLSVAFNHPLTAF----KIIDKLNAGQQPTL----------------------------------- +>SRR5262245_39818229 +--PGDLIKPYEGCVLHAYDDAqdvalserfirkvggawfrldgttcRGVPTIGWGDTAAPRRG----------------IEHCTKAEADQWLFDFMRQKSLpAVQR---Y--ASDRDPAAQEAV---NCFGYNLGAAEK--------------------------------------------------- +>ETNmetMinimDraft_20_1059909.scaffolds.fasta_scaffold455397_1 # 1 # 
96 # 1 # ID=455397_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.375 +--FRKEIKAIEGDPPKnksgnyiAFDDGFGNMTIGWGHIDNVKKGD-------------IITPSKAEEYLTNDIAIKVKTAKRKF-------PKYDSFDIQIKRAIVNALYR---GDLGPATSKLINTEPTNWNAVADEYLNSNEFRR----------------------- +>AntAceMinimDraft_15_1070371.scaffolds.fasta_scaffold16086_1 # 3 # 824 # 1 # ID=16086_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.448 +--FLKKMKQFENSVMSgwnskkklwypHKSVEGGAKTIAYGHKIQ--SGE-------------DFSKGITDKYATKLLMKDIGTAIYKIKKV--LGIDISSLPRYVQQALVNAMFR---GELKSTHRTVELMKQNKWQEAAKEYINHNEYIS----------------------- +>AntAceMinimDraft_11_1070367.scaffolds.fasta_scaffold01750_11 # 8029 # 8949 # -1 # ID=1750_11;partial=00;start_type=ATG;rbs_motif=TAA;rbs_spacer=9bp;gc_cont=0.417 +---------------------VGTLTIGWGHTGKEAK----------------IGNKISKLKAEQLLTKDIIEKENVAKN--TLFPKYSKYPLYIQRVLVNAVFR-GEAKT--SHEWVKAINSGNWGLAAKKYVE-GWN------------------------- +>tr|A0A2N2TNX2|A0A2N2TNX2_9PROT Uncharacterized protein OS=Betaproteobacteria bacterium HGW-Betaproteobacteria-16 OX=2013707 GN=CVU22_05645 PE=4 SV=1 +--LQLTIAETESLHARVQDVGDGKATIGWGYTFNRNDNHAIWQasginltdaqwneiRAMDaapaqdkTRLGLAFGRELSAAEANQLLVASADTYATHADAL------GMPD-SRERLALISVTYNRGVGAMRGh--ALLDAIGNGDRAEAWYQLRYNCWGSSTqyeaGLRKRRLVEA------------ +>APFre7841882654_1041346.scaffolds.fasta_scaffold41750_2 # 218 # 499 # -1 # ID=41750_2;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.479 +-----------GTPVHAYVDGVGVPTIGWGSTYYDDISAG--------TKKVKRGDKITKGKADSIMYSNVNKLAMKYK---NKIPHWNKMSATQRAGLISMGYNAPNFYGA-YPKITNALNSGDMKAVKANL---TWGGPSATRIREAQQMMTQGP------- +>UPI0000F254AB status=active +--AALLLRNYEGLRLNAYPDPYtnaEPYTIGIGATYYPPGFRLG----GGrpgRK--VEMGDRITEAEAFEIKRYHVKQFADGAESK-IGSETWNKLPAGVKVALISKAFNYGEVYDSAIPLIKSGANSGDFSELAGYFRNRLAKHNDginSWRRNDEASVILTNS------- +>tr|A0A1U9JSH0|A0A1U9JSH0_9RHIZ Lysozyme OS=Candidatus Tokpelaia hoelldoblerii OX=1902579 GN=BHV28_00850 PE=3 SV=1 
+--ISKWMKCVEGYSSKGYADGaKGRVSAGYGRNMHKSQMP----------------SFVSRNLANKWLDEDINKAYVAVDEL-----VKVPMTSNERDALASFVYNGGPGMLQK-SSVLREFNRGNKQGAIEAWH--QYNvgtidgvKQkipaLSRRREAEINMFQNGEYI----- +>ERR1043166_3585063 +-----------------VANIFRSSSISFGNEM--AEKYP---------------SMPGAAnaARHQVWQCDITKAVNAQQA-----HMAGDAHEAQD--AGSK-------------------------------------------------------------- +>SRR6516225_3631744 +--TKTAAAGFEGYDPNAKWDY-RQYSVGFGTRGER-------------------GQHMSREEASGRLQDDLWKAKKAVLA------FAPNTPQGgPLESLTTFTMNLGTGWQH--GPLGRAVQAQNWSQAAQMMQ--DYTHAGGrvipglqSRRREEGNMMLTGQWSP---- +>OM-RGC.v1.032993972 TARA_070_SRF_0.45-0.8_C18311917_1_gene321364 COG0367 K01953 +--MYELIKGYEGTVdpttgkptTFIYKDSKGLETIGIGHLISGNPDPVLLQVIggSQQDYDAiiAGTVALTEQQQEDLFKIDIETTANKAR---KALPDFDTYPEVLRNHIVNGFYR-GDIKLG--HTTVQLMKAGEWEKAAIEYLNHDDYRAsvasnaagkphgVARRMEDNAAIIKKG-------- +>SRR5574337_2131727 +-----------------------------------------------hRDLHSfpTRRSSDLREQAMRLYQTSSAKRLSAVRDA-YGAADFDKLPSNQQAVLKDLSWQVG--GIKGYPKALAAFKRGDIDAASKEILV----------------------------- +>HubBroStandDraft_2_1064218.scaffolds.fasta_scaffold3037214_1 # 79 # 327 # -1 # ID=3037214_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.627 +--TFEHMKKMEAIRPHVYDDAtsknvssyaevKGNPTIALGKKIEAAEKST-FAAYL------LGKAKLEGAALKKVIDETFSPREKKLTS-----MIKVPVTQSMFDALFSFGFNTGFGAK-SFKTIVDKLNAGDYSGAHKAVG------------------------------ +>tr|A0A022GLD6|A0A022GLD6_9BURK Lysozyme OS=Cupriavidus sp. 
SK-4 GN=CF68_21630 PE=4 SV=1 +------WQASEGFTERPVIPTRgDVLTIGHGSTRYEDGRPVR-----------MSDPPITRERGAILARNLMVVDERRFVA---SLP-GVRLHQEGFDLYVDFVGQFGIGNWSG-SSMRKRLLAGDYAGACQQRCYGVWTRPSPRMVPSLIL------------- +>JI7StandDraft_1071085.scaffolds.fasta_scaffold280498_1 # 3 # 935 # -1 # ID=280498_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.633 +-NPIHLLa--------------irSDYCTIRYRVHTINGN----------------WLPWVTGCS--ISNSSKYAGVHGRCID-----K--IQI--SAQSSIYKISYRASAS--------------------GKNFL--PWVTEynnvnsD---------------------- +>SRR5882724_2916716 +-------------------------------LLYKHGSFLlaaakesaDFKAQil---KshgvkylpcsapfqdgFDDLTDAALTkaqvavgakaqssvdleveylSRTPADVLQEDLVSREQTVRRS----V-KVQLAQCEFDALISFQFNTGGLV--R-SGLLTSVNKAMYRA------------------------------------ +>ERR1035437_1232889 +-------------------------------------------------------NPNVhvtKEQAEKWRDADLGAARR-VVL----ANVHTKLNEGQLDALTDLAGNMGGPkfaGQ--TQTLLGDVNAGNMTKASQDFM--LYDKVMdmatkrlvPspvldARRAAEGDAFARASG------ +>SRR5574344_327429 +---INYLSDIEKIKREVYLDANNNPSIGIGHLLKNSEVQ-------------KYTKPLKDDEIYTILAQDLLDINEDLKVL-IGEDVYKDIPVCVKESVIDLAFNKEVGAVRDNTELLKGLKEKDYVKVVSNLT------------------------------ +>SRR5437763_9626014 +---VTLIKRLEGLGlpgkpGWAYEDSVHKCTIGYGHLLLPEGPCG-----------NRAKLHWTKAKADQVLRSDLNKMMEPYVR---KASLRLGFSQCEYDSLLSFAYNVAHGKGGesqSWQKLMKGLtPTGSWRQAV---------------------------------- +>SRR3954463_10448938 +------------------ATRNrgrPWPPPCSGHTGPEV----------------RVGRRTSDRDCAVLLEGDLSREAVGVYRC---GPPALAGRPQVWAPLTDLAHNIGIAATCR-SSAMWWFQAGRWREGCGRIA--RFDKA----------------------- +>SRR5437764_1314394 +-------------------DRLgrnQPITICFGQTGFvhtpdghlvRV----------------TLGLTFPIAHCQKILEESEVQWAVQIWRV---RPS-IAAYPHTWSAAIDFVHNFNIEVYRR-SSIDREFAAERWLAGCDRFL--LYNKGTV--------------------- +>Dee2metaT_32_FD_contig_31_4343777_length_201_multi_2_in_0_out_0_1 # 1 # 201 # -1 # ID=1767254_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.498 
+---------VEGRRDEYYWDQaafngEGAITSGVGHLVPHTRGKEIYK-------QKLEGVPLKKKDVDKQYEKDLQEHGNAGRD---FFgrEAFDKLPENVQTVLTSLAFNVGVGKeaytdaagkyhratgIKAYKRMKEAVE------------------------------------------ +>OM-RGC.v1.011099623 TARA_025_SRF_0.22-1.6_C16986801_1_gene738675 NOG19905 "" +-----------DDKFHEYLSIEgGENTIGYGHKLTEGERKS-----------GKFSKGLSKPEALDVLGKDLFEAYKGAYnQYVNQWsglsseqkeKKWNELSNDAKVALTDLNFNIGNIKDYK-GIFKTAVKTGQIEEVRTAIGN----------------------------- +>SRR4051812_14925294 +--------RFEGRILWPYLDAKGIPTAAIGIALPRVTDAYqlpwfagsrpatldEIASDWmrvlHAqDlagiggakpqWAKLSQIRLYSDDVDDLTLRRFDTNDPKLA---ARFACWPRFPADAQLVAHSMAYAMGEHRFDDFPKFCQAMEAFEFVIAADE-------------------------------- +>SRR5690349_3408396 +--ATTILTVWEGTDLVAKKD-----MIGTGHPLTYCHGQT-------DEFGKvKEGQRFTPAQCKELLAKSLPKYLTPLQKC-----VTREIPVKTMAALLDAAYNAGPSAVCR-SPMVAKINAGDTHAGCLAFK--GWYV------------------------ +>YNPNPStandDraft_1061719.scaffolds.fasta_scaffold693605_1 # 1 # 240 # -1 # ID=693605_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.642 +---FNSIMSYEGFNPNskGYPDTNGI-AYGSGLNFKFLKpnQKDEIIK------ATKGGKKLPKKINDKIVKQRISELEKNFRKDMkGSNVTFGKLNPARKAALVNMGFNMGFGKTagkaeGGFTEMYKHMKNGEFEKAADNVLfnfdkngkkigKTEFYKRLPDRAENMAKQIRDGIF------ +>tr|A0A2X5SMG1|A0A2X5SMG1_ECOLX Phage lysozyme OS=Escherichia coli OX=562 GN=SAMEA3472064_04985 PE=4 SV=1 +--------GALSLGLNHYRDNAITYKAQRDKKVSEL--------------------ELANATITDMQ--QRQRNVAAVEK-----NIRIPLTGPQKAGIASFCpYNIGPGKCFP-STFYKRINAGDRKGACEAIR--WWIKDGGR-------------------- +>GraSoiStandDraft_15_1057317.scaffolds.fasta_scaffold1480375_1 # 3 # 131 # -1 # ID=1480375_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.752 +---MDFISNvEGGFKGTMHYV-NEVPHIGYGVNLEVHKDLIkEKLNFNDNDMAKlmKGEKAITQTQGRLITEFILQDMDKLVTD--KI--GDIPLNTDQRIALNSMAYNA--PTLIG-PNLVKHIQNGDMDSISNEILNRSnKSksKGIDNRRKMEHDMFFGS-------- +>KBSMisStandDraft_5_1062788.scaffolds.fasta_scaffold8022995_1 # 2 # 262 # 1 # ID=8022995_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.590 
+-LLVPHLKEMEGWRARRYKDSKGLPTVGHGALIDGSFVGTmekVFPhqsKEWRQKV-AAGNMELTAAQGHELLTHQARQKHDQVRDI-IGHETFDSMHPNLRMHIASEHFR---GMIKKSPKALALIRKGDLKGASKEYLNADDYRENtensiGKRMKNLSDAL----------- +>DeeseametMP0441B_FD_contig_31_149099_length_909_multi_5_in_0_out_0_1 # 2 # 319 # 1 # ID=182189_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.456 +---AEFLKRAEGTIafqtkkgsydpktkeFLTYPDSEGVDTIGYGTTENVRK-----------------DMRVSEKVADMALDQRIDKEISTLIA--DKIPRKFLNNPKIKTALVSLQYNLGRSGW---PMAKEALQKGDID------------------------------------- +>AP03_1055505.scaffolds.fasta_scaffold105509_2 # 541 # 879 # -1 # ID=105509_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.283 +--FEEQQYSWEGDHGSVPMPTndareqnlpieERTKDIAYGHKITVKEM--------------EAglihGIPFINKETGEFIElttEDKRfikeqdilQNvnlaLSsgwdtklqergLSW---DTMPDKYKLPLEDLaynvggkkageswTKIFDDVQNDNVA---GfvknlrrqdaGQNTAGMDNRVAKAAAASGLI--TNYQQAldyglk---------LT----------- +>KBSSwiStaDraftv2_1062776.scaffolds.fasta_scaffold11087455_1 # 2 # 247 # 1 # ID=11087455_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.646 +--IRERIKREEDFRSEIYPDN-KSPAIAYGYNLTPEEIEQGYVVTPQG--NININNPINEEEASRLLDMRgMENLQAGVNLLEQRGIDYNALPSGIQKSIQDNVYRGGAGILSSSPNFIEALSKGDFKTAIDEIG------------------------------ +>SRR5215831_11165133 +-------------------------------EHIPAR------------------HRSASRKRRAAWMKKLARDRAEIER------LNPRLPEGAKKALTSLLYDLGGDvnKLKE-HGMANAIAGGDVE------------------------------------- +>GraSoiStandDraft_1057264.scaffolds.fasta_scaffold1844948_1 # 1 # 159 # -1 # ID=1844948_1;partial=10;start_type=ATG;rbs_motif=AGGAG/GGAGG;rbs_spacer=11-12bp;gc_cont=0.629 +---------------------AGDFQFKTLTDSNDPTSTKGS------AFM-TGLQRVENF----------YNAYDQTVP--YWVEEarqhndpnKLSYSEQKELFLVNLQQQKGT--DDLL----DKMLEGDIEASKQLYG------------------------------ +>GraSoiStandDraft_5_1057265.scaffolds.fasta_scaffold2871032_1 # 2 # 283 # 1 # ID=2871032_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.500 
+----T-IAFAEGTS---DADGYNKWFGGRTDMDlskmtINQVVV-------------EQKRRINSGEAtYNGytsgavgRYQMMkPE-MAAVAA--GLDPAVAKFTPENQDKMVMAQYIKgqagitdAQIE----GGITPQMIDQL-APVFASFP-NLFGAdnkgRV----GTNTSYYG---------- +>LakMenE18May11ns_1017448.scaffolds.fasta_scaffold1474558_1 # 3 # 167 # -1 # ID=1474558_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.267 +----PTA-------------TNTKWAPVLN--------------------------LIASVESVGGSYDSIyPSttkPgLSSMTI--AEADTWQASTARSRGSAAAGRYQFMNIK----SQAAAAGI-GPNEQ------------------------------------ +>SRR5690606_39334538 +---------------------------------HPLDP--------------------------------------RHAPG-----SPATPPTRRSSDLKRRASQSGEATTCA-RCRPRSAaspgtsaaKaaalriksssSDAIQSASPALV------------------------------ +>AP95_1055475.scaffolds.fasta_scaffold1242842_1 # 3 # 215 # -1 # ID=1242842_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.272 +-RFLKHLEDREGTSNYIYLDTLDKPTGGTGHLMLKDELNMyditEYKdFNIPnigtRRVAfDSLGNpiKLDKATNTKWLKEDSKKAIESAKK---QAEQFG-ISNGrFIEALGSVNFQLGANWINKFPSAVEALKQKNYDEAIKQIKtgsgedgKSQWFLQTPARVLDFEEAIN---------- +>ETNmetMinimDraft_9_1059917.scaffolds.fasta_scaffold79971_1 # 1 # 408 # -1 # ID=79971_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.338 +-ALLNYLIRQEGFEPSQYQDT-SEKAIGYGFNLTPEQREKGIVTLADG-RTININKDITEEEGRAMMAERANFNFNVgMNNLMEKGVDVNTLPQGVQFALQDLTYQTGGGIFDKAPKLVQALKDNDAEKISEELKT----------------------------- +>OM-RGC.v1.033393543 645991.Sgly_3072 COG2827 K07461 +---DIFIKNEEGSInpdvtgvgyPDPYRENasgvdiTDQFAIGYGSQTWYLDENnnekmPGIRvncASYTLqsgryvctqcnIVDLYNGNPITQAQATKNKEIHNRKkIYPAMAD-----NIKVPLSQNQFDAVVSYMYNVGTAVE-GSPSFYRKLNQGDYEGAADEMDVVTSGGQVleglVKRRAREQTLFRTGNYN----- +>WorMetDrversion2_3_1045171.scaffolds.fasta_scaffold176865_1 # 707 # 802 # -1 # ID=176865_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.292 
+---YAFVIVREGEInpettalgyPDPYEEDgngvdiTSKFAIGYGSQTWYLDANnnemtTGIKvtcNSWSQkvvnnkkrwvcdqcnIVDLYG-GPITEAQAHTNKELHNQRvVYPAMAK-----NIKVPLTQYQFDAVVSYVYNIGTGTS-GSPTFYKKLNAGDYAGAAEQMDVVKSGGRVmkglIIRRAKEQVLFLTGKYD----- +>AntAceMinimDraft_15_1070371.scaffolds.fasta_scaffold488621_1 # 4 # 213 # -1 # ID=488621_1;partial=00;start_type=GTG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.495 +-EYIDYLKRVEGYDNkvgdkfYPYDSPEgGLKTIGYGYKIKTLEEQNT-----------LEKTGLSSTEVDDLLLEEAEHSYQKAKKFcEQKNYNWEATELRLRFALADICFNVG--------------------------------------------------------- +>SRR5579871_1017464 +--IAAFVQSQEGLRYFPYSDN-GVPAIGFGSDLIDKTTGqvnTQFEAiintYLANNpqLGftfqdflDEKLVYIDKKTAIALFSSYFQTVVQYVE------TNYSGLTFNEAAALIDIGYNIGTAGLKKFTSMNADIAMGtpfGFACAGLELVNSIR-------------------------- +>SRR5579871_4136835 +--------------------------------LIDKTTGqvnTQFEAiintYLANNpqLGftfqdflDEKLVYIDKKTAIALFSSYFQTVVQYVE------TNYSGLTFNEAAALIDIGYNIGTAGLKKFTSMNADIAMGtplDSPALGWSLLTRYG-------------------------- +>SRR5690349_2775129 +------------------TGA-YVPAIGYGSDLEAKKTGtvnPVFKKiinaFLKQHseISftfndylNPDsGAYVDQATADALFNAGFQTAQSYVE------KTYSGLTLPQEAALIDIVYNVGTEGLAEFASMNADIKMCtpfGFACAGLELINSKR-------------------------- +>tr|A0A0G2ZHK9|A0A0G2ZHK9_9DELT Uncharacterized protein OS=Archangium gephyra OX=48 GN=AA314_01173 PE=4 SV=1 +--MWLDIIAWEGVVPYMYRDTVGHVTVGAGNMLSRLEKNGpedvmaakglPFQNmdtgkpateaEIAKAfeevqklpkamratdYALRPTIALTDQTIRALAKKRLDNeFLPHLV---NNFPGFHSYPRAARRGLLDIVYNVGVGKFPGrpnkpgkdflFDSLTKAARARNWAVAAENCH------------------------------ +>SRR5256885_2878773 +------WIAAEGFSad--PIIPVRGdVPTIGHGATRYEDGTDRK-----------S--TRLNS---SHLVISYAVFCLKKKTK---SNEGPHEPEPSYAASDGW-------RLLRA--------------------------------------------------- +>SRR5260221_271426 +------WTMTHTATALTIVYVLLqaAYLLWRWRNEREDRHRDT-----------VAPPPHRGARARELAINPLEPQKGACGR---DSLGDTRVHPAEFAQAVDFAGQYGCGAWRG-SSMLARTRAGDYAGACQSYL--SWRFMT---------------------- +>SRR2546427_13303121 
+--------------------------------------XMD-----------MDRETHVRTGALEGAKAAPPVTV--VAAN--VANGWTMTHtaTALTIVYVLLQAAYLLWRWRN-EREDRRARQaed--AARRKASST--ATDTGA---------------------- +>SRR5580704_1343653 +--------------------------------------------------------GRGRTFFLNLLHEDIQRVAIgPMNT-----ALHASLNQNEVNATASATFNCGPGF-VE-GTVGADINAHRFAAAANAFM--LWDHPS---------------------- +>SaaInlV_150m_DNA_2_1039686.scaffolds.fasta_scaffold224041_1 # 2 # 220 # -1 # ID=224041_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.461 +----------------SYPDGRG-FSVGWGSYNKLSNGAS-----------VTATTSITKEQADKEIEIEMREVDKRIFP-----KIKVPLTETQYAALLDTAYNAGAGSLNytsnrngeTFPSLLTTVNTGGDtTKVfpKVAISDSGSGKVLPslIRRRNDASKLFSGGYDTLY-- +>KBSSwiStaDraftv2_1062776.scaffolds.fasta_scaffold4011826_1 # 112 # 327 # -1 # ID=4011826_1;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.648 +----------------PFPDGKT-YSIGWGTNNKLpSDGTP-----------VTANLVIDKARADKEYQVTIYDKTVPVlDK-----LITAPLNEVQYAALISFMYNAGPYALKyn---NLDDIINNGGDvIGIlkRTALKDARTGKVLNglINRRKDEAALYSGQQNALY-- +>tr|A0A2S6UF56|A0A2S6UF56_9PROT Uncharacterized protein OS=Alphaproteobacteria bacterium MarineAlpha4_Bin1 OX=2013074 GN=CFH09_00592 PE=4 SV=1 +--INDYITDGESPVSYPYLDTKGCLTIGKGFKIDSEDQFARldleVIkDGvpaaateaekrrafrqMQAKYealggrkknrpaGAfnrratayetVTDIRMSMASMNAILEREIATRTGKIRTE-VGDAAWNNLNRAQQTAIIDIDYNTGGGGLVGFPGLKRAIRNGDAQVMARESL------------------------------ +>ERR1700690_1502001 +----------------YRVDPQGNAEQEDGHRVELREHL---------------VGGSKDATICGSPG-------SCVSR----------------DALQDTQSVEAG---KA-YTIERTWTVD--GRKTQVVD----PNTGRpadrg--------------------- +>SRR5271169_3054691 +----------------E------------SEALHPGE-----------------------------GKTKVETAVEGV-------------PPGAYDTIRAVES--TGKAPQG-YIGGRTFAN----------------------------------------- +>ERR1700690_170060 +---------------------------------APGIGL---------------VLGLAV--LVESGGADETPAVAEAEQS----LestagevltRFGTKAESTLEKLSADA----ARAEQS-A------------KVDNLHGVSAMANSMKaggtalrSAVEKSFQVANTGRNP----- 
+>AP58_3_1055460.scaffolds.fasta_scaffold53652_3 # 836 # 1306 # -1 # ID=53652_3;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.382 +------VYNFEGFKSKPYPDA-GGISIGYGIQLFKDAGNkggKSWQevfygEKLGLDIkGkgknkyiirnsvktkLKAIKSITEPEAKLATDLDIPNRIELM------HkvyPWSKELPRDIQLALLDMSYNMGMWfNMSGFKGNlkaaSESISTGDFDIA----------------------------------- +>KBSMisStaDraftv2_1062788.scaffolds.fasta_scaffold1703228_1 # 1 # 600 # 1 # ID=1703228_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.710 +------------------------------------------QevffgEKLGLEIkGkgrnkfiirngirtkLNKIKSITEDEGKSATDKDIQERIKLM------NsvyPWAKELPRDVQLAFLDMTYNMGMWfKMTGFKGNlktaSSCITNNNFKMA----------------------------------- +>tr|A0A2W5N2F6|A0A2W5N2F6_9PROT Uncharacterized protein OS=Micavibrio aeruginosavorus OX=349221 GN=DI551_04520 PE=4 SV=1 +-EMINIIKNYEGDVPYMYRDAKGNVTAGLGLHLRREEDALRypFKAMisekefgrlatqmekrdaykkvQEQPYGQGYiSKRYNPFENKDLLAIGLDPAAsreetrrillqeEGLLK--SKMRDFDAIPHPARQALLDMQYNIGDKKFRreyqddnnktvkAWPKLFDALEAKDYKRASEEVRSSD--------------------------- +>LauGreDrversion4_2_1035121.scaffolds.fasta_scaffold761772_3 # 468 # 629 # 1 # ID=761772_3;partial=00;start_type=ATG;rbs_motif=AAAAA;rbs_spacer=12bp;gc_cont=0.327 +----LPVIIWEGYSPVPYRDTKGVVTVGVGQTGYWAD--------------------IPFPEVFD-------HFEQQAR---HLTTDFDLLPQSVQDAIVVACYR---GDWQMSPRTRRLFNNKMYLEAVEEWYDNEDYRVAcaqrsgvKARFEYVGDAIESM-------- +>SRR4051812_6340330 +-------------------------------------------------------LILTAGQERQLLGFAAIPVAKDIAG-----KVEAPLTQAQFDTLVALAFGSGKttL---L-LMILRMLSAGDLAAAVAAMN--GAAVAAdrqaaqaGNRRAADIQLA----------- +>ERR1700722_636008 +----PWSGPFEgPLLTYPYTDVDGWVTTGTGNMIDAGAPGQQKgvncgkgtgapcgqatptakaLAmpwtggSIATDWAaikaawpgvqstackGITSARLPQDYLVTMAMDQMKANERDVL---KLVPGFAEVPADGQLALHSMMWAMGSGALGTFKTLLGAVNSGDYMTAAAQ-------------------------------- +>tr|G1XFN9|G1XFN9_ARTOA Uncharacterized protein OS=Arthrobotrys oligospora (strain ATCC 24927 / CBS 115.81 / DSM 1491) GN=AOL_s00081g181 PE=4 SV=1 
+--GINFIRGAEGFCTTFYNQGDNRKTIGWGHNCNITQSLCN-----------SLTLPIDTCIGENLFAADIQRLLHEAF---RPLPNAKLLTQGQLNALLDLVYQCGNSAIAKGTPIWKDLEAKRADLVCGHIT------------------------------ +>SRR5215207_6439154 +------------------------------------P----------------DDPKIELRTAIAWLRAGAAERETLLNR---W--LDVEVTPHQFYALFSLLYQSGTESTR---DVVAYFNAGKPMLGMCRFA--HYPF------------------------ +>SRR6185503_3515820 +---MAFIARWEQFVDHWYEDGIwrsgpkeGqaKFSIGFGHGEAGDyepkvyD----------------PTLKITYDQGISIFRLDLESKAKAVRN---R--LKVKVNSYQFNALVSLVFNYGQGSVD---KVNSVFPLLNQELYVAasvGFL--Kcn--------------------------- +>SRR6185295_16855885 +--------------------------------------------------------------------NDMETKMGFLRK---K--IKVPVTTYMFNALGLLVLNTGEGNFL---E-GPVLPLLNVERYVGasaAFM--HhskswqqvkdengaIVF------------------------ +>SRR5258708_5225071 +--VITALKSVEaaTRFEGEAAAALGESVIGFQRVVTSGGRVVG---------------EIDVETTKAIV--EVTSGSSPrKLQ-----QVTALINDKALNPAGKLVVVYAPN-IGK-QAaqqlvkagavIVRTLE------------------------------------------ +>JI7StandDraft_1071085.scaffolds.fasta_scaffold1353463_1 # 1 # 366 # 1 # ID=1353463_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.593 +--AIQLIMKHEGFVGDTgnskttlikKNDTKG-SVVGHGMNLKFMEKNE--RDFY-NRVTRNGKLPMKKDDAAELTRMRVSGIVSKFNKT-KSF-KWASMSRNRKDALINVVYQNGWSGFTkGFEETVKLIKAGEWTKASEEIVKSSWGRTYKTRAKEIQKLLKDG-------- +>SRR4051812_27311896 +------------------------------------------------------TRVVSKLEMQAMLQNTLPEYLAGLRM--RAL-QHPNLGSMRIAVLVHVAMVIGLPSLLGMHTLWIAVKANRWDEGARVLLKSHWPGSAStdverDRIVDIADIFRTGM------- +>SRR3569623_181819 +------------------------------------------------------RQHISPEIAEAQLRAQARDLLATIRA--RIW-QMPGLSVPRAAVLLHVGLLIGSEKLLGLTELWDAISKDDYETAQEVLLVNGWAQITGddp--------------------- +>SRR5579862_7068091 +-------------QELDTAIHRDRSPFGVVVRRHRPHVTAHD-----N----AEWGTITMDRGIQLLQQDAAIAENAIEHF-----IKRQLTQWQRDALASFAFNCGGGALEG-SVGAAVNGGGDPSA---ALE--QWDHSGhvvlqglLERRKQEAHLYLTGDYG----- +>EndMetStandDraft_8_1072994.scaffolds.fasta_scaffold471550_1 # 155 # 562 # 1 # 
ID=471550_1;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.647 +-DMIPYIKEVEGYTDEPKKEPLEkkLYTIYWGHQLTANQLED------------YKTGKL-ELDDEDILKKDLIEADMLAEKVYnkylkskkfkevasdlgldATGPNYKNLPDTSKKMLVDLAFNLGSRPKLKFGDLDAGLK------------------------------------------ +>ThiBiot_750_plan_1041556.scaffolds.fasta_scaffold10627_2 # 1976 # 2425 # -1 # ID=10627_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.798 +---------AEGFDKVPRKD---kgKWVIGHGHQIKKSS--K------------YYDDVM-RIlkeegqiseeFAQKLLFDDIMTAESDARVNYnkhtksktfdgl---------SFNQKIMIMAINYHLGRERASEYNKLSNAiknndwasiteeskvkerIN------------------------------------------ +>SRR5574344_664014 +-KIISMNSQYEGYNTCVVDDNLvdDTPTMGYGHVVYEGDKFY---------------NNISKEEAYAMLVTQMNGStfSSGVNNF--MSGNGIKFNQQQFDALVMLSYNLGTGVLYNSdvkSILlncvessgstsstvayvnaddglnlrsgagtdcsvyetmgygskvtvlertssswykvsasdgtvgycaceyltfassvVRNMNLVDKKALQNELI--QWHHAGNtcvwgllYRRIDELEVFCYGDYT----- +>tr|A0A0H2ZG43|A0A0H2ZG43_PSEAB Uncharacterized protein OS=Pseudomonas aeruginosa (strain UCBPP-PA14) OX=208963 GN=PA14_59630 PE=4 SV=1 +----DFIARNElaaGQRAKTVAIVQGHITIGYGFDTFVHEASElnslNlvgstrqkVLPAlqLStsdpGFWSvyALLGQSLTDDDGLLLFSAKARAVVQRIAS-NQFAGKWNGLPPAIKTVALDLYYQYGQT--GNFPKFQQAINSHDWPAVIHELR--NWNG------------------------ +>tagenome__1003787_1003787.scaffolds.fasta_scaffold11218791_1 # 1 # 312 # 1 # ID=11218791_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.692 +--YMPIIKEYESLGEKIIVNGvvkyknykgkgEEKITSGYGSYRDENKL----------------EDSVTIAEANAQLVEDINDRLPAIKK---NIKNFDKFPLDVRQHLVSSWFR---GSLSGSPLTLDLINAGEYEKAAKEFLKNEEYNNaisldrrgIIKRMKNTAKAIKS--------- +>tr|G9J3H9|G9J3H9_9CAUD Lysozyme-peptidase OS=Clostridium phage phi24R OX=1128071 GN=phi24R_gp12 PE=4 SV=1 +---LLFVKQEEGFAEYGaYFNGESFKTGGYGVTENYQTKYY---SQ-------LEPFPVSEEKASQVTYDLLNNEFgipvkNAMLK---ANINLSDIPIYQFDVWVSIAFNYGMGGLSElnaWKMFLANPKD---TENI--ATAIKNLKANPNRRQREGALFESGVYP----- +>tr|A0A1Q6U9G8|A0A1Q6U9G8_9PROT Uncharacterized protein OS=Azospirillum sp. 
51_20 OX=1896972 GN=BHW58_08555 PE=4 SV=1 +-ALKALLIGEEGNVPHMYLDTVGDVTYCVGHRSASPEEAArcPWykevsgsverdrtnladrdrvfqthryVQTlpYGQNYGAgtfedKSDLRLPPDYCLDLLERDLAERRRNLQ---KTFPNYDKMSPYLQNSLLEVNFNIGNISPEKWPGLYKAAQEKDVESFCKNLH----RKTTDSKGKPIANMPKRNAW------ +>SRR5580700_5968080 +---------HEGYSLSIYPDSSGVPTVGIGLKLDSSNASFataalnaagvSYSdlvQDWssiKSLWvsqhhpladlkdtsslwaqfvSqnpSVADPVLSASQASTAFSYAVSAKLATAAA--FFGSQFYLLDRDPQIALVDLAFHVAD--ITKYKNLASQIEAND--------------------------------------- +>SRR5262249_13314966 +-------------------------------------------------------VSITSSQANALTSYaeqatydGLVTAYNAAAA--AKGNDWSTVPDGARTAIVDMAFNFGLTKLKSF-KFWNDIISQNWIKADEELR--TWGGPEKLRARRA--------------- +>ERR1041385_544581 +--------------------------------LKTVDDAVklpfvlrgtntpasgdDIAADWNavknskkgqraRYYQALTKCDLPADKIMELFQHRVDEFCGLLT---HYFPEFSNWPVGPQLATLDWVFN----------------------------------------------------------- +>GraSoiStandDraft_9_1057307.scaffolds.fasta_scaffold1170174_1 # 3 # 83 # -1 # ID=1170174_1;partial=10;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.679 +---------------GLYYDSEGKLTSGIGDLVESEDQVE-------------SLKNQTKESAEAKLQENIKLKNKLghadLKE---SGVDVDNLPSHVLDALENANFQLGSM--AEFPALKEGLRSGDYNKAATEsFTtkgtgtdASDWVKQTPVRVRDFVEAIADD-------- +>AP99_3_1055487.scaffolds.fasta_scaffold513660_1 # 3 # 356 # 1 # ID=513660_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.347 +--VTPK---------NSILSGVSPQARAWLSTINkveannPDKYNTLV-----G--G-EIVPELTEMTIQEVYDMAYGREIGKGNLPKRFgGREV---T-YGADSHAAGAYQFHPDTMlerareagmDptkTLYSAeTqqllaLQHMRNLG--IDPN--kemtrdsllksgSIAgWEGLsv-------EKGKITVPKAlELYQ-- +>GraSoiStandDraft_52_1057288.scaffolds.fasta_scaffold1255600_1 # 3 # 404 # -1 # ID=1255600_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.639 +--IVE-------GSTTSYEAGTAH--PGFIEVYNlarelGDPFP-----------ELTAGQWA--------LESDWGRDLTGKNNPFGL------------------TGNPKTEDVvwlptpgdpdggeKpflNFASLkDaikykLDKWGHVYadaSSLDEALHmlqshgddnryaqgTDNdWSGYidkvkeVIRDnnIESERDRELS--------- 
+>GraSoiStandDraft_45_1057281.scaffolds.fasta_scaffold4431313_1 # 2 # 241 # 1 # ID=4431313_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.613 +--EAEELKVNELGDHIVHDDGAGNPTIGYGHLIDeDSPYYYLL-----S--LPKENQWITEEQAQALFQQDWLEHLEYAKNTPAWnNATM---Q-QKAA-LIDMAYNFGDLNVywpkmlkalengdIdk------vlaNLRWNDP-SSKDRELTpyflkSGR---RfqtiedLMKNegIDPTRTNEYLPENiEIL--- +>GraSoiStandDraft_54_1057290.scaffolds.fasta_scaffold3476512_2 # 148 # 249 # -1 # ID=3476512_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.735 +--LNLKEGGAadlRVGEDKAFLKGLHKLAEKWE----LDPAHllALM-----A--SES----S--LFADNMnDG-GYAGLIQIGPDTAKEmGTTVEALTKMSRAEqlvYVDKYFQKWGL--pkratKgELYQAvLapstl-----HYYdegVGLG----adeyiyksgSAAYEEN-----------KPLDLNdDGFI-- +>tr|A0A1Q6T3V8|A0A1Q6T3V8_9PROT Uncharacterized protein OS=Acetobacter sp. 46_36 OX=1896965 GN=BHW56_02735 PE=4 SV=1 +--MLLLIAHCEGVRTRAYWDPNGkKWTIGIGNTVRPDGSPVR-----------SFDRIRSQEELMDYFKTHVEKIILPVME--QYLP-LKDMNRGEIAAFGSLVYNCGPGVLRG--------------------------------------------------- +>SRR3972149_69768 +--MLNFIQKQETYKSLPYKNSSGDWIVGFDHKLKNKELS-------------KFKNGITKEQAEELFKNDNERSKNNIDELLK-ENNFdpDEVPYEKKLLFLDHLFDLGKQGASKYKNYMKALVGNDYVFMSKNFR------------------------------ +>OM-RGC.v1.004209912 298701.DA2_1583 COG0536 K03979 +---------------------------------------------------------LKISSkeaakLNKFVKKQIFK--TLKTKWNKEsKIKFNTLSTEQATTLASVSFQYGDLktkt--PEFYKM---ALAGNWGQEEGvrgvyEELldfrdke-----KSINDRHIELAALLKK--------- +>LakWasMet44_HOW7_FD_contig_31_335144_length_229_multi_2_in_0_out_0_1 # 1 # 228 # -1 # ID=42844_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.518 +----------------------PFWTVGVGQRLTQNLPVD-ANGV----ITSAPPAGQTKEQVINNFKGNVRTAFDGAKKVMeeDYGAVWGNLSAGGMSVTTQVAFQTGEEGLAGFKDANTKLVAGDIAGAKNAYLDSKWAKSDSPR------------------- +>GraSoiStandDraft_4_1057263.scaffolds.fasta_scaffold5910489_1 # 134 # 265 # 1 # ID=5910489_1;partial=01;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.727 
+-------------------------------------------------------------------QSALLKIYDEKKANVerIYGEALAQYPPKVKAVVKRTLI----dmnmqikKGTSGFKMMNKALAEGNLAEAAKQITGNYevngkPAFSTDPgakkvgntdyfnqmsgdN------------------- +>ERR1017187_10103224 +-------VKNEGWDPFPYADSIGYITFGMGNLINSPTAMAryDWRnpdgtktsysDIVSawhavkNAYPavqSlncasLTKIRVTDAEIQRAIDLAIVSAETELL---KSFPGLLDMPADEQAALLSHAWAMGGDFvpVKGFFKYAALVNAGQYADAIVEG------------------------------- +>LakMenE01Jun11ns_1017448.scaffolds.fasta_scaffold6990336_1 # 132 # 329 # 1 # ID=6990336_1;partial=01;start_type=ATG;rbs_motif=TAA;rbs_spacer=9bp;gc_cont=0.384 +-SVIKDILEHEGYASLPYPDE-KQWSIGYGTRVSDAKASFltkdqakklrkQYNQikskvyhknkkvnanrknkalknWVISKyrnwendfyqkYKIpdeiqtknNGIIGISKETAKIAADSTLRNIINKMSKTSyehgdnQKQMYFDVLPSHVKKVFYDLAYNMGLGFLNKFVQFNNNIEAaaviltkdiltnDDiqdaemfFKSAANELLYnykkdgsykngkkenskTSYHKQNKKRALTNFKIMNKPIDI----- +>HubBroStandDraft_5_1064220.scaffolds.fasta_scaffold6519424_1 # 30 # 203 # 1 # ID=6519424_1;partial=01;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.592 +--LVDYLKNVENnpsnFNPNIYKDEgSGRWSIGFGHNFTEKSLKKFMK---EKGITNKNDLVISIEEAEDLLRKDILAAEQLASDIygnfiknntkaAENSPQFQYLDNNRKMMIIEYVFNMGHR-FNDYDDFLMHLTNNDYRGMLSEYKR----------------------------- +>tr|A0A2K8MAX0|A0A2K8MAX0_9SPHN Uncharacterized protein OS=Sphingomonas sp. 
Cra20 OX=1327635 GN=CVN68_02760 PE=4 SV=1 +-------------------------------------------------------QGFRDAQHAFIERTHYDPAITRVRD-----ATGLDlgtrhaavreatwsssvqharapqlleaairrtdrevgRDSaNYDRALVSNIYAER-------TTYLQGLAASGRYSRAEANQ--LI----svtqnryPNERRDVLALF----------- +>SRR4030065_531381 +------------------------------------------------------GMTITVENAEKELEMAVRTAIAEFDD--VFLFCPVEIHAVRGEAIIRLLFYSGLPRFMQFHGMISSIFKGQWSEAAYQLRSDPWYEDWGQQARRIVHEIREGEPI----- +>SRR4030067_3779904 +------------------------------------------------------AITRRVENGEKEREMAVRTAIAEFDD--VFLFCPVEINAVRGEAIVRLLFYSGLPRFMQFHGMISSIFKGQWSEAAYQLRSDPWYEDWGQEARRIVHEIREGEPI----- +>SRR4030067_201614 +------------------------------------------------------GMTITVENAEKELEMSVRTAIAEFDD--VFLFCPVEINSVRGEAI-vRLLFYSGLPGVMSGVSsgtiSLVGIYMGQKKHTDSQIRTERWVAAIGETVFYLVEAHNHLHPE----- +>ERR1041384_6446900 +--------HGEDCREKPYIAHksESVCTYGYGHQISGCPIVDratgkelKLAERLKANLddlKCQCAetesIDCKGSKAESLLTADTHAKVLHVHQ-----VIPVDLDQAQFDALVDLALHHGSV----PPYLIEAIKL----------------------------------------- +>SRR5665213_408642 +---NLYAQNHKGFDPSKLSDTQVLNAYKNGAFANDAAPDPF-----------QLLGAVSAPIG---------AAPS---------LDTGNWHQGGYDSAVDSlldHYKAHGEEVGA-KS------EQDYLRKAEAFK--DYVKQGGAtkkaiegVTENVTRYYKNGKYI----- +>SRR5271170_3139021 +---PQTQTQPKQLSA-------------------------D-----------DVSNGIQSAK--------------ADTGSnakKTVD----------FLNSFGT----------------------------------NWNLSGDAlrqglkdSKVDAHGVDNKVDSV----- +>SRR5215469_857803 +---TTTINVTFGSGK-----------DNAGAFLGAD----R-----------EVNTRVTAPDG---------SPANVAGGNdnttltnisqkeavnAVGARAFTDAQ---QSAVPSYGAQF---------------------------------------------------------- +>KBSSwiStaDraftv2_1062776.scaffolds.fasta_scaffold5808630_1 # 2 # 358 # -1 # ID=5808630_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.501 +-----------------------FYTIGYGHQLSKEEYK-------------LYKKGVSQEKVDEFLQKDIAEAQRKLDTRYGDHQWYRDLHPREKAILLDIEYNVR-GGMNSFPELERAIREGNKTL------------------------------------ +>tr|Q8XLB0|Q8XLB0_CLOPE Uncharacterized protein 
OS=Clostridium perfringens (strain 13 / Type A) OX=195102 GN=CPE1132 PE=4 SV=1 +--IIYYVKGIEGYAPYHYYDSVGVKTLGYGMTGSELSG---------------VSVPLSETSATHYLVDNFNrLYYTPVLN---MLKARGatNMLQREVDALASFAYNCGLDsnGLGG-SQLLKKYVAGE-RGESIHNEFKKWVHGggqvlpgLVRRREEEWKIFSGSSspvggY------ +>SRR3569623_1850642 +--------PFETPIDGPYLDSKRLVTFGLGNHVSRAQFTNypwqrrdtnarataaEVLAAYDtvakAPVGyrAawymrLTNIYLSGEAIADIFAARVPEFTGQQV---NLIPGFERLPPTAQLGLFDMIFSLGVATFSTFHDLLAAANHQ---------------------------------------- +>tr|A0A2D5GX32|A0A2D5GX32_9PLAN Uncharacterized protein (Fragment) OS=Gimesia sp. GN=CME31_11905 PE=4 SV=1 +-SFLDHLEDKEGSLNLIYKDSLGLPTGGTGHLMSDKDLKKYGVVSyIDskTDYGMrkvavdSQGDiiKLDEADTENWRKNDSEKAYMAAVE---QAKELGITSQKMINHIGSVNFQLGTGWRqkGKFPGVWEAMKAGDYDLAAKNVEwvspsdtskgRSDWYKDTPQRAEDFMGAL----------- +>GraSoiStandDraft_11_1057310.scaffolds.fasta_scaffold5230344_1 # 3 # 236 # -1 # ID=5230344_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.611 +-AFLDKQMDRLG---ITPEERDAYTKNmydwsrqvrnvesdnnrmaaad---------STSAKGVYQfTDasVATGRnrmanlgFEDKftSGISSNPQEWSDEQADSMFLANMF---AQKG----SDKYLKEIGRGD-------E-------KARQEAYYK-----FHhtnpdeattgRVNK-------------LM----------- +>KBSMisStandDraft_5_1062788.scaffolds.fasta_scaffold5420363_2 # 225 # 329 # -1 # ID=5420363_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.648 +-YFADQFNI-----------------------------PLEQLKI---DTARye---------GPGDWKFRVGGGgetfdvqAGDVARQ---YG---PW----DMERVH------------------------TP--------ree------QRMKFLQE----S----------- +>GraSoiStandDraft_27_1057306.scaffolds.fasta_scaffold4515593_1 # 1 # 219 # -1 # ID=4515593_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.603 +-KTLDQLLLELQLMETHGSDKDKIPKG----------SPLEPVFSaMGkyPHHPKyl---------IDGEWVSGNPDMddstmmnAPMRMMF---YTPDKGWV---GMETILGYD-------E-----------YQSY--------kni------DKQSLKKE----I----------- +>Cm827metagenome_2_1110796.scaffolds.fasta_scaffold05036_3 # 1485 # 3314 # 1 # 
ID=5036_3;partial=01;start_type=ATG;rbs_motif=AGGAGG;rbs_spacer=5-10bp;gc_cont=0.546 +-AGEREDVTRAG---IRDIMNRVA---fhesagtgktdlaqyg---------GGPGRGVYQfERgaGQGGAtagnrlatqlgsydmkvpkWLESfnKSGKGDVSGLTREQQDMLFVGNMM---QHPQ----ANMGKVE---SE-------E-------MDLADFWQK-----YHqaggegvrdaRM---------------------------- +>tagenome__1003787_1003787.scaffolds.fasta_scaffold7540627_1 # 1 # 132 # 1 # ID=7540627_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.742 +-YFENLLGYNRN-------------------------------------------------------------LYKTNKI---IQPEKGWG---YIEELGKQ--------S-----------TTIPKI----QKhlgksg------DKKKLNKAI---E----------- +>tr|R0K8U9|R0K8U9_SETT2 Uncharacterized protein OS=Setosphaeria turcica (strain 28A) OX=671987 GN=SETTUDRAFT_20207 PE=4 SV=1 +--GLERLKEAEPYRQKAYQDGKNGMSIGHGVNNKHHPEI----------FG-RHGTSVTREQAHQQLVDVNRQHARDLRK--RLGPeTWDGLKQGQRDGLLKAAYNAGPRTVAE--ATREHLQKKDWNGVAEKLG------------------------------ +>tr|A0A178ZW31|A0A178ZW31_9EURO Uncharacterized protein OS=Fonsecaea erecta OX=1367422 GN=AYL99_03154 PE=4 SV=1 +--GIEFLKYQEGFKSEFSQDGRsaaSGQSIGFGINSNANSIICaQIKEEFQ-----RTGKPLSKEQGEVYLKQVLPQYEKAVSS----LENSDKLSQNGFDALVSLAYHKGGTGMRK--TVGEYMKTQDMKGVYKAVKNdqttSKAGETYSKRRLDEAELFRSS-------- +>SRR5579871_5088087 +---------------------------------WDNAL---------------FADGCTPEVAEQHLRDALRGTLGDLKSALAsRQpaVDYDRLTHRQQETLLDFAQSEGVQGLR--EEFVAAVLAGDWERVVKNHLYVRYA------------------------- +>UPI0003C67627 status=active +---ADSLKLEEGSVEYIYLDMGKhgEPTSGVGHVMPIGSKDYnkyvkpkktgwVWRTIQtgkDKDGNPkyenrkvahdkdDNLITIDSSTRDSWLKEDAKEHYNYAIKQSK---DLNISDENFIRRLAHVNYQQGPDWWypKRFENTWKALKGQEWQKAIDNIKwinpsnkqkTSLWYRQSESRAEKFMDAIS---------- +>OrbCmetagenome_4_1107370.scaffolds.fasta_scaffold224910_1 # 31 # 264 # -1 # ID=224910_1;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.491 +-----------------------VSQIGYGHNLTPIEISTGQVSINGSQY--SLAAPLDQQLIGQLFQQDMVTVQNWMRP-----VVNVAVTQTQYDMLCSLAFNIGQTNFTN-APVIKSLVEGNIQNVPNEWM--QWT------------------------- +>SRR5690606_22076327 
+-------------------------------------------------------LTLTVEQARELTDAEHRGYMDDVVVpwwdRGRYTIPYADLPWQAQAVVFSLVYQCGVRGAEhRAPVTLSALRRGDWEKASAALLDRdGWGGeYLGRRA------------------ +>Laugrefabdmm15dn_1035133.scaffolds.fasta_scaffold569576_1 # 54 # 245 # -1 # ID=569576_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.411 +----SQLKSSENSSNKGYDKKKklwfpfdvGvneEKNIGWGINMSTFSDEDK----------KRMSKGVTSEEIEDIFNKRISKHLNKSKEkITEMGGDWDSLPDNAKLALADFSYNLG--SLNKFPKFTQAIIDNDLETAKKEYK--RYYTDKGvkkemtSRNKDIYEMIF---------- +>SRR5688572_28595148 +---------------------------------------------------------ITPPRAVARSLAHIAKDEAGLKRC-----VTAPMSQTEYDILVDFACQYGVAATCK-SAVVRRLNTGDYVGSCEGYT--LYRYS----------------------- +>LULM01.1.fsa_nt_gb|LULM01035886.1|_2 # 740 # 967 # 1 # ID=35886_2;partial=01;start_type=GTG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.706 +-AMINLMLRNIGWSGFAYKDVDNIVKIGYNLTDGV------------------SSVGLTEADAFEKWIKHFKDAERRFK---EIFV-LDSISQSQYDGMVSMYYFTGDWtrvgsEQRTF-QLYDYVKNREWEYVATAMTSSG--------------------------- +>APLak6261665767_1056052.scaffolds.fasta_scaffold49720_1 # 120 # 548 # 1 # ID=49720_1;partial=01;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.420 +--YFERIKGSEGpARLKKYPDKGG-YSIGYGHFLRKGENF---------------DKGINMQQAWKLYADDTREKINFLKGD-LGKDLWGRLPFSVKQALVDLDYRGDYRKPKpdargKEYNWVKLFKKGKFLEAAEELRDHDEAR------------------------ +>tr|A0A1D7XPB2|A0A1D7XPB2_9CLOT Lysozyme OS=Clostridium taeniosporum OX=394958 GN=BGI42_14940 PE=3 SV=1 +-KLVEFAAGWEYFSPHAYEDEyhrgdKSCWTIGYGTTYQVKPSA--FPN--------GLDSTCTKPQALVWLKEEMNKVAHEVKS--VLHKKGASISQQAFDCLCDIGYNAGTADLLYgKCITLNAVISGDADRITKAIM--MWTNANgqfshglKGRCKGRVNMCLHGIYD----- +>APCry1669192269_1035402.scaffolds.fasta_scaffold19247_1 # 3 # 107 # -1 # ID=19247_1;partial=10;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.476 +-----MICCFELYRGTPYLDGdGGWASIGYGRLLKEKSEPFI-AKAHD-----EQQGGVTEDEAWAGLQRYKSIYEAGVNK----LSGGTPLPQNVFNGICSFLYQNGQNRTTaNGQDLKQLISAGNWGKVAQAIQG---HGADRHRRSLEAQLIVNGCYP----- 
+>SRR5256885_16243390 +------RSTLFPYTTLFRSDHlaGGLPTYCAGRTDPTA----------------VVGTKLTSDQCQSINKTTLLEYGYAVLGC----VNWDYLTARRLIGLTVFAINVGKDGACG-SRSEERR----VGKECRSRW------------------------------ +>tr|A0A072P5I6|A0A072P5I6_9EURO Uncharacterized protein OS=Exophiala aquamarina CBS 119918 GN=A1O9_13071 PE=4 SV=1 +--GKECLKRRENFQSEFVLDCkdnKCGTCIGYEINCKHRPEMCeKIRRHMK-----ETGRLLTEEEGHMFFEELLPKFEIPVTR----LPNSEKLNQNQFEAPVSHAYHRGPSGTR--SLVEAHMKNEDWNAVYEAIKNDPVrgrgGEKFPGRRREGATQFAQ--------- +>OrbCnscriptome_2_FD_contig_121_233806_length_2380_multi_3_in_0_out_0_1 # 970 # 1218 # 1 # ID=72268_1;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.442 +------GVPNEQRQSKPYP-------AGVRMDSGNENRP-------------SMVIPTAenakaTSSLASKMRkgDTFAHPhepgtdilaQKIM-------------------------------------------------------------------------------------- +>GraSoiStandDraft_54_1057290.scaffolds.fasta_scaffold754101_1 # 126 # 626 # -1 # ID=754101_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.563 +---------------------SNETNVGYGHMLTSEELSSGFITINDEIV--DYRKGLTQDQVESVLKADVQWAQTHAES--SLKKVGMEGDEGKLQAVTSLIYNVGSGSWGS-SKAKKFLEAGNIE------------------------------------- +>RhiMethySRZTD1v2_1073278.scaffolds.fasta_scaffold1665577_1 # 2 # 97 # 1 # ID=1665577_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.750 +--------------LGEYNDLSGNRTYAFGLEVRTNKNPD----------GVVPAK--TIPEMQLQFKERIQKDIDFVNKLEDTtgKRIVNTLDTNQKAALVSLVYNIGQGRFLNSKAFREGLAVGDMDKFKFEAFDSE--------------------------- +>APWor3302395875_1045240.scaffolds.fasta_scaffold659646_1 # 3 # 86 # 1 # ID=659646_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.476 +--------------LGEYNDPSGNRTYAFGLEVRTNENPD----------GVIPAK--TVPEMQLQFKERIQKDIDFVNRLEDTtgKRIANSLDTNQKAALVSLIYNIGQNKFLNSKAYREGLAVGNMDKFKFEAFDSE--------------------------- +>UPI0004BCAA2A status=active 
+-------------------------------------------------------SRASQQEIDEQLQKDLETLSYQVAR-----YIFWPLNDKRKAAVLSYAHSVGISFFKD-CELLEAINTGA--------------------------------------- +>tr|A0A1W5S4D1|A0A1W5S4D1_9CAUD Uncharacterized protein OS=Marinomonas phage CPG1g PE=4 SV=1 +-DYIQMIAEFEedgEFRPQAYKDG-TQYSIGFGTRTTDPNEM-------------EGTGLINEEEAYRRLEEWTSKDRVFIQEVGK--REGLEWSDNELDALTSFTYNLGKDGLTQ------LVSGRDKATIADKMN--EYVNFQgkplnglVRRRKAESELFSTPV------- +>ADurb_Cas_01_Slu_FD_contig_81_880290_length_447_multi_2_in_0_out_0_1 # 1 # 447 # -1 # ID=442304_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.535 +--IINYSSEKEGFRVMPYRDV---ntLSIGRGFNIRDLterDFNfipkdlaTelkTLQKDLKsksytseellakeREFKRnlssKGIRGLLQQSADKIYTAKIKDIYDQYV---REFDNFGTLSSERQKSLIDFSYQLGHENVkNKFPLYYESLR------------------------------------------ +>EndMetStandDraft_5_1072996.scaffolds.fasta_scaffold3952630_1 # 1 # 204 # 1 # ID=3952630_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.647 +--NINYVRKFEGYDGTAYVDGDGKnatISFGAGFNARFMtddELAlmspkgqEavkKIQALLNdgmdleriaKAIDTEYGVFITKEESEKIFKKKMLDNYEMFV---DKYPMFTVISADKQMALLDHAYQMGFGPKGGFVNYWRNVE------------------------------------------ +>DeetaT_16_FD_contig_31_7944699_length_222_multi_2_in_0_out_0_1 # 1 # 222 # 1 # ID=1645973_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.590 +--IVNYTSEKEDFKVAPYIDA---ntLSIGRGFNIQYLtdqDYEkmsdnlaSllkPLQAWLNstpdrttaqlvekmNEFKRnlGGSEGMKQQVADLIYTDKIKEIYEQYS---TEFTNFGELAVDRQKALIDFSYQFGHDRLkRDFPKYYGSIT------------------------------------------ +>tr|A0A1K2HU41|A0A1K2HU41_9RHIZ Phage-related lysozyme (Muramidase), GH24 family OS=Devosia enhydra GN=SAMN02983003_0618 PE=4 SV=1 +---FAMIASLESISTKSYPDAGG-RSVGLGFYMDKEGARDVWSRAFGNTVSfdavYSGKSTITTAQAKHLFDNDILYYEKVVDRA---L-GGTAVTQNQRLALVSVAYNAPK----RLSDLAPVLRSGDNAAITAALL------------------------------ +>SRR6185312_8649512 
+-----------------------------------------------------------PGTSAKLLESVLPFYVSNARSL-VGASVWDnDLDEKEQAALIDIVYNRGYTNTKnEFGDVITALQNGDDVAAAVALMNAkpstgkskgkVWALRDPARA------------------ +>SRR5512137_2200859 +------------------------------------------------------------RRVLVWHGCYL---GRQ-------LCRWLDhqpLGqM------igLEAILGLGGEIIKRVW-----PDPAQQASAQLELLKLQQSrelaK------IVGQLEINKAEAQ----- +>SRR6185295_18780834 +-----------------------------------------------------------PSPQARWHHQCLEVGVRW-------SCRWTHitpWYlWYYTGALmaFDPI-TAGIELGKTIFGIvsKAVPDAEKA--QEI--TREI---QQhTNAEIMGQIAINVEEAK----- +>SRR3989344_3097262 +-----YIRNNEGLRLDPYRDLVatDRWVVGYGHLVTGQENLNPPLVDANG----N-VRSITQAEAEQLLSQDLSSHEAVARRW-IGEDAWETLSEDRRLALTDMAYQMGGVTLGEFTGTRTAIRealgvqscggsCDTWQDVYDHMMDSTWARQTPNRVGHNAVIMLYGSME----- +>APCry1669189567_1035234.scaffolds.fasta_scaffold522228_1 # 1 # 201 # -1 # ID=522228_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.478 +-----YFGPDVAMVN-------------------DLFGGSPPVMDLDG----N-WDTSINDpDVQASIMAFLDSEDSRGVVW-SNEDIedkplfvyWPRDQEEPVSiMPRDGELLISQGTTGDYTpeewaQTQPGLTegafwvgrrwVKDWFNX----------------------------------- +>Dee2metaT_33_FD_contig_21_12074423_length_208_multi_5_in_0_out_0_1 # 1 # 207 # 1 # ID=657436_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.628 +-----VFDmhLQEGDRL---WDYVmtGTQLTGNDD----------------P----T-WGTAYRMpD----GTVFVDEPGGSNRHW-HVECDtadhdfi------------PNMX------------------------------------------------------------- +>ETNvirome_6_1000_1030641.scaffolds.fasta_scaffold153748_1 # 1 # 438 # -1 # ID=153748_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.397 +-----FL-CDVGLNCTCPAGQDtt--KVCTSSSLTwhpcKPFDTAQP--------------------YCNQTASGATPEKGQVAA-------DWSCFAKGTLVCIGNNTYTV----------------tdkgkaitgrrFDVWSETCNEAMQVTGTYTVT--V---------GSCN----- +>850.fasta_scaffold12066_6 # 5547 # 5780 # -1 # ID=12066_6;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=11-12bp;gc_cont=0.581 
+-----FQFdpPQAGCDQPFLQAAIcsGKWLQMKVNNKltqeTtdinLiK--------------------------------------------------------------TAIGKY-NGSGSNGFYVmnqfdSAHQGMRikgtiekgpkgcpkgsVDTGSKCRVDT--------IDKRPGALKVYcEISQKCN----- +>JI61114C2RNA_FD_contig_51_3796659_length_246_multi_1_in_0_out_0_1 # 3 # 245 # -1 # ID=924345_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.255 +-----YIYvdkCQTLPYALCYWGTEidasTPWRYMMENNKkelvI-----DP-----DV----Y-CRSIINkpcsvggnkDpQCNIKFADFDPAKTGTTQFD-GSFNMqtkqlyc-----------NTDGSSRCSLGALGSKCdnsaqcgyieatKDTPKIKlvcqtcdepnrcvkplrkgdvCGSNEECQA-----------SISTDCDNLTaMTCGKCK----- +>SoimicMinimDraft_17_1059745.scaffolds.fasta_scaffold341098_1 # 3 # 383 # -1 # ID=341098_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.677 +-LRLVLMTLLEGKDDTPYFDTAnpPLITVGIGFNIDGTGTvlVrNRvLDemsltdtqkTAIDNLFGSqalkdirnqphstqadkavqntaltdlfsqTIgqAFSMTEAQMTAVFNGIADKKDDVaqGF-------THIDTLSFERAVLASLQYNTKynppnPKALLG-PSLLNALNnINDSALARAE-------------------------------- +>tr|I2E8W3|I2E8W3_9CAUD Gp15 OS=Clostridium phage PhiS63 PE=4 SV=1 +--YFRYVKGIEGLQQYPGNIGDGQITYGYGVTKANEPTYF---AK-------LGNPPCSEGTASKVlFELIPDKYGSLVKNqMIKDGVDLNKVPIHIFDAFVDLCYNSGYYN---SRMYRAWIRGASLDSIYNDWL--TyATmpgtifeNGLKRRRKEEAEMFKNANYI----- +>AP95_1055475.scaffolds.fasta_scaffold1081000_1 # 3 # 236 # 1 # ID=1081000_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.624 +-------------LYTPYSSYeGGEKTIGFGHKIGVDPEID---KLY------DFEKGLTKAQTEELFQRDLSSAYKRTWQ--HYfnehgQTSWDNLSDKVKIALTDVSFQKGHPS----KELMKAVADKNEKKVIRLFK------------------------------ +>APDOM4702015118_1054815.scaffolds.fasta_scaffold186248_1 # 3 # 830 # 1 # ID=186248_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.676 +--YMDTLIADEGFIPVAKRLPKgknpitgetiyeEFPTGGFGEYGPQV----------------KVGQVYTREGDLPRLQKRVEDRMPYLE---RTFPGFGDLPFNVRDSMVSSNYR---GSLPGSPNTIKFIQEKKFMQAGDEFLDNKEYRDAennpakrgiRKRMERLSNAL----------- +>SRR5215471_2859512 
+--FSQPINIPSIFTQAVFLNIERipgQHRLWYAT--TSRR------------------DTISEPEAARRLDEELARDRAKIEQ------LNPHLPEGAKKALTSLLYNLGGDvkKLNE-HGMANAIASGDVEA------------------------------------ +>ADurb_Ile_01_Slu_FD_contig_31_2469730_length_376_multi_3_in_0_out_0_1 # 3 # 374 # -1 # ID=203323_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.599 +-------------DYYVYLDKVydkakdeykYVKTIGNGINIDAKhggdHNIRFLKNLVGeDAFEEllSGKRPISLDVNKRAVRYNIELGIKYAR---NAIEPFDRYDYEIRKVVTDMFYNMGGNLPKKMPSFIEALDQGRVYDASLELKHknpfgkkgkpvdmdtTNYFDQTTGRGKLNFEVL----------- +>SoimicmetaTmtHMC_FD_contig_31_2250720_length_251_multi_1_in_0_out_0_1 # 1 # 249 # 1 # ID=276088_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.739 +-------------HYYAYDDKVkdkstgkvkSIRTIGNGINIDEEHNRRFLIDLVGeDAFQDmySGRRPIDMSVNKAAVRYNIQLSLKHAK---NAVKNFDSLPYDARLVTTDMFYNVGGNLPTKMPSFIDALNRGELVEASMELKHknpygkkdksidmdtTNYYDQTTGRGKHNFEIL----------- +>SRR6478735_4174607 +-------------------------------------------KKLLpyaskteemaKDFLKKYPLILSKEELKELTTKVGDHFERTVakqfNKNSD--LLFEDLSTKKQTVIMSVAYQYGNL-PKKCPTFFKLICKGDWRSVVEELRSFGDA------------------------- +>SRR5438445_620931 +-ERAAFILSREGFDPYFYCDQLGWVTIGIGTLVAREDDARriarnpnvHFTFHNaphrhasvdedAadwqrvhdrpglaeDDYRTVAQLRVDTASVNYQMRQEISRSSNDLY---RIHPFLLAFDSRVAMALVDTRYNPAGVSPYqspQTRPLWAALDLRearfDWNTAVAPFE-SIWANRGG--------------------- +>SRR5882724_11464227 +-ELFTFIKGREGVIDYFFVDNMGYVTVGIGTLVGAKGNdinkglaalRKvlaaapgvQFLNKKtklpasareiEadwvkvktaylasrtsgthipeRKYKDIADLVLAPTANYLLWKTEVFTSIANMY---KQRPDAQGLDEYIQMALIDVRYNPAGVNPFsigekRISKMWSYFHAQDPDNAFKEFH-Q---------------------------- +>ADurb_Total_1213_FD_contig_123_10076_length_774_multi_5_in_2_out_0_1 # 3 # 749 # -1 # ID=36557_1;partial=10;start_type=ATG;rbs_motif=AGGAGG;rbs_spacer=5-10bp;gc_cont=0.456 +-RAKYFIRTMEGTRFEAYLDDIpdpdkksqfisaakfysysqerqdelrsqcvdpitgkqkkPITTVGTGMVIESEEVQVKFDKLFGTQgfMNQvyWGKANITPQQDDVMLTYDLNERLIELRK--IYGLDWPKLRANEKLSILSEHFNGGKALVGEHTNfrrhMGSYVDTSHPvamQHAIKEIVANNPTKnpGLQNRRDADATMLASTEAPAYT-- 
+>APDOM4702015023_1054809.scaffolds.fasta_scaffold700357_1 # 2 # 343 # -1 # ID=700357_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.447 +---IESILEEEKFRPNAYDDLnpnkpltgqtilKGTLTIGYGTTNLSGVYQKKFDGR-----SITFSSKIDKYDALQLVLDYTSNLKQYLVN---NAP-NLELTQNEFNAIISIMYNAGPGEKFR-EVILNALKAKDYPAAAKIIPNfAVTSKD----------------------- +>tr|A0A2G2L7Y6|A0A2G2L7Y6_9GAMM Uncharacterized protein OS=Kangiella sp. OX=1920245 GN=COA86_07170 PE=4 SV=1 +---VESLKGWEKvpggdFAASPYKVgGKGNWTVGYGTEITDEQKDS------------GDFDNINEDKAVELLRTGVARAESRVNQFIQNNNSEIKLSQQQFDAMVHLSYNAGYLGavkkdgGLKFPKLVGNFKKGNLKGMIREFKDG--MDGLKKRRAHEIKIFLHGDY------ +>LakMenEpi03Aug12_release.lakeMendotaPanAssembly.Ray.scaffolds.fasta_scaffold865414_1 # 1 # 786 # 1 # ID=865414_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.653 +-------------------------------------------------------YAITENQAEALMKHRYANHIRDLIKSNDdfagnddsKRINWSGLPRQVREVLFDMSYNIGPKFLtrkqDPFKNLRQSILDyqanptrENVEQMVVDIRESKYYReQVGNRASANMQTLLD--------- +>SRR4029453_2952791 +---------------------------------------------------------IPDDRMRRSLNDKMLSFWSQNGASL---PTFGSIPAQAQVAL--VSYNYGA-RLRTAPKMCAAVRAGDYTVAARESFVSIWDGQKN--------------------- +>SRR5262249_17861235 +NQIVDYLAEFEAYATWMYLDSLGLVTTGIGILLDPYEKYGrvlPwydkvsmqpvndegvIKTEFQmvkSKNgpkGvpawstdpaqnfayfktfePITQLRAKDSDINSAVLNIGSQKEAACRQ--YFGADYDRYPADVQVVLTQMSYAGGLSA--RKHDLIPLLAAHDWLGAR---------------------------------- +>tr|A0A1M4EAK6|A0A1M4EAK6_9ACTN LPXTG-motif cell wall anchor domain protein OS=Nonomuraea gerenzanensis OX=93944 GN=BN4615_P5421 PE=4 SV=1 +--VKQMLIKREGVRTEAVSTTektkkgktRTTVTIGIGFDMNRGDARTIINGILKETtapseqddikSGKafddlkAGAAELTEEQVNALFDVSYEEAKELVTR--RGIESFDKLPRSVQAALIDLAF------------------------------------------------------------ +>UPI00026FE96C status=active +---------------------GGTKTVGYGHKLSIGDDS---------------DIAYSSTEVDSMLTQDVFNAYRTVFRkMKNSKYDWNTLSDEDKIILTELYYNTGNSKLL---------------------------------------------------- 
+>GraSoiStandDraft_43_1057313.scaffolds.fasta_scaffold473333_1 # 1 # 360 # -1 # ID=473333_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.519 +---------------------GGTDTIGYGHKLSKDEHEDGYVVVGDKN---IPHGKLTDAHIETLLVDDMNKHKAIARNQFnkhGHGKDYDSMSRREQDLFGALAFNIGTLedkdGNFGWPGLLKAVNDKDYEGIKRESLTS---------------------------- +>tr|C4ZHK5|C4ZHK5_AGARV Protein tyrosine phosphatase, receptor type, J OS=Agathobacter rectalis (strain ATCC 33656 / DSM 3377 / JCM 17463 / KCTC 5835 / +---SVDIMPSNSVEDEEINTesSVKTYASDS----ESAS----------------QSIFVNKKNYGELSKSDEVDNYKFTLS--S--AGSVRIDFGK-----EYEDS-NRGWT----veVYNSnweiikRDefRCGNSKTDSSSVL--GLASGTYylrikgirwewtdtkynftlvYSASNSWEKEANNSF------ +>MDTG01.2.fsa_nt_gb|MDTG01113413.1|_6 # 3751 # 5292 # 1 # ID=16530_6;partial=00;start_type=TTG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.327 +------------------GTdtGIATYSLA-----EQNY----------------WKSFSNDYYYNKLTTEEKQAWDEL-----E--QKCIALASGT-----D---NASDVVT----dqCYFSgwklqdV-----------------------ynfiflFRYAHPQFYFLSNHV------ +>SoimicMinimDraft_17_1059745.scaffolds.fasta_scaffold80050_1 # 1 # 396 # 1 # ID=80050_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.636 +-ELQANIQRLEGnFRPDVhmvrtYvkdtDETVNIPHIGYGFKIDKKDYAFyNLLEDGTQD-PNKPAYIMSKDVADKRFLKEYNKAHKAAKKL---AKV-KNITeFGKIAALTDIAYNMGTEWHLKFPSAMKALDEKNYLEFTKELLrgkdkftKSKYVKDVG--------------------- +>tr|A0A2W5SIB2|A0A2W5SIB2_FLAPS Uncharacterized protein OS=Flavobacterium psychrophilum OX=96345 GN=DI539_28815 PE=4 SV=1 +-LIAQFLKSFEGFESKAYDLKDGRFTLGYGQTNWLTPAGGII-----RP--VRLGDSIDESNAHLQLYYYYIKVIPSLNL--FLFKNGYKLHPRLVAMLVEFLYGTGPAGLSysFFKNICAQCN------------------------------------------ +>tr|A0A2S7I3J5|A0A2S7I3J5_9FLAO Uncharacterized protein OS=Cloacibacterium normanense OX=237258 GN=C3729_10425 PE=4 SV=1 +-PVLSFLKLWEAWHPYGKDIEgRGKFTTGWGSMNLLRPDGSVI-----RP--VYKGEVWSKETADLQLKYYVENSARKLNR--YLVSSGYAVNNKLYYAFLQAAYNMGDGYFDkiSFKNIVASCR------------------------------------------ +>307.fasta_scaffold1798207_1 # 2 # 385 # 1 # 
ID=1798207_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.544 +--LFSFIKDGESFKEEAYFDTAVPPKatIGYGVNLHLAQYRNYvFNamgitdathrqlytdAILGatPGVGAnlnndlqsrlnnvyaqlnpnaaERIFKISTTQARSVFDSIIADKETRLHP--ILGIGTanplVPLNSREYVALMDMFYNTEVLITRD-NDLHRALLADNRAEAWYEIR------------------------------ +>GraSoiStandDraft_23_1057293.scaffolds.fasta_scaffold5077556_1 # 1 # 186 # 1 # ID=5077556_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.301 +--DFLIkvIEnpnllaGTSTSGLEPHGD--GRGGlaIGYGSDLVANPtsFVTEhLGnvgitlspaqvmaleTAHQvfETLRRgggsverlvenaetaraaLAGFSLSSEtVATDLLKSVADEKEDQLSS--AA---FlgtdLPAESLERAVLVSMFYQTGTYFKRngggsdD-TNMSAAIRSDDRATAWFEIR------------------------------ +>GraSoiStandDraft_11_1057310.scaffolds.fasta_scaffold16900_5 # 3750 # 3878 # 1 # ID=16900_5;partial=01;start_type=GTG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.721 +--------------------------------------------------------GNNDADRPAAIAAARAGAR-------------------RW---I--NLRAVGGSWRD-QSNLIAA----------IGG--RWAEDAR--------------------- +>CryBogDrversion2_9_1035297.scaffolds.fasta_scaffold110006_1 # 1 # 129 # -1 # ID=110006_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.589 +-------------RAEVHDDGKGIPTIGYGTALVIKGKE-GWtvleklderlqeagvqldssryaadisklndiAVALDkqkaeeaqKLIREhNFSITVDKQKARKLHDIGMEWDHLSIVRS-KldteGTkkrrkpgksGTYEKLKgsrelialadisysgpirltdeligyvvaGERQKAFYQMAYKMRS-------------------------------------------------------- +>SRR3989304_2339500 +--------------SVAYRDKIGkgqPWTICAGVTGPNIH----------------AGMTATPEQCAKLEEPKRIEAAQAVHEC---VPEI-SDNPDMRDSLTDAAFNLGRAVVCG-STLQKMVKRGQYFEACMQLTDAtgadgwpdGWTKASGKRQKGL--------------- +>SRR5574344_1435276 +--CYNWIASKEGWRNNTYQDSVGIWTIGLGVRNDGF----------------KYGKSISNERVIELFLQHVKSNASDVIK--KWKKYFGDIKplQNELPRCYNIWR------------------------------------------------------------ +>GraSoi013_1_20cm_3_1032427.scaffolds.fasta_scaffold366284_1 # 3 # 248 # 1 # ID=366284_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.760 
+--LVPFLQQVEGKDNHAFWDAQnQrvKISIGYGHQIQPNEYAQGYIDTGtagriavTSAnssSNPPGNATATDEQCVALLNIDAQIYIAGAHR--QIGGAWDLLGPYQQAALTSVQYNLPSAltWMVA-NGLNNFITNNDLQGAANLIIQA--PRkGIGDRATKESN------------- +>SRR3954470_13016648 +----EFNEPLEARTRWMYLDIRGFVTPAIGLLIDATRTPPqepndaeraashqlarrlAWQlpdgspagsaeidEEWDNVkammdlapqgggtFEDRTSLRIDDDEVDRVVFEKLDEMESVLIG-REPFADFADFRPMPSRDYS--------------------ACRGAWARISGSRSSRRS-----SRT------------------ +>KNS5DCM_BmetaT_2_FD_contig_91_894634_length_389_multi_2_in_0_out_0_1 # 3 # 389 # -1 # ID=507049_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.401 +----------------------STVRCCIGQSCSVNGQNGKcsdvsmcSGsalsggcpgdsnYKCCVATAGPTSAPTTAVSPCPTGATFDANLGFCKDSQ----NLYGPFTNKMTLDCvARGGGEVGCKRE----------WDVNIQGTAAKIQ--RWS------------------------- +>SRR5438128_2052190 +------------------------------------------------------TIRISRADIRIDEQASVDFHGGigsqmldGLL---GEVAGIDQLVIDVVLRkkscneTGLLTFTTGEKPYghpplLSr--PFX---------------------------------------------- +>ERR1035437_9438009 +--------------------------------------------------------IVPWESALAVYEsKTVPRFAAMTE---HTYPGITLAPPDIQGVMLSTTFNRGT-dlrekRRQELLWSRCDIITGNYTRLPDCQIGmrRDRKRvv------------------------ +>ERR1035437_7251314 +---LDRLAPFEGRVPFLYLDNAtrPNVTTGVGFLIVGIDDACklPFYRVsdgqpatraeITNDflrvrsmrgglVASAYkgGLRRAEADIDSEGFRRLRQFLADL---PGVFPGFDGFPNGVQLALLDLSLYGCGDhrdlpslpar-------------rSPD--------------------------------------- +>tr|A0A261DBW6|A0A261DBW6_9RICK Uncharacterized protein OS=Rickettsia endosymbiont of Culicoides newsteadi OX=1961830 GN=RiCNE_04680 PE=4 SV=1 +--FTKFIIDHESEQLKIYDDRFGVPTIGIGFALINKVSDGweAytekklqdlginltaeqYkiIKDYAKAktNGSdtshlrskldRFDFTITQEMAQNLLQHSIQKKYDHIKNN-IGEDKWDKLNLAQQVGVMDHAFQRGNI-----LSLTESLIAGDYATTAKII------------------------------- +>JTFO01.1.fsa_nt_gb|JTFO01042056.1|_1 # 2 # 313 # -1 # ID=42056_1;partial=10;start_type=ATG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.522 
+---WNQLKSTSGygyrecADFAASMAAGFFN------VIKSPY----------------EYRQVRRRFGDTTTV------------------------------------------------------------------------------------------------ +>UPI0001FC2E79 status=active +--SRAILSQKEGFIGTPIWDI-DNWRIGYGSSTITKSNG-EVI-TLPNNPSNKPDLkdpkwggfpdgKITEEDALRDKKRRLNDeFILEVNK--RLGDKKDVVPSSVKSIFVSMFYNYGSN----SNKFNDAINlakNGDYCGAADKIEDRkddgPFRTdgkpYNYKRRTDEAQWIRDCVC------ +>SRR6056300_259726 +----NFILSYERYRPVNQFK-NNIEQIGYRHKGSA-------------------KYGLTEEECYNLFVKDIAETSLLVKQS-FKYSSIESLTQSEFDSLMSLHFSQLQIKTLegnnGTYDIIKTLELENPLNFASVLHDSKYFS---RRRRQEADMFILSTYS----- +>GraSoiStandDraft_56_1057294.scaffolds.fasta_scaffold109863_1 # 2 # 445 # 1 # ID=109863_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.599 +-----------------YIDSQGVDTIGYGTTFYDNIFSG------RNAV--GRGDTSTVGEMESTMGRHVKEIDEKYD---EEYPMYKHLKPQQQSGIISYLYNRGPNAILFHGPSDRALSSGNIRALAEL-VQGDIVSVDPKRRKEEADLIRSGPLQ----- +>GraSoiStandDraft_54_1057290.scaffolds.fasta_scaffold1587880_2 # 269 # 397 # -1 # ID=1587880_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.519 +-----------------TGVYGDRTTLGWGFTFLDEITSG------RKKV--TPGMTMTKKEADDLLEYQVRHYHDNFldT---KYFPKFKYFTPEQQAGVILYGFNQPY--FWYSAPdFSKAIEEGNLREASEQ-NN----RGLPTREKIEKMLLRSGPLQ----- +>tr|Q2NPA5|Q2NPA5_9CAUD Uncharacterized protein OS=Xanthomonas phage OP2 PE=4 SV=1 +--AAEHLMVFWPRAEFPRRGPDNVLRLGYGRAIERS--------------------AQPESVARMALNADIERVSRALES--RVFVLMTRQPNCvpKVGYLYAVADIIGVERIRDWDELWTAAGRADWQTVCLYLMALQWEA------------------------ +>AP12_2_1047962.scaffolds.fasta_scaffold467487_1 # 2 # 409 # -1 # ID=467487_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.544 +--AMELLRSIHAMQPFPKIDALGIERHGFGRPIRKR--------------------PISEYAASVALSEDVKATVETLRI--RVFELVAVHPEAvsRLKYLYAIADLLGAEKVKDWSGLWDCLRKQDWDGAAVELLTAQWDR------------------------ +>ERR1041384_2256723 
+--YCARVDAF-----------G---EQYEIHVFRGD------------------------AEIGIFGRNGwISRHGFrGTP------SVPRSVLnklngenavqmrarnl---MAPKGEEDIRAGRYS----PGVSRLFNRlnsvaigATILGAYLDIR------------------------------ +>APAra7269096936_1048531.scaffolds.fasta_scaffold49336_2 # 394 # 984 # 1 # ID=49336_2;partial=01;start_type=ATG;rbs_motif=AGxAG;rbs_spacer=3-4bp;gc_cont=0.646 +----NQYRaDIVRISETYGIPPAALATVVYGEnvwrsyisELKdeAAIL-GL-----------IMEGHDA-----SI-GICQVKFSTALM---LDFG------------YPPKEFAEGTR------------------------------------------------------- +>EndMetStandDraft_2_1072991.scaffolds.fasta_scaffold798644_1 # 2 # 148 # -1 # ID=798644_1;partial=10;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.619 +----KIHEaMYGAIEAMYS------------Daa--VmcNRSV-GK-----------ILAEHFG-----LT-GVSN-RIRPDVV---ITEG------------GVSYVYEIKPA------------------------------------------------------- +>AraplaCL_Col_mCL_1032037.scaffolds.fasta_scaffold116789_1 # 2 # 259 # -1 # ID=116789_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.632 +---INTMSD-EDF-DRYYTGLMYMVSTDYALsemfseLMKarNSA--GF-----------FIHFYIKTSSSTSISSKTDSHYEPDNN---L--------------------------------------------------------------------------------- +>APLak6261689865_1056190.scaffolds.fasta_scaffold84742_1 # 3 # 365 # 1 # ID=84742_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.689 +----AVAAfLL--KVGLTG------------Qav--VdgHGNI-GI-----------LIAVDMG-----GG---TP-NAGAALA---FTYT------------GADTIFDLRGI------------------------------------------------------- +>LauGreDrversion4_2_1035121.scaffolds.fasta_scaffold1038577_1 # 3 # 77 # -1 # ID=1038577_1;partial=10;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.613 +----EREKdAFGEAIE----------------YLKegSGTA-QI-----------LVNTLEK-----SE-EVIVIWF--------LKYP------------LLD-PSRKRHS------------------------------------------------------- +>tr|R5SAC3|R5SAC3_9GAMM Cell wall hydrolase SleB OS=Acinetobacter sp. 
CAG:196 GN=BN527_00142 PE=4 SV=1 +-HLIDLMKYYEGDScyyyeaiTTPYKDKFGTWTNGFGELSDK---------------------LTTQEAAYKKLAANIEAYAGEVENLLnrrIGKGTYDALPNSIKEALIDLNYNKGLSKISGNKVLMDALKNKDYSKVVANL------------------------------- +>tr|A0A0H5Q1T3|A0A0H5Q1T3_9ZZZZ Uncharacterized protein OS=uncultured prokaryote OX=198431 PE=4 SV=1 +--------------------------------------------------------IVPRALADLWLQDDLELAADQIRK-----RDFAPMTQGAFDALACFVFDMGASDLRQ-GDLCHCLELGLIAPAARILAAHAWRRSdqrTPRRKRRIAEalLMQTGR------- +>SRR5215204_3505605 +--------------------------------------------------------FLDPSVVKWMFDKTMASMEKKVS---TYLPEWDKWTADAQMAIMSGAWNMGPGFPEDWPNLTRLLNSGNWFEAAYGFM------------------------------ +>tr|R5YBT8|R5YBT8_9PROT Uncharacterized protein OS=Acetobacter sp. CAG:267 OX=1262684 GN=BN575_01254 PE=4 SV=1 +--------EFEGYVPHIYLDTGGYITTGIGALVDDETVFKTidWlvdnkrsatdaeiQAAynrfqdlkrqkkFGQDYGANlfekeTNLRVPEAYAEQraydHLKDDLKNLRSE-------FKDFDSFDYPLKELLLDIAYQTGPITEEEWPKLRDGIRKRNLSKV----------------------------------- +>SRR5215831_7048644 +--VSRWVRAKESFKSSAFADF-GQTSIGYGTAAG-G------------------RGAISEPQARAEMEGKLRESLARINE------LNPNLTHGQKQALASLDFNTGWTQRPGEknEALRAAIKGGRIGEARELFG--TYTHVH---------------------- +>GraSoiStandDraft_9_1057307.scaffolds.fasta_scaffold2453691_2 # 182 # 319 # 1 # ID=2453691_2;partial=01;start_type=GTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.565 +-KWVQFReyGKIAGAKYPNLVaaqfALEsgwGTALSA-QHNYFGI------------K--AAEGESATT-----------SNTREVIN-----GqSVY----------MDEPFKNFASPQ---------------------------DAVNHLvkqWYKDYkgykgvnnagdvyAAASMLKSE------- +>JI8StandDraft_2_1071088.scaffolds.fasta_scaffold1253323_1 # 1 # 258 # 1 # ID=1253323_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.581 +-NVTDWLEEVEDYRGKSYWDV-DAWRYGYGTDTfiTAEGDSVS--------VTRrGKMSRISKEEAKKQLEIQIEKTFrKELRT--KFgKDYYNSLPKGVRMGLESLAYNAGSDLNDTGGKIKKAIKDGDFNKAADLI-----AGQVPeghtlyNRRQQEAAFVRSGG------- +>tr|A0A1H8IED1|A0A1H8IED1_9BRAD Lysozyme OS=Bradyrhizobium sp. 
OK095 OX=1882760 GN=SAMN05443254_11011 PE=3 SV=1 +---LPAIKKSEGYWPTVKGDKlaNNLPTGGYGETVGVKMGET------------HDEKYWS-DLLEKHLREDYGRHLDEC--------IHVQLPDSAAAMAMSTAMNAGSGAVCA-SPMVRRWNAGDFRGGCNAMR--GWRI------------------------ +>SRR5215211_5025357 +---LHLIVRVEAIVLVPYQDG-KFHSIGCGHNGPAVR----------------SMRKISVADAFLILKQDCVVREPEMRRYMDRLGfTHEAWNQDEWNALVCFYHQRSN-RDDGFEELLVAAA------------------------------------------ +>APLak6261680685_1056136.scaffolds.fasta_scaffold83026_1 # 2 # 325 # 1 # ID=83026_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.679 +---KRFIRGHEQvpktqdFAPEAYYATkgdraKGILTIGWGTTRLDDRAV-------------QEGDTITREEAERYFERDVRAAISDFNSTV-SKEARDQLGVNESAAVTSFLYNNGNTSEWRNSRALGHLNK----------------------------------------- +>tr|A0A228GVA6|A0A228GVA6_9BURK Uncharacterized protein OS=Burkholderia sp. AU15512 OX=2015345 GN=CFB43_13825 PE=4 SV=1 +------TAMGEGLVLKATPDPNpkAGNNIGYGYNLNANAGTIaeDFRrAGIPAtSIdGIkNGSVQITPEQAARLLEVTLPRYTERAKQAVEavNPGLWMMINQAQKAALTDVAYQVGD--VGQFHKAIGALARKDIAGFNDALKV-TYLDKDGNRredvrRNNLRSLMINGVSAWKQ-- +>tr|A0A226IWD8|A0A226IWD8_9FLAO Uncharacterized protein OS=Flavobacterium plurextorum OX=1114867 GN=B0A81_02590 PE=4 SV=1 +------IALYEQLSLSVYDNDgsiNGTATIGFGHKLHPGLITS------------TDPKTITFDQAISYFAQDIIKTENVLNQKIENMDLTGMFNRSQYFALFDMAYNGGNSSDSILTAVLNGMKSGGVEMA----------------------------------- +>APWor7970452502_1049265.scaffolds.fasta_scaffold424640_1 # 324 # 467 # -1 # ID=424640_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.465 +---EDYIIGHEEIRLMAYDDGANNPTIGIGHFLTGEPDDRaIFQRLfdnelnYDrlvagsGQvrrgrgvaqvnlnLSTtprgvellrrgVRPQSMDREQVRILFSRDIPVYINRATNNLIRYnIDLEQLSPRHQAAWIDLQFR----------------------------------------------------------- +>GWRWMinimDraft_10_1066017.scaffolds.fasta_scaffold411161_1 # 1 # 198 # -1 # ID=411161_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.510 
+--------------FFPYTDADKWLTAGIGGKIDGDREGA-VRAqlaIVApeIDYDDfkAGQVGLTRPQVDQMFVMNVKEHIKTAEGLkvkldDGitVypFENLHEFPSYMQEAVVNGVFWSMLT-PAKSPNTMELIAKGDWEGASKEFLDGKMQREQ---------------------- +>DeetaT_7_FD_contig_51_1336734_length_250_multi_3_in_0_out_0_1 # 2 # 250 # -1 # ID=1221747_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.602 +---KQLIRKFETFSSRPSEDE-GTFRGGYGSSKKLVNGKL---------EDVTADTRWTQQEAEDTLDYQLKNTFGPLIAKQLGLGNWEKLNDKQKAALISLGYNAGPYFITArdyGQKIKDSIEDGDMETAAAYIS------------------------------ +>SRR5208282_1415103 +----KVQSPWEGVESNMYTDSLGLVTTAIGYLIDDKSGVNGYgpalvipwvhksdekpatqaeiIQDWQtvkNAHSqsgsydAPNDRkitqlKISPLVIQDLTASRMADNEKELV---KSLPHFADAPADAQMATHGMAWAMGGAFIpkDGFHAFADAFNRGDWAAAKAN---SNFRGAAPQRKA----------------- +>ERR1700686_2510585 +------------------------------SNKTPLANMSGYapalvipwahksdgqpatqaeiIQDWQtvkNAHLqsggydLPSDKkitqlKISDLVVQDLTASRMADNEKELL---KSLPHFADAPADAQMATHGMAWAMGGAFIpvDGFKAFADAFNRGDWVAAKAN---SNIKGAAPERKA----------------- +>SRR5256885_13277640 +-----------------------------------------------------------LGAAVAVFVaRTLPRTIAAAR---AIYPQIDALPPERQTALRSEEHTSELQSPcnlvcrlllekqerRRHAAARR--------------------------------------------- +>SRR2546428_14112537 +-----------------------------------------------------------RTISSAIFFfNDTATTEIYPL---SLHDALPILPPSRAAALVSLVYNRGTDLTgdrrREMRAIRDLLAAGESRIRSEEHTselQSRSEIVCRll----PAKILFCPGL------- +>SRR5439155_4247364 +-----------------------------------------------------------LGAAVTAFIaRGLPQTIALAR---SIFPQID-tLPPPSGLLLWAGHLA-ARGMVaviwRH--------------------------------------------------- +>GraSoiStandDraft_35_1057300.scaffolds.fasta_scaffold5326263_1 # 1 # 222 # 1 # ID=5326263_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.653 +----------------------DSNEVGYGYQVTNTEIKYGYVTVASQRI-DISSGKITKVMANFILEKQLRNIGNVLER---F--VKKELSQPQFDALCFYFFKEGVEKIEN-HPIVSLINNEKWYDITDEIQ------------------------------ +>OM-RGC.v1.029978909 GOS_JCVI_SCAF_1101669175839_1_gene5411944 NOG289343 "" 
+----------------------VDNVVGYNYKISDTEIDYGYITVASTRI-DISSKKITKGAAVFILEKQLRNIGNVLEK---F--ITVKISQPHYDALLYHFFAEGVSTIEK-SPITKLINAKDWYSVTDEIQ------------------------------ +>tr|A0A1L9UV96|A0A1L9UV96_9EURO Uncharacterized protein OS=Aspergillus brasiliensis CBS 101740 OX=767769 GN=ASPBRDRAFT_37900 PE=4 SV=1 +---RARLKEREGLRTTAYRDAGGSQAIGYGINQRSYPE--EFASIV---------SSPTKENSDLAFDMVVDKLERGVA----RYPNINKLNRNQVDAVIIY-------------------------------------------------------------- +>KBSMisStaDraftv2_1062788.scaffolds.fasta_scaffold4076262_1 # 13 # 384 # -1 # ID=4076262_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.672 +--VYDMLYRYEDIREAAYQDTEGIWTIGKGATYIPGWLDIELgtAKLVDGKlstEGRpvRQGDILNKEQIMKLSAEDYNTFYDRTSSQlAAVGVDILELPLHVSAPLISAAYNYGSINSahggtntpvtigdktVTFPNSLAemvaAgHKSGDYSKIADLFEYNLGPLdnkgDLINRRRSEANIIRTGTDI----- +>RhiMetdeSRZDD1v2_1073273.scaffolds.fasta_scaffold581620_1 # 1 # 705 # 1 # ID=581620_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.603 +-ALVDRIVELEGFIPEAEIrfKTDRHPTIGYGQSDSTV----------------KLGKKTTQKESEKHLKEKLIpEKVETAK---RLFTNFDELSNELKIELVQGVFR---GDFEVGHRSVRLINEERWDEAALEFLDHDEYE------------------------ +>GraSoiStandDraft_41_1057321.scaffolds.fasta_scaffold11849445_1 # 3 # 245 # 1 # ID=11849445_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.572 +----------------------GMEEISYGIVRPK------------------GGSPLTQEAAEKLLGERMQNAFKVAERVVdkRMkPGTFNQLPQRKKEVFVDYAFNMSEENFAKYRKFIPALINDDKKGVEEEYI------------------------------ +>Hof3ISUMetaT_20_FD_contig_31_420785_length_204_multi_3_in_0_out_0_1 # 1 # 204 # 1 # ID=44568_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.701 +----------------------THDEIGYGIEILKD----------------TYAAGLTQKQIDTLFKQRLTKASEEASRVVdkQLgSGTFASLSRKKQDAFVDYTYNLGEGihirsidkktgkikdkytGFTAYKKFIPAVVNDDRESVLREYK------------------------------ +>SRR5688572_11554311 
+-------------RYFPYQMPFGGRRIGYRSDIRDKA---------------LYRTGLSPDEAEKVLREDLARLENELRAYVkEKYSakPFDKLSRESREVLLDFAYSEVGPANVS-DAVYRAVIDENWKSFIHDMTYVRAKNGTP--------------------- +>SRR5678816_1517696 +-------------RYYPYMSPFGGRRIGFRSAKVDKK---------------LYKTGLSTDEATQLLRDELATCLTDLQAHLkQKYPttPFDKLSTDAREMLLDFTYWHHSAANVP-DDVYKAVLAEKWQSFIHDMAYVRMTDGSP--------------------- +>SRR5205085_162706 +----------------------------WRLTVDDDA---------------WYRDGITSDDADRRFLASLQITADAVREQLrTRAPrvSFDDLPSESREMLVDFALSDGW---IK-SDLFDAIVSQDWQKLIDECLYVRYYGPSP--------------------- +>Go1ome_3_1110792.scaffolds.fasta_scaffold113650_1 # 1 # 252 # -1 # ID=113650_1;partial=10;start_type=GTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.512 +-----------GSVPKPTNDTAekdipeaqRSKDVGFGHKVKSSEEASGEIHGIKFKNEDGTYIKLTEAQKVEILNKDMAAELALARngydgkegwdaKLKKLGITWDELDYKYQNALTSLAYNVGGAKAGtGWTKVLQAAKDGNAKEFAKQMRRKDSGKNTAGMDNRVA-------------- +>APCry1669190731_1035312.scaffolds.fasta_scaffold100042_1 # 3 # 359 # -1 # ID=100042_1;partial=10;start_type=ATG;rbs_motif=3Base/5BMM;rbs_spacer=13-15bp;gc_cont=0.465 +-----------GLVERGYTkDralevfverNRkadgtlstvmqerkrrldalpadatgdQIFDIVYGGRMGNTE----PGDGSRYKGRGLimltgkdTYRRIGDligvdlVENPELLNTDKDVMLRA-TiaylddkgfntKDI-----------TADSLRRIIGHSGGSTEANrRWTNAEQAYEDMYGETMPNSSREED-GRETSPRpMLRPE-------------- +>GraSoiStandDraft_25_1057303.scaffolds.fasta_scaffold1498526_1 # 2 # 280 # -1 # ID=1498526_1;partial=10;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.685 +-----------GDIPKPTNDTReakkpideRSKDIGYGHKVKEHEEDSGFIHGIKFKNEDGSYIPLTEEQKIKILNADMALELKLAReegwdaKLKAIGTKWENLDSKYQNALNSLAYNVGGPKAAkQWTAVLFAAKDENVLDFALEMRRMDDKKYTAGMDNRVV-------------- +>GraSoiStandDraft_39_1057311.scaffolds.fasta_scaffold2554635_1 # 1 # 318 # -1 # ID=2554635_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.660 +-NLFNYLRNAEGWESAGYLDVGsNRPTRGYGVAITPENTQL-YQGLLSREAGGealKISMQEHKERAREVFNKDIRRHHTTGGP--TKI-PFDKQPPKVQELLTELEFNG---GLSGWPELMKNAKKGDYKGVEKEMY------------------------------ 
+>307.fasta_scaffold2446606_1 # 2 # 331 # 1 # ID=2446606_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.564 +-GLLDYLKVSEGYVGDYTADPskKaQQKNIGFGVRATEEVREL-RELFIANGvepevVDDmllRKSMQDHAARARDKFNKGIRRHHTTGGP--TQI-PFDKQPPVVQELLTELEFNG---GLGGWPELMKHAKRGDYKGVEKEMY------------------------------ +>GraSoiStandDraft_42_1057292.scaffolds.fasta_scaffold166431_1 # 3 # 104 # 1 # ID=166431_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.529 +-------------------------------------------------------PELSEEQAQVISARRFKVHSSYIQ---KNHPWVKKLPAEIQEVVYDLGYNVGKYYLNkgKWDRFRARIKNGDYKSAANLL-RGRYLKQTGRRAESHIKTL----------- +>SwirhisoilCB3_FD_contig_41_3669314_length_1123_multi_1_in_0_out_0_1 # 2 # 1123 # -1 # ID=1358735_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.264 +--AAERIMQLEGLNYSFkaIHQREDYSLRPsgrvtvsypeengtysKDYGDNSA----------------RPNEYSTELDAEINLKNRLIpEYIERSR---ELFPNYDNYSENLRIELLQSVFR---GWTS--PKTRGHINNGNFILAAEEFLDHDNYRD----------------------- +>JI61114C2RNA_FD_contig_61_666948_length_523_multi_1_in_0_out_0_1 # 2 # 523 # -1 # ID=940193_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.429 +--AAHRIMELEGLNYSFkaIHQREDYSLRPsgrvtvaykeqgkekyaIDYGDNTG----------------VKGQYSTEKAAEVNLKTKLIpEYIRRSR---NIFPDYDTYSEKLRIEILQSVFR---GWNS--KDTAKLIRDEDWFLASIEFLNDADYEA----------------------- +>tr|Q22T26|Q22T26_TETTS Transmembrane protein, putative OS=Tetrahymena thermophila (strain SB210) OX=312017 GN=TTHERM_00185860 PE=4 SV=1 +---EETILDVEGGCRCAYTNISGQKLIGVGYNLNTQRQQQNLQQMLslDSdSLNniVSGNQPLTTDQILQLLTSSADVAVSDLQ---TIFTNYQTTPAIVQMALVRIYYDLTLSGLKLLTGFIDQIQAKNYDQAAVLLQ------------------------------ +>UPI0001F6C2A7 status=active +----------------------GERVKG-----------------egVv---------------EKKISLTAEQAANVRDFMNNAkTSPGNYQLLDD------NCVDFVQG--ALDNAGNMGKLADKFDKAELLNM-------------------------------- +>tr|A0A2P1P6T9|A0A2P1P6T9_9RICK Uncharacterized protein OS=Candidatus Phycorickettsia trachydisci OX=2115978 GN=phytr_70 PE=4 SV=1 
+-------------KLKVYDDTATppNKTIGFGFNMDAAGARKEWLEVFGDKVSfdkVKKGLKITKEQAKELLDYQIKKRRDELEL--LFGKDWKLLKPNEKMAIESLYFNSPSLvrKGTKFHeNIKKYIETNDPkylKETCNEVKYkSNKNRHpgIQYRRDKEARMLESNKCPIYS-- +>tr|A0A1Y2AY30|A0A1Y2AY30_9FUNG Uncharacterized protein OS=Neocallimastix californiae OX=1754190 GN=LY90DRAFT_513522 PE=4 SV=1 +--------------CKPYMGSENYPVIGYGKVCNDSVKISNHND--AATYCKEFQDQCSLKQIEKWLDEDVKKVINQINNIEIYKKIFELASSKRKSVLISLGYQKNSEELKnIFDGLYDLIVSNKWEKVAEKIFKNKNILSNTNKFIRQAYIIESGDC------ +>ETNmetMinimDraft_14_1059893.scaffolds.fasta_scaffold911735_1 # 2 # 79 # 1 # ID=911735_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.487 +--LVTFLIEQEGFSKVAEDVGDGKVTSGVGLTGTGRKL----------------GEKvRTPDAV-RELKKRLKDVYlPYLNN--KIEPkLPFKLTDNQKIALVSLFFNVGEPAFNK-SRAFKALKAGNIDIF----------------------------------- +>tr|A0A1Q6UFE1|A0A1Q6UFE1_9PROT Lysozyme OS=Azospirillum sp. 51_20 OX=1896972 GN=BHW58_03440 PE=3 SV=1 +-ELIACIAYVENYFPTSYFCG-ARWTIGYGTTGYADGKKV------------LPNQRISKKEAQNCVRHHLRSYVFPVID--QY--VERELSEGEMLATCLFIYNIGGGNFANQkgigrsCAFLKALNDNKSsQECARKMT------------------------------ +>SRR5215470_6411521 +-RLLNFTKQFEGITDFMYNDRSTPqlVTCGVGKLVKDEEAAVglkgffvnpggsqpsddEIKDDYTAAHAikrddppgnlfdfaTVTILRMPWQKITELLGKAMGERVSTMLRM-PDFADFATFPADAQLACASIAY--GSWQYPIQAPLRAAVRARNWSDAARDYRSPGWDP------------------------ +>ERR1700691_2673584 +----------------------------------------sdEVVDDYEAAHSlkrndnpanlfdfaVNTALRMPWNKMLELLGSFMGEMVGSMLRR-PQFKKFAAFPEDAKLACSSIAY--GGWGQPGFGPLLDAVQAHDWGTAATVYAPPhARHR------------------------ +>SRR6185369_369551 +------------------------------------------GRCCQppt-VNGQlstA------------------NSVDVEDL---LQRVDVGIDeqaSIDFHCRVGSEV-------ngLlRDVLGLNELVIHV------------------vLRKKSC----DETC-------- +>SRR5215467_5754565 +---------------------------------------evSEWTRCRFyse-PLHLcpsV------------------SLIDFEDF---LQRIHIRIDeqaPIDFDGGIGSEV-------dgLlDEVGGIHQLVIDV------------------vLRKKSC----DETC-------- +>tr|A0A011ABX9|A0A011ABX9_9CLOT Lysozyme OS=Clostridium sp. 
ASBs410 OX=1304866 GN=K413DRAFT_3899 PE=3 SV=1 +-----WLALAEGNLPFIYStkdishkpwtgafDPSADLTFGIGHSIKTADEFNSIKKFIET----HTDSEIT-DEVQKYLQKDMASAVERVNNFS--KNNNVVLKQNQFDAIVSLVFNYPTS-LANGSTLYDALIKGDYSK------------------------------------ +>UPI0002939896 status=active +--------------------------------------------------------NIPAEVAMNITRRHIEKSKQQLR---KAYPKFDSISIQQQAVALSLLHNYGTGAL-KYKT-MKAVIKGDLLQAIALLRDPeEWSNvELHPRRNREADLLQS--------- +>GraSoiStandDraft_43_1057313.scaffolds.fasta_scaffold4353315_1 # 3 # 242 # -1 # ID=4353315_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.733 +--------------------------------------------------------NVPSDVAMGITRSHIDKTANSVK---EKFKGID-LSPEQLAVGVSLVHNYGNSAL-GFNS-MREIMKGDMTKGISMLRDPdEWTNeELLPRRNREADLLQS--------- +>SRR5271166_6277950 +------------------------------------------------------WPPLSQSKQTELLTQDAQMVGAQVTA-----TMSIPLNAHEADALTSFAYNLGIGYLAQGHTLYDYLEGGGRDPA-TITSDFELYDQPtslpgiLNRRYDESQMYLYGIYT----- +>ERR1700722_2431934 +------TLSNEGYTDYPYLDDKGYLTVGQGNKIDNSDVAGinpaapalalTWYDkNtnqtlsddeVLAQWnaikalqsmkdagGgafePYSTMRLSQTSIQSLVQGMMDSMIATLG---KRLPAIGGWPADAQLAILRWAWAFGPN--ANFPLMFAELKKVmpNFDGAVQQA---ELDQLNLPVKIDITHMFQ---------- +>SRR5271166_6267326 +--------------------------------------------------gggfnpttGegfsTLNDLRLSDGDARKFLFDTAEQIATYLV---NYYPDMASWPADAQLGLMRWAWGRGPN--VPVKAHNMDTALKsgDFRTAANEA---HWVGEAPKTANTIAQLFF---------- +>tr|A0A1B9LFY8|A0A1B9LFY8_9GAMM Lysozyme OS=Gilliamella apicola GN=A9G22_02940 PE=3 SV=1 +------KLHWPGtgsgITLGPGYDMKErssenilndLLDIGISSDIAQKVSNAaglkketEINSFIT-----ENKDVISltEEQEKLLLKKIIPDYENRVDK-----TIKINLNQHEYDALVSFVYNIGSIKSD--SNLAKYINEGKKKEAMDVLE--QYNKSGgkvldglKKRREKEREIFEKGIYA----- +>WorMetDrversion2_6_1045231.scaffolds.fasta_scaffold176971_1 # 541 # 771 # 1 # ID=176971_1;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.584 +-RLTKFLIEKEGFDrvarhgkGEFYEDGTPIYTIGHGTIRYPDGSRVKKGDTLPSG---EKGKNIARGYLKNFINKDVKQVTNKI-------NNFDKLPIRLKVALGGEAYR---GSLAQSPKTIELINQGKYKEASEEFLNNDEFK------------------------ 
+>GraSoiStandDraft_32_1057276.scaffolds.fasta_scaffold1433692_1 # 1 # 348 # 1 # ID=1433692_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.652 +--IASFVKKNETIGgkpdRKPYRVgGKGKWTIGIGHELKNQNRI---------------PNEWTDEHSYNQFKEDLTEKIATTK---RLFKSYDDYPVDVKKSLVDGTFR---GEFKEGHKTVKYINEGKWGLVPDEYIDRDDY------------------------- +>SRR5438045_3465825 +------------------KDNDGTWHIGFGHGNANNFPPF---------V--NEFTiLKSRDEAMSILVGELNEIYvPQLTK--LLAKIGFVATDFEFSGLLDTDYNRGFGRLRD--------------------------------------------------- +>tr|A0A218MMU5|A0A218MMU5_9VIRU Uncharacterized protein OS=uncultured virus OX=340016 PE=4 SV=1 +--------------YKAYLDTRDNWTVGPGIYLGAENEVSEE----------AIQKNYSENFVKKEFVKRLDESASYIDN--KYNPNtTPNYTPLSRATLINMHFQLGTDRYETFTQLDRAILEGNNEEAAEQIV------------------------------ +>tr|A0A1E3UFS0|A0A1E3UFS0_9FIRM Uncharacterized protein OS=Eisenbergiella tayi OX=1432052 GN=BEI59_16675 PE=4 SV=1 +DNLVEFVKQQEGFVPRAYN---GGKTIAYGFDMQQYPEVK---------INYNSDGTVSEEEGERLLRIVLEQSKNQINSY--LEDTNQVLDQNAYDAVMDLFYNRNSNKLT--HEVIDAMAERDDE------------------------------------- +>SRR5579863_7568018 +-----------------SRDPTPRMTIGLGFDVSRPEAQEMLRqVGLDPGAVRGGRVPVSDAQMNELFDLTLLAAVRLAR---QRIPEFDGMPRDRQWAVLELLVWLGPEGSWGaFAE----VEELSLPLAHQPLEPAPWFDALPESS------------------ +>SRR5579863_10450365 +------------------------MTIGLGFDLSRPEASEMLRqVGLDPGAVRGGRAPVSDAQMNELFDLTLLAALRLAR---QRVAGFDGMPPDRQRAVLELLVWLGPDGSLGvFGE----LMELSLPMTHQPLEPAPWFDSLPEPS------------------ +>tr|A0A108EN44|A0A108EN44_9BURK Uncharacterized protein OS=Burkholderia territorii GN=WT83_18195 PE=4 SV=1 +------TAMGEGLVLKATPDPNpkAGNNIGYGYNLNANAGTIaeDFRrAGIPAtSIdGIkAGTVQITPEQAARLLEVSLPRYEARAKQAVDavDPNLWSALSMPQKAALTDVSYQVGD--VGQFHKALGALARKDVAGFQDALKV-TYLDKDGNRredvrRNNLRALMVSGPIAFYN-- +>OM-RGC.v1.022101568 TARA_152_MIX_0.22-3_C18881493_1_gene344567 COG0513 "" +---------------SVIWNHEES-GSSFE--------------------------------MHGPT--------------------------------------------------------------GGNTM--KLMHS-------------SG-------- 
+>APWor7970451999_1049232.scaffolds.fasta_scaffold45755_1 # 29 # 640 # 1 # ID=45755_1;partial=01;start_type=ATG;rbs_motif=AAAAAA;rbs_spacer=15bp;gc_cont=0.413 +-------------QCVNPWGEVIG-GtVSAGIKTT---------------------------K-PSEEDLKILSYR------------EMMWPSTDLKTQTATISSHSQGLY--------------------------------dtfgTTRR----------------- +>OM-RGC.v1.028236386 TARA_065_DCM_0.22-3_C21600522_1_gene265537 "" "" +----THIVLFDGDEELKIWFTVDK-GITMQLKESR------------------INIGQDRAItVEHADSSSSIELRGGEININSNStinltsgseieaASNDVWIN-------GNFVTVGHGPVKQ----PAVLG--------DNLF--LCLTQLAsvid--------------------- +>HubBroStandDraft_5_1064220.scaffolds.fasta_scaffold992955_1 # 2 # 529 # -1 # ID=992955_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.593 +---HGFLAaglskAVMGRFSYADGsapAVmgRTTIAAVVGGTisRITGGKFA----------------NGALTsAMAQLFNNEAsnmtkanrqeqlahlvvrptdngilyeygdaevlvqndvlgngipkvkiqqiehvldvshaiGKQVQIISG-----YRADSVSHSQYDAIDVRISGYNSEQVAdalhGSNHFsrIASYTDGRTSAHADNM------------------------------- +>APCry1669189883_1035261.scaffolds.fasta_scaffold506963_1 # 1 # 198 # 1 # ID=506963_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.586 +-------------------snnSWigTSNTPHSWGSF-------A----------------TGFFGlVSNALMHQAMemvasgpfpsplhrpgltiggvaglagnvagiaafyfiakdiqatgeiRPSHLIDAG-----MIGVGMRGGKYGALLAagwFVASYGTLGVNlmlg---------------------------------------------------- +>SoimicMinimDraft_3_1059731.scaffolds.fasta_scaffold1710687_1 # 2 # 232 # -1 # ID=1710687_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.468 +-NFIGdlegerligYVPNAEGSKSGVTIA-T-GFDLGarnesdiKDLPIELQNKLkpylG--------------LKGDaAKALASKLRINaDeakIINDFskkkattslaKKWY---DKTGkDFFEMSQAKQTVVASVAFQYGD--LaSETPNFWRQTTTGDWDGALK--------------------------------- +>SoimicmetaTmtLAB_FD_contig_31_16729199_length_305_multi_3_in_0_out_0_1 # 3 # 230 # 1 # ID=996373_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.689 
+-DYMKILAKHEGTRPAKATE-GGGYTRGYGITSLADNFVstllR--------------NKGLnASEMEDKELAReYVIWNAEQIS---KQFDNYDEWPDSVKMAAVDLAYNGGR--VTRYANFSRFLKEGKYQDAMKETLD----------------------------- +>SRR5438477_7726540 +-----------------------------------LDEKAlealglpvSLQETLRPYLGlkrdaavawlDAVPLKLSADEVEPLNAAVQAAQVTSLQRAYeaavgSDRVRFEDLPEPAQTVIASVKFQWGSIWHrVdneNIVNFWGAAMARDWARMEAVLR--GWTPA----------------------- +>APLak6261665767_1056052.scaffolds.fasta_scaffold61358_2 # 412 # 492 # -1 # ID=61358_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.407 +--AAKILAELEGFRDKPYQS-GGNWYIGYGSQITSADLY---------------PNGISKEDALGLLKKHLTDTVDKAIN---QFTkdRNLDLTRNQHDALALYSYRCGTAWLSNSSnAMYQAVVTGK--------------------------------------- +>tr|A0A0K2QJE7|A0A0K2QJE7_9CAUD Uncharacterized protein OS=Achromobacter phage phiAxp-2 OX=1664246 GN=ADP64_000026 PE=4 SV=1 +----IVRIES---TGNADARNKrssaaglgqfvdatwiEQFTKVFPAAAAKmNDAAI-------LQY--KTGEQN-RDTQLKVLENFTQENIAKLVNA-GQaptaGNAYLAHFLGAGDAIKVLLANPTE--LAKNivsEQSVRANPEVFGkNQTAGDLR--AWAARLMggnkPILDSGLTKEETEAA------ +>tr|A0A1W0YUH6|A0A1W0YUH6_9BURK Lysozyme OS=Burkholderia multivorans GN=UA18_05838 PE=3 SV=1 +----------HLEGVTNRLHWPggaSGVTLGAGYDMKARTAESvvaDMKAiglddatatAISGGAGLekdaardfckkNRDVvNLSNDKQVELLHKTVPAYVRMVN---KAV--KVQLKQTEFDALVSYAYNPGG-GWAKVT---DMINRGQIPEAMAQIS-qyVYSGGKvfdgLVKRRKDEVTLYTTGRYE----- +>tr|A0A0J6L3U4|A0A0J6L3U4_9BURK Uncharacterized protein OS=Burkholderia sp. 
LK4 GN=VK92_38645 PE=4 SV=1 +----egNDIKDSQYF--SRVIHWPgnalSGVTLGRGYDMGNRDEHEifdHLTSsdvsgeqakKISLAAKLkgsaardfvssNKNDvgEISWEQQIRLFNLIYPDYVSRAI---ANYdhwtsgfPDKvawGQLHPVIRDVLVDFVYQ----GFtkgs-------KPMEAGMRNDIDELIRYi---ENTp-------GISQYEPGRHR----- +>GraSoiStandDraft_48_1057284.scaffolds.fasta_scaffold3280400_1 # 1 # 249 # 1 # ID=3280400_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.622 +---SnspktklgplnsesEANLRKLFEGIKKYFIEAg------GGCDIQKIAYMLataRIEcydwnrsvffgprteGisyekaeidYGVGPTGRrveyarsmgnaevgdgykyrGRGLvQITWKKKYETFA-------DILK---LPLVSEPDLA-CEWDVALKiMVIGmRDG-VFtgfslSDFIskskvdylSARRIINGVDKAKIFA------EYAKqfeeLLKDSRSX--------------- +>MudIll2142460700_1097286.scaffolds.fasta_scaffold3444185_1 # 2 # 376 # 1 # ID=3444185_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.525 +-KLEKEIKLHEGDIRVMYCDALGLVTTGIGVLIQNKNTGAAFSFVygadhvklwekngkpatradveaefktlatmcekrrkfcLkkpkcckgEQKVncdhdaGDfkksfasvravDSTLKLSDSSAKTLFRKSMDRMVKGSKEDLKESVNFDELPSPAKAAIVDLRYGMGTVRKtfpedpgKAWPSLHNALKINDFNEAANQIF------------------------------ +>GraSoiStandDraft_27_1057306.scaffolds.fasta_scaffold4945769_1 # 3 # 206 # 1 # ID=4945769_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.559 +----AFLIEKEGGRQhkvdggwRVYEDEVvkkktgkTLPTVGPGLTAKAVKDLE----------GMKVGDIIDERKITSLFKDTLGTINTELEK--EFGGTYQDLSSNEQAAVMSLIYNVGMTNFKTggkkegTTKAYRALKSGDYDTFKKESF------------------------------ +>SRR3972149_828500 +--------AHEGRRYTPYYDSAGILTVCASVTGPAV----------------VDGKRYTDDECTRVGHI-----FPK------LLEMYDQLTRDQHAlVRTSIVLVVGAAEAAG-PAAPVRSMGPPRQPLPRSLF------------------------------ +>ERR1712051_249921 +----ELVKANWGSDLCKTDVEVfGelRHEICSGLPLEAPGVADLFwSMGLDYDAVWHGKKCLAQKDCDWIMNNEIEQYRTIS------WKFFTSPCACANAVLTDMTYSMRAQlDTKDFETFINHVQHNHYSDAADELKKTLWCREIGeDRCQRDADQIKN--------- +>ERR1711997_92149 +----ELIKANWGSNLCKYDIEVyGrlDYDICSALPLQAPGVADLFwSMGLDYDAIFHGKKCLAQKDCDWIMNNEIEQFRTIS------WKYFTSRCACANHVLTEMTYTMRAQlDAKDFETFMNHVQHNQYSDAADELLKTHWCTEIGhDRCQRDADQIKN--------- +>ERR1044072_605496 
+---------------QVLGGGKGNWTVGIGEMSGRDKDSV----------------FESEDDAYESFVGKVTgeytkRVRRDLR----LERVSRRLTQNEFDALVDLAYHHGN-------------------------------------------------------- +>PorBlaBluebeHill_2_1084457.scaffolds.fasta_scaffold05845_2 # 1034 # 2770 # -1 # ID=5845_2;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.546 +---LDVLITYNNEMVEArsggtrawrNNNPGNLLNTDFSVrhGSIGeagsFAVF---------------PDVPTGQGALNSL---LRTETYqGLS----I--NDAiarfaPPAENNTPAYQRFIQNA---------------------------------------------------------- +>SRR5210317_235890 +--------------------SGGNPTIGIGHLVTPEQYDTIY----------KTGKTYTKSDLEDELIRDYFTKKQEVNNQiKNSGKNPDDIPEKVKSILVRNSF-WGVSK-SSFPKYFDSMINGLYKDAADNLK------------------------------ +>KBSSwiStaDraftv2_1062776.scaffolds.fasta_scaffold1179143_2 # 202 # 597 # -1 # ID=1179143_2;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.626 +---RTRIALFRGHTesvrS-AVLSPDGLRVISSSFDGTARIWQAgDTR--TIAQLGevsGPLEHSVFSFdgrwlaaatrengvrlwqmdnpeQPRVLRsgtrmfqtvaistdknrivaqTTPYAQLWDTVSG-----EALVTLRPEVTvDAINFPQFSPDSS------------------------------------------------------- +>tr|H0KFB0|H0KFB0_AGGAC Integrating conjugative element protein OS=Aggregatibacter actinomycetemcomitans RhAA1 GN=RHAA1_05773 PE=4 SV=1 +----ANLKKYESKDayadgrATVYRTPDGKLvrreggSLAWRNNNPGniragafaKAH-GAIG--------VGPGGFAIfpdAETGARAIGalMRTKNYKNL-----SIADAIARYAPPSENNTAAYQAAIRKQTGL--DSSRKMgtLSDAELNRVVNAIRQHeGWQEGKIsTQ------------------- +>tr|A0A1W1UVQ7|A0A1W1UVQ7_9PAST Integrating conjugative element protein, PFL_4711 family OS=Pasteurella testudinis DSM 23072 GN=SAMN05660772_02453 PE=4 SV=1 +-QALPAAKEI-----EKKWGVPAEVviaqaalESGWGQHVKGnayfgvkgtGSS-GSVNF---ATHEVINGQKVGIRD----NFAAYGGFGE-------AADGYGAFLTKNKRYRNAFNYKDDPVAFA--REVAKAgyaTDPNYADTLTKIIRG------------NKLDQIM---------- +>SRR5208283_4879247 +---------------------PPICAYNCfrgnSRSFTTSRDKGVT---------------SKVTGIVKFLLPSMSLAg----------------SPTAYSDASTDIYGRTATGRptisaNAagdgklHMEX------------------------------------------------ +>SRR5271165_1592570 
+---------------------PGPRGIQVgrAAQ----QNLGDGF---------PVFDNFDPETGEGTqIYSTTQIQtTQQFIA-----AVRAkaSQFQRGFDATDTFSGKDSAGNpfr--fektqvTSRNMLVVTPAnRNFplsrvatelEQleAEFGLE--EIVVEEsQGLAPX---------------- +>SRR6266700_2782959 +---------------------AGNCTIGWGHKIRDGSCTDQDR---------AAYSSFTESAAEQMLFVDVQLKaMTPIKR-----HVHVGLAQRELDALIDFTFNVGGGSktn--hqhpglAGSQLLVWLNAGNYKQAGVGFL--GFMAGGtgiVRRRNDEKRLWDTGEYKSY--- +>GraSoiStandDraft_53_1057289.scaffolds.fasta_scaffold1205255_1 # 2 # 214 # -1 # ID=1205255_1;partial=10;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.648 +--------------YVVYPDTKGKPTLGAGILVDKNFLKT-----IEK-KKLKVGDTVPKDIVESVGLTRWKNSIKEAKE------LLPNLSEDQVLPLAEMVYQMGKPS------------------------------------------------------ +>UPI0002CD11D9 status=active +--------------YVIYGDTRKKLTVGPGVLVDDAFKKL-----VGK-ENIKVGDKFSSDLVDNLSQERWANAIKDAEE------L-SGFKDRRARPLAEMIYQMGKPK------------------------------------------------------ +>SRR5437868_11042044 +-------------------------------------------------------IDRKSTRLNSSHVsISYAVFCLKKK---NTK------------------------------------------------------------------------------- +>SRR5690606_8897758 +---------------------DGLKTVGWGLTHLHNSRYPQ---DC--QI-THFGQRFTRNQIERMYTPVQQYFEGLVKS---YLQG-LSVPQGVFDACVSVAYNTGSGLFKNsngsDTQFYQFLKQKKFENARLSL---TWFKSG---------------------- +>SRR5271168_673425 +-----------------------------------------------------------------SSSaRSRSGWSAagpRCR---T-----STNSRLCQGSLLSVSYNRGTGGYDdpgpRDAEmrsIKADMAEKKFNNIPLQILSmrRLWPRGGdlWNRRTHEAAMYQKGLT------ +>HubBroStandDraft_3_1064219.scaffolds.fasta_scaffold4927001_1 # 1 # 225 # 1 # ID=4927001_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.627 +----ALLSQPVSKCPPTLRLpppPNAsvlAvtFSSGGGVIETAGGDG----------AVRLWQCTGKEVRALRI----------RLPD----------LAAGSFSPGGDAVYTATSG------------------------------------------------------- +>ETNmetMinimDraft_20_1059909.scaffolds.fasta_scaffold414823_1 # 2 # 400 # 1 # 
ID=414823_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.494 +---AQYISKNEGFTEISSWDV-NAYRLGFGTDKILLDD-GTIKRVLPvadyykqtnqKKVPPPIGMQTTRANAMKMLEHDLVNRFKpRVVGTfGntLTEEEWNKLSEPAKAALISYAYNCGSL--R--TKIASAIKEGKYDLAGQYIKEGpttgggVVYPGLVRRRAEESALFLSQPLP----- +>SRR4051794_13635305 +---------------------------------------------------------VPIEAALQAFYNtTIFDFAKKAA---KIYPDLFKLHPVEQAVIVGLVYNRGDSLDgdrrKEMKELITAIKDDSDKEICDLISGmcRLWPDVAg---------------------- +>SRR5688572_31002000 +---------------------------------------------------------VPIACALQVFYNtTIREYSKAAI---RIYPDLFKLHPAEQSAIVGLVYNRGAALKgerrKEMRALIVAIKNDNDKEMADLIRAmmRLWPKTLglRRRREAEAALIGLVDA------ +>ERR1700689_374469 +----------------------------------------------------vdnigpqLASVSLDTEQLVALDNVAETKVLTAITSAWssSSTTPFSDLPIPVQTVLVDLAFNFGTNGLpkA-FAAFVKA--gnsvnvtttpngnPTLWDTVINELY--HFLKG----------------------- +>UPI0003480937 status=active +---FDFIKQNEGKAiTKAYIPENkdgsilgqSGVTIASGFDLGQQDITSisELSKdlqdklipYLGakkdKAAsklKETGGLQLTDKEVNEIDMMAKQKYSNKVKESYskLTGKSFDELPSNLQTVIADIQFQYGTNYNR-TPKFAGIIQeiaenpsnVESYMKLENELR--NFGDSYSTRRGKGADLIKD--------- +>GraSoiStandDraft_48_1057284.scaffolds.fasta_scaffold4816599_1 # 2 # 217 # 1 # ID=4816599_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.574 +---RNTILELEASHRQGYIPSNadgtiignSGVTVAGGLDLGQQTADSlrDMNlpQdlitsftpYLGltgqAAQialA-NSPLSVNQQQETTINNQVVQGYWDDLSDTYqaRVGTPISDLSENQQLALHSAHYNLGGAGLfgtagN-ETNFTRQLEqddftgaannLYSWHNANGTL------NQLDPRRQAEAALFQG--------- +>ERR1700723_1554158 +------------------------------------------------------TVTVPWTPAITVHRtKVIPKWIALVQK---YLSNTDKLDGDCLGALVSNLQSRCLVRPGrrpfhgnaehqgrhgvaEFRRhTRSDTEHeASVADRQGIADPa----RKRgrAIRSGT---------C------ +>tr|A0A1Y3C4M6|A0A1Y3C4M6_9GAMM Uncharacterized protein OS=Acinetobacter sp. 
ANC 3903 OX=1977883 GN=B9T29_13835 PE=4 SV=1 +-LTWKKILEFETYEAKPYQPgdNSSGVTIAIGYDLGQQTKAQiqkDLASFYSaeqinrlltaqGKigavarelIPKLSDITITKDKALRLATVLKTRYANQVL---SIYPEALNLHPHCQGVLLSLVFNRGPGLKDkkgqltrkHMRQIKDAFKNNEIDKIPAIFRDmsKLWNKTgkngnrgVGIRRRQEAAIFEEGLK------ +>ADurb_Ile_01_Slu_FD_contig_91_374605_length_1011_multi_3_in_0_out_0_1 # 19 # 186 # -1 # ID=269144_1;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.488 +---VNtmkikdvaeFAKKNGKYHKYNLKKYKld-stavGFYQILGGTinDILDRGGK--------ELGI------TGDTIFDK--KTQDKmYVWYMSDT-ISrkktlngmmqnvMRRWASFRDESKTKKDSQGkkiYtpkNANQLK----GVIYEH------------LN--KYHPDhpmiNS--------------------- +>SRR5216684_3762538 +-------------------------------QAI------------------GAGLTITEDQAEYLLRESLSHNYaPAVAK------AMPGAEPNENDGGLCFHYNCGAIGRASWVARW--REKASAAAIIAALA--SWNKAGgnvlaglTRRRAREGAIITRGDYGP---- +>tr|A0A160FIF5|A0A160FIF5_9BURK Lysozyme OS=Burkholderia sp. OLGA172 GN=AYM40_04125 PE=3 SV=1 +-------EALAGVTNRLHWPGgNSGVTLGAGYDMKARSAESivsDMKSIGVndaaahaisAAAglsghaaldfckknLGL--VNLSDEKQVDLLRKTVPAYEKMVIK---A--IKIDLLKHEFDALVSYAYNPGGGWASVtnfinhinIEQAMNNIKK---------Y--TTSGGVafdgLVKRRADEVTLYTTGRYE----- +>SRR5699024_1887815 +---DKLIIFYEGYRAIPYVPKlsdTSGITLGYGYDLGHQNINIakiELSEIYTteqidrliKVIgykgdtaraalSSIIDIKISKEKVKQLSVILKQKNVQQVVN---MYPEAINLTSEQKGALLSFIYNSNNSLTEPrrkeKKKIQVKLKENKIEDIPELIRNmkRLWQ------------------------- +>SRR5690242_5072038 +-----------------------------------RPHQQlptpalgvgqpgACRRALARPGggrrrgaaDklKAMPPTITMADADELDAAVKKGATDTTVAaYNkAVrdkpgLREFEELPAPAQTAIASVAYQYG-dLASRA-PTLWKHATAQGWRRX----------------------------------- +>MDTB01.2.fsa_nt_gb|MDTB01109707.1|_11 # 4875 # 5624 # -1 # ID=23213_11;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.412 +---WDFISEREGLRLTGYvPDpdgSKSGVTIATGFDLGARNEGDleglpkAIIEKLKPYLgikgaeaqSVAKNLKVTNKQATTIDEFSKQKATDALKSkWLaATGESFDSLPKNKATVVASVAFQYGDLKSET-PNFWRQVTNNDWNAALNNLR--DFGDKYPTRRGLEADYLESGM------- +>APCry1669188910_1035180.scaffolds.fasta_scaffold646313_2 # 218 # 319 # -1 # 
ID=646313_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.539 +----------------------------------------manwidDYMKILAKHEgtrpakateggGytRG--YGITSLADNFVstllrnkglnaSEME-DKELAREYViWNaEQiSKQFDNYDEwPDSVKMAAVDLAYnGGRVTRY-ANFSRFLREGKYQDAMKEtldivtandpktsksgaLR--GLGNRRFDIYNYVAKELEfpqiTGL------- +>APCry1669192522_1035417.scaffolds.fasta_scaffold765725_1 # 1 # 201 # 1 # ID=765725_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.517 +---YDYLRAsiqregigAEGKsRHTGYvPSDDSGVTIGVGFDIGQHNEWElnkifpdnPKlVEKLKDYTklegdearKvaKN--LTVSGKDYIDVIVKPINYKIDRIVEkYNeK-agKNAFQNLEPELQRTIFGITYQMGTGSFLK-SDFWKQVTSKDWTGMLNNMQ--SNGWGeTQARRTKELDILLKSG------- +>tr|A0A1F7SWU7|A0A1F7SWU7_9BACT Uncharacterized protein OS=Candidatus Tectomicrobia bacterium RIFCSPLOWO2_02_FULL_70_19 GN=A3I72_06105 PE=4 SV=1 +-----------------------------------------------------QAIRIGAAQAYVLMPHAARTYWRYIAE---RFSAlsRPEAPPSVQTALLSLAYNRGAGNEA-LDVLAGPLREGDWAGAADLIGRMQQDHdlpGIRERRRWEADLIRAE-------- +>tr|A0A1G5DLB0|A0A1G5DLB0_9DELT Peptidoglycan-binding (PGRP) domain of peptidoglycan hydrolases-containing protein OS=Desulfoluna spongiiphila GN=SAMN05216233_ +-----------------------------------------------------QSIRVSREQSDTIFPYAADPYWEKISG---RFKTlaDDDTFPPVQTVMLSLAYNRGPYNKG-LEVLRQPIEEKNWAEVANVVGAMQQDHpleGIRKRRRMEAELIRSH-------- +>NOAtaT_5_FD_contig_51_2412303_length_281_multi_1_in_0_out_0_1 # 1 # 279 # -1 # ID=369434_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.441 +-----------------------------------------------------QSIRISRAQADTIFQYAAQPYWQAIAK---RFPNlaGADVLESVQTALLSIGYNRGTGNKD-LEVLKHPIQDKDWSEVSNLIGTMQQDHsleGIRKRRRMEANLIKKE-------- +>AP59_1055472.scaffolds.fasta_scaffold1051180_1 # 1 # 267 # 1 # ID=1051180_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.423 +-----------------------------------------------------GTIRISRSNALKVMPHIAVEYWKGVAR---RFKRiaEPETPPSVQTVMLSLAYNRGARNRG-LEDLKPPIADGNWLKVAEIVGAMQQDHslpGIRIPGSEX--------------- +>SRR5262245_61759606 
+-------------------------------------------------------IEVPWEVADKSYHtYTVDKFGRLVIG---ALPNAMELH------------------------------------------------------------------------ +>SRR5689334_3318391 +--GEALIREQEGFDYHPEWpSGASGTTWGYGYDAHQNTVANilnDWSKLpppaprrLADthPYsGraaigptrAVKDIYIPHQIGADVFDrIDVSREFENARR---ALSGFDDLRPNAQAALISLGFNRGWSMVGsnriEMRAIRDLVPKKDYAGMADQFMKmrRVWKGTsiehdMYMR------------------- +>SRR6266404_4016804 +------------YDLHPAFgGGNSGIDISVGYDFSTVTPKIaifDWKALpspaperLAAthPYtGhqavahlsDVYDITAPKPIGLDVFNhTDIPRVDSQCKK---IYPGFEDLR------------------------------------------------------------------------ +>ERR1043165_1520933 +-KLVPLLTTYEGIDQPsIWPGGDSGITIGLGYDLSQVTLE-QFRRDWSllegRTlaslsraVGirggeaalmaqQYKNIHIQRDDAVRVFSRaQALQDYAQAE---QTFPGIQRLPPPAQAALVSLVYNQGMRLGGekrrEVALIHDAVLTKNLREVAKQFRSlsRSAAAAhmrgLAKRRESEAELVESA-------- +>tr|Q56948|Q56948_YERPE Pesticin OS=Yersinia pestis OX=632 GN=pst PE=4 SV=1 +--------GFEGFNPTSHFPsnPSsdyfnsTGVTFGSGVDLGQRSKQDLlndgvpqyIADRLDGynmlrgkeAYDKVRTapLTLSDNEAHLLSNIYIDKFSHKIEGLFndaNIGLRFSDLPLRTRTALVSIGYQKGFKLSRTAPTVWNKVIAKDWNGLVNAFN--NIVDGMSDRRKREGALVQKDID------ +>TergutCu122P1_1016479.scaffolds.fasta_scaffold5762158_1 # 266 # 331 # 1 # ID=5762158_1;partial=01;start_type=ATG;rbs_motif=TAA;rbs_spacer=15bp;gc_cont=0.470 +--TLNYIASRENFRATPYSDG-EGQSIGLGTPALEG------------------DEKITEQQAFERAQQFLEEQvYPEIETI--QNEAGIILNKNQITALSSLLYNIGVGQTWNNSEAKRLLIEGDIEGFKEAAFDaeEGFVYsggklmrGLQNRRGKDLAMFNKAV------- +>UPI00063E5413 status=active +---------FEPSLLAPYSDPSGNPTYAYGIETNRY------------------GKKNSISEMEMQFTKIIKedvlKTIENIN-----VNNNLKLNNNQKTALASLLYNVGETQFKYkkdKEtgkitdeetEAFSALKRGDLETFKKEAFGEerGfiSGGILTERRKRELKLFETP-------- +>SRR3569833_951434 +--------------------------------------------------------DIHWEAALEGYAdHDLPRYTAMCRA---HLPGYDELSPHCKGSLFSLVLNRGA-SFDlqgaryaELRDIKAAVKRGDLARVPALLRSmkLNRTNkpaTknKQTQQKKEAQRW----------- +>HubBroStandDraft_3_1064219.scaffolds.fasta_scaffold3085697_1 # 1 # 282 # 1 # 
ID=3085697_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.656 +--FVAWMKEREGFEPVSKWDN-KQFSVGHGTRATR------------------KGVPITKKQADAALREELTEHYGHVTDIMEEFDGELALNNNQIQALTSLSFNAGPDNVRNLltgSPDKGKGELRDLETIKKKWL--LYTKSnkpsekegLKNRRIKELELFNTPIP------ +>tr|G3IQI7|G3IQI7_METTV Hemolysin-type calcium-binding region OS=Methylobacter tundripaludum (strain ATCC BAA-1195 / SV96) GN=Mettu_0871 PE=4 SV=1 +-TVVDVISKAEGYLSQVTNLKDGMSTIGYGYTFERNDNIALWQAAgitlttgewtiLqqiDSAssaqktvIaLTQFGKTLTHSEAKALLEQTYQKYESPADE-------LAMPLSWERVALVSVTYNRGEPAVhSKMQDFYSAIETGDRAEAWFQIRYKAqttnptYADGIAKRRYYESELF----------- +>OM-RGC.v1.021690511 TARA_137_DCM_0.22-3_scaffold16985_1_gene17496 "" "" +---------------LDAHFEGTSIAIGYGYDVIQNFNYiqSDLADVgvtlnttqidlLseaKTAnsarrkeIaTELNLNLPSDAAALRLFTIRIERDESTLDrilsnnG-------SDVLLhSNERLALMSMLYAGRFQNYggsfPVQTLLIEAINNNNHAEAWYVIRYLGghaqnpiNANGYAKRAYWESEIF----------- +>SRR3546814_12408711 +---------------------------------------------------------------------------------------SRTFTLFTD--TTLFRSNGRQTYLKS-SNLAKEFKQKQCEACCYELL--RWVYACKkkrkgleMRRDLERDM------------ +>SRR5215471_9797622 +------------------------------------------------------GIIIPWYSAWLLFIgTSLPSYVTKTK---QAIWQsawttatvlwsagEPGLQPRCVINRFH---------Epsw-gspgnax---------------------------------------------------- +>SRR6266566_5188868 +------------------------------------------------------RITIGWDNAMKVFANrDVPQWTAAVLK---AVPGADKLNGDLLGVLVDIAYNRGVGGFNsdnpRYremKAIRTMVTMNQLETVPDQIESmkRLWPNVadLQRRCDHRIALWNIGMA------ +>LakMenEpi03Aug12_release.lakeMendotaPanAssembly.Ray.scaffolds.fasta_scaffold248824_2 # 638 # 1429 # -1 # ID=248824_2;partial=00;start_type=GTG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.383 +----RFISLEEGTElQGDIPkDSkgnilgTSGVTIGSGLDLSKQTEDRlrkmgiseKLITRFKPYlglvgsdaqkAIDNSPLTISESENEAMMPKVQRHYINQIRNTYektrqgllesERGKSWEQLTPAQRTVITSVGYQHGPNFLTKdkkPMNFIKEAAQNKWPKLLENLR--DFKDKYPSRRNREANYLEM--------- +>SRR4029077_13070987 
+------RTTMEDYR-AGRGDY-VTVAmdknsswqNQFLSSPA------------------FPGTVFR------------------------VRDNGSYGNGKTGENWIDIAYTD----------------PQKAKSMMLKGV--EFNPISPeea-QKISESRSYVEN-------- +>GraSoiStandDraft_9_1057307.scaffolds.fasta_scaffold2784130_1 # 2 # 292 # -1 # ID=2784130_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.674 +-------------------------------------------------------APLSEALATELLAIDIESHCAYFKK-----EINVPITQNQFDAMASICFDKGPEFYG-LKMIVGQLNKGNCDEAAKWFMKSDQDvatgesAATVARRTQEAQLFAGYGYT----- +>APLak6261667961_1056064.scaffolds.fasta_scaffold179320_1 # 1 # 252 # 1 # ID=179320_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.520 +-----------------------------------------mgLSSDIVQklapYFGlqkdeaacaiIQNPLRLSREEATNTTNVIKMQTLMSVQSRYdqdkkAGAKKFDSLPRGIRTAIVSVWFQFGLP--PKYPKFWGHVTRNEWEKAVNELR--NFYSNpgdqargDLRRRNHEADIIEAA-------- +>tr|R9K5S8|R9K5S8_9FIRM Uncharacterized protein OS=Lachnospiraceae bacterium COE1 OX=1235793 GN=C809_04202 PE=4 SV=1 +---------------------PNVLTIGWGHAMFSSQEGEfVFSDGtsmnlYDmciyDSSGTQPINGITYDQAMEILQSDIKMREEILNEELEKRQIITLIDQHFYDALFSLIYQMGTEQITTNTDLSKFLNKTNFDPTnAQEIKEQfgEYTnhsEQgTMRRRADELDIIFYGTYER---- +>tr|A9BWH0|A9BWH0_DELAS Phage-related lysozyme (Muraminidase)-like protein OS=Delftia acidovorans (strain DSM 14801 / SPH-1) OX=398578 GN=Daci_3556 PE=4 SV=1 +----QTLERWEGNVLQVYADHlaGGLPTYCAGRTDPTA----------------VVGTKLTSDQCQSINKTTLLEYGYAVLG----CVNWDYLTARRLIGLTVFAINVGKDGACG-SQAVRQINAGAVDAGCDLIAStpdgrPNWSYAGgvfvqglQNRRQAERALCLEG-------- +>SRR5208282_4578724 +---FNFTRPVEGYTPYLYTDSKGFVTTGMGNLVESKGQPTadvfalPWKkpdgslasraeiaAAWQtvkSAWpkvqssasQSLTTLRLSEADVAKLVSQKMAQNQAYLAS---KYPGMSQWPADAQMAIHALSWAWGAGFASVWGtlgqNFAAAVNasPPDFARAASIMQQASAHeesinPGLVPRDKAVALMLENAA------- +>ERR1700720_912642 +----KFILKMEGMVLKGYVPlpekTKSGVTIAGGLDIGQLSVKEfnklPMSADLRAkllpyvglkrfeakAFLKAHPLHVTRDEAEQLNLIAANMILMPLSEkYhKASGKSFSNLPPAAQTALFSFAYQYGAGFMTKqgLKKLWNHFVAEEWSEVSKTLNS---FKMYSERRKQEARLLA---------- +>ERR1700722_18580222 
+----------------------------------------lSITPVLKAklrpyvglkkfqavAFLKSHPLTINVSELEELNQISSNKILIPLQKaYdKVSMVPFNRLPAEAQTSLFSFAYQYGVGFMHKggLQkKLWQSYVHQNWSQVVSNLHS----------------------------- +>SRR3954466_8435646 +------------------------------------------------gNAALNLKLQLSSTEALLLTNRVTDVIVMQIVKrYeKDSGKQFNSLSPEVQWAVVDYFYQYGPYAMKNePHlTLWKFIIINDWNKVVEFLLL---QKNYKER------------------- +>Dee2metaT_26_FD_contig_31_1544169_length_213_multi_2_in_0_out_0_1 # 13 # 111 # -1 # ID=756747_1;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.384 +--AFDFISHQETPGgvpeLRAYEDKDytvstgFTLRIGFGSDTITNtDGTV---------TGVTRSSRITKEQAILDLQRRIKNDFKpKvVRRLNERGVNYDSLPLKVKVVFIDLAYNYGTL----FYDFINAWKSAKIPGIIAELNRRiaRGERQVPTRRQAEINYLR---------- +>InofroStandDraft_1065614.scaffolds.fasta_scaffold29592_2 # 701 # 1960 # 1 # ID=29592_2;partial=00;start_type=GTG;rbs_motif=GGA/GAG/AGG;rbs_spacer=11-12bp;gc_cont=0.465 +--QLTFDAEGNDLEHSPWFSRkihwpggISGVTIGRGYDLGQQSNAEndlKEAGIDeplkswlVGSSglsstaannrlNSASN------EVRTYNITRKEQYDLFMI----SYQrleddvKRINQKPDT---IRSYHPNPSATAEQPWAdipDKIKEILADlryrgDYTpgirtHIqrfaysgdidgFGRILsdKSYWGNVPqdrfDR----RSRYYENX-------- +>GraSoiStandDraft_4_1057263.scaffolds.fasta_scaffold3275259_1 # 176 # 388 # -1 # ID=3275259_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.615 +------------GHTNPWFTGtig--GRAFATHFKINYPEV----FEFDkndfvsrlNEQLiayeitgayhkaH-----------FISQCLHESAHFDTTI--------EF-----------------------------------GsghNYDpgqhsDAikngntvvgdgPRYK------------------------------- +>SRR5882724_10855659 +-KAVNLIIRFEGLDRPgSFPAGASGVSLGVGWDCGYET---TLDEDWGPylspdvmkrlraVMGlkgaaaqhaamSLRDIQIDPKAARAVFEHStIPREESTTA---KVFPGSETLPADAFGGLVSLIYNRGPLIDKidrrrEMLQLFQLFRKGQpfdLKAIAALVEdqKRLWPHVhnsdgdLWTRRIEEAKLIREA-------- +>SRR4051812_27051503 +------------------WDDLGGVEIGIDYDLGVATQK-DFESKWKDilkpndferlseAVGktgqaakaivgKFKDILITDEMTTKFFTAHiLPKYQALAS---EVFPGLEKLPADVQALLAGVIMNRGPSLLGasrqDMRDVRDNVAKGDVPGIAKNLHamAQR--------------------------- +>SRR5580704_14592238 
+-RSADFLIAAEDLPTHPYWPGgESGITIGVGWDLGQHSESEflrAWAAldqtTLGqvkIairKsghdaevlVPRLKAIAVPRDISLSVFRSSLADsYYPMTL---RLFPGVEILPTEVQVALLSLVFNRGVLLGHdpdwskakeldrrwEIRRLQGDVRGRNLFAIYIRLgtMKRLWEKSGlrglLYRRRDEQHLIR---------- +>APAra7269097289_1048552.scaffolds.fasta_scaffold07665_4 # 1406 # 2287 # -1 # ID=7665_4;partial=00;start_type=ATG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.637 +----------------------------------YHSRAElreTWAVlgvdALAlldGaagKkgreaqalIPQLRAISVPRDLSIQVLNRSLNKdYYPLVV---RLFPGLERLPAEAEVVFISVVFNRGPSIGHdpdwstakevdrrlEMRRMQADVRDADMFAIYAHLgtMKRLWESAGprglPLRRRDEQALIR---------- +>APSaa5957512493_1039668.scaffolds.fasta_scaffold740240_1 # 115 # 249 # -1 # ID=740240_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.511 +-------------------------------------------------------------------------------------DNWSNENNpGKTYNNQGDLVDAGTTSI------------------NENINDTnkGLIQATgafdsanfnvedknqvmnlqkqlFPDDANEWDGVFGPKT------ +>GraSoiStandDraft_5_1057265.scaffolds.fasta_scaffold4822014_1 # 1 # 231 # 1 # ID=4822014_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.706 +---ASFLKQYEantdpGISIYPYPDLgynsdgsgKGTWTIGFGSTSYIGNIVPGRS----GN-IKPTDGPITYTQAENLMRVEVNGYCKNVVDK-ALLGAVTRPSDTQYAVLISHVYNTGNL-----NNVQKYLKAGDSQSVANALAGGPTTAagsngqslgGLISRRQQESALYLAG-------- +>SRR5688572_123867 +---IALLGQMEGIKPLPYYDPDSVATIGIGFNLEEDNVLRtvmvrmgfsnteindntatGYRQqivaACNGSpqtdtalqsrldaimaardnANQtgRPTFQLSIPEMELVFNDLRPDYESEITRW---NSAIDaaSADSTERITLFSLGYNQQTNGtnLLG-DGLKAAILGNDRAEAWVEIRYRSNGdanNGIAKRRYIESEFF----------- +>SRR5215213_8382938 +-EALEFIIDEEGMDQpWRFPGGESGVTLGHGYDLGAGTEsKAdmvnDWKQWLSgaqlerlsVAIGktgdaarrprmrssfapRSRSITRRWS--RRFRMrTSCP--------------VRRKA----RCSV------------------SSLIAGhhx----------------------------------------- +>SRR6185312_11844764 +-------------------------------------------------CGlrgqkaaaalsRVQHVLVDWQAAVG-QFnRYLPYVIGETE---DHFPNCSELSEHSLGALVSVVYNRGSDTSnNdrrrEMHQIKLLMAGRNFAAIPDQIRSmkRIWAGDpN---------------------- +>SRR6056297_61345 
+---VDVIIGFKTFSPVRYETEDGEFEIGYGIGDPDD------------------EQGYTEEQAYAEWLGYVRNQQRIVKA---QIP-IDKIPNSVFDALMSLYLDTGTWRTLqaaeGTYDVADAVRNGNWLLVADILMR---GNVNPLLRKREAAALRLGSYS----- +>SRR6056297_1897051 +---LSLMLGYREWSGIRYINqATGDYEIGYGIGDPDD------------------EQGYTEPQAYADWVGYIRNRQKNLRT---QIP-IVGITQAAFDALLSLYVDTGTWRTVqsneGLYDLADAVKNSNWLLVADIISR---GNVNPELRKKEAAVVQLGDYN----- +>tr|A0A1R4LRG3|A0A1R4LRG3_9VIBR Putative peptidoglycan binding domain protein OS=Vibrio ruber DSM 16370 GN=VR7878_03204 PE=4 SV=1 +---KP--TTLTGTYNQS-VeivNSDGNVI------LKVRGSSK-pn---PFKpknssikgedaYPYVQSGTYSVDHGLHKGKPALVLNS--NGYVPtIniNPNHPSYGK---NA----NYIHIHWGYS------KTWRGsagcmtIHPDDWASFLSSVP--VGKG---------EVIIK---------- +>SRR5690606_6298219 +--------------------------------IGQNSSQDLirifksdmvLYNLYLpyankrkqvaIDYLKDNPLTITLEQAKSTDVLVKESQVKTIVKrYdsFEMPVPFQNLSTGVQTVVVSICFQWGAYS-SNLNPMWX--------------------------------------------- +>ERR1712032_1311689 +--------------LQPYRTEdkSPYGRIAFGHRVTEDDFKTQ-----------RFAGGLSVAAAHKLMMEDLRSAAETAQKE-IGQTVFETLPLRYQMVATDFVYSCGT--TNDYRGLVEALSSGSRSRIKAELF------------------------------ +>ERR1719291_705084 +--------------LQPYRNDhgAKNGRIAFGHRVTQADMDSS-----------RFVGGVNMTMAHILLEEDYQLAATTASNE-IGPPDFNALPLKYQAIAVDFAFSHGS--TRDFQRLMQeGLLKNNKKLIKAELL------------------------------ +>ERR1711879_1128216 +---------------------------------------SS-----------RFVGGVNMTTAHVLLAEDYQLAATTASNE-LGPPDFNAMPLRYQAIAVDIAFSHGS--TRDFQRLMQeGLLKSNRKLIKAELL------------------------------ +>ERR1740121_2584951 +--------------LQPYRNLenASYGRIAFGHRVTLAEIESH-----------AFAGGINVSRAHQLLHNDYGSAVATATEE-LGQDRFETLPLKSQAIAVDFILSFGS--MSGVPDLTDALLANDRKRIKHQLT------------------------------ +>UPI00069C3CAA status=active +-----------------------------------P------------------FQFIDS------------TWLAMMQKYGKK--YGY--DVSSM-------------------------------------------------------------------- +>tr|A0A1F8URV4|A0A1F8URV4_9FIRM Uncharacterized protein OS=Clostridiales bacterium GWF2_36_10 OX=1797682 GN=A2Y15_08070 PE=4 SV=1 
+MDLIDFIAGWETLSLTRNKD----GTIGFGYTIFSNDKLL---YDITG---SYNPSNITERQAYMLLMATVQKHEIALYS--KYSSDILGMTLHQKNAVLDYSYNVGNA------------------------------------------------------- +>Laugrefabdmm15sn_1035127.scaffolds.fasta_scaffold278152_1 # 3 # 134 # 1 # ID=278152_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.598 +----------------------------------------------------------NPENGIRALTRDLTNKrKRGLDTI-----TKIItvyappSENDTKSYIKDVANDMGLSATDklsdknmykmikamtkhegG-KEALKHFTDAIIKKGMKSAYKNKYQk-------------FNX--------- +>SRR3974390_6597 +---LGFTRAFEGDLPFMYLELKGVGTVGFGDLVDSQNAVRflPFRhkrdgtpatqweiaNEWLkvknaqsmmrlggGAFAALTGLYLDDAGIQGVVAGRVPPAGGAPPR---RLPPPPPPAPGPPPP------------------------------------------------------------------ +>SRR5215475_2837035 +-------------------------------------IPSrrevfktmgldvtrlnktdpnyqtqlakeqEYIDSiiatiefYQnttdsiqYflneimadraadpalqSYnhiTSRTTFSLTNDEIEQTYQNIIKVYEGRITS---LFGDtAVFAESKE--------------------------------------------------------------------- +>SRR6266581_7159477 +----------------------------------------ag-------eapkTsaseqalqqaldtklqSYvMGVTAFSLT-EQAAKDVKNdIILGFSigpfvdpgkqARLDaK---LQNNgTViDHDTAEYKALMSLYFAGESLI-GLG--L----------------------------------------------- +>DeetaT_13_FD_contig_21_9215483_length_206_multi_9_in_0_out_0_1 # 2 # 205 # -1 # ID=599926_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.451 +----RLLGNFEGYSETPYFDTKrpnRHVTIGRGFDIEPDKSIGrkevfnvmelgskyikntgdpkqtdalrakeeEYVDRiikvikgPDtdnatiqKslnkimaeraadpafsNIehiTSRKTFSLTGQEIETVYTAINKSKEGIIDrK---IAPKdK-LVLSRERAVLVAMAFQGIISE-R-SASLREAiVKSDNRAEAWFQIR------------------------------ +>1186.fasta_scaffold1025347_2 # 203 # 664 # 1 # ID=1025347_2;partial=01;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.732 +----DLLRQAEGDYTSPYFDSRtgnKHVTIGRGFDIEGDLES-rnavfkelniglsfsalvdpggvmaaaeaAYVRDiisvltgPDtsdstlqSslraimvkrssdpvfaNVpaiTAQTDFKLTPAQIDSAFSVLIEKKEKFVDaV---IANRsT-LVLSRERAIIIAK-------------------------------------------------------------- 
+>JI9StandDraft_1071089.scaffolds.fasta_scaffold2100077_1 # 1 # 267 # 1 # ID=2100077_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.689 +-------------------------------------------------------NGVLPRVTSNLFKVKFMNAVNRVQK-----NVKVPVNTNQFTALVSLCYDVSLDKLMT-SKMLNVLNKEKYNEVASYFLD--FSEQilpngnkilnekLFNRRLLEAELFSN--------- +>SRR3546814_9304080 +-------IVSEGVITGRYNDPVGIPTACVGETNSEIV---------------SYKAQFSRDECIAVMGASLFAHAVELDKC-----VKRPLGRNEAEIGRAHVX------------------------------------------------------------ +>tr|A0A0J9E574|A0A0J9E574_9RHOB Phage tail length tape-measure protein 1 OS=Candidatus Rhodobacter lobularis OX=1675527 GN=AIOL_001937 PE=4 SV=1 +---AALLRRFEGFSSSAYDDGRrdgegnrvgpPIYRAGYGSDTVTLSDG--------SVRRVTQGMTVSVEDANRDLARRIVEFQRGIIDE-IGMARWNQFNPAQQAALTSVAYNYGSIGAEG-AGISSVVRSGSSDEIANAIRGLagHNGGINRDRRLTEAQVFSNG-------- +>tr|A0A1P8UUS2|A0A1P8UUS2_9RHOB Phage tail length tape-measure protein 1 OS=Pelagibaca abyssi OX=1250539 GN=Ga0080574_TMP2809 PE=4 SV=1 +---EELVRAVVAVADQ---LGIaakdlltvMSYETGGTF-----S-T--------SIANPTTGA------------TGLIQFMPDNLKR-YGVNAQSSIT---DQVIASGQYLADagvKAGDNL-LRIYAAINAGSPDKIYASD---aKNGGAPGT-------VLDKV-------- +>HubBroStandDraft_3_1064219.scaffolds.fasta_scaffold1259987_1 # 3 # 413 # 1 # ID=1259987_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.567 +-----LVAQVEGFKNSFYRDNVG-LAVGFGFNASHQTKQTNRRAgievlkseqsartleSLSGQMDppSLPAIQVSPEQAMGMSLLLKPGYEDPMR---AWIPGFDQLKPHQQAVLVYHAYKVGPGGAVKYRTLK---------------------------------------------- +>tagenome__1003787_1003787.scaffolds.fasta_scaffold17497254_2 # 277 # 486 # -1 # ID=17497254_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.695 +--------------------------------------------------------------------------------------LVDGNRNAARQAIGERYKDHEIASGggVTYGEYYGGWYDRNFPSPX---------------------------------- +>ERR1700722_1637346 
+-RLWNWLYATEGPVPYMYLDGLGLVTVGIGFMIDPIADYInQWGRSfvkndgtaagpeevkteFNrvklmqdKkgahlNFKAGAQLTLPASAMKPTLLNILRQKEAALKTgWrNDFFSDFENFPPDAQMGVLSTAYGSlGNKSLAE-VAFNNACKNQDWAAAANSGR------------------------------ +>SRR5262249_38213981 +-------------------------------------------------XKPTAKFFMPGSAMKPTVLSILKMKEAALKTdgWmKEFYKDFDTFPPDAQMGCLSTAYGGmYNKTAAQ-QAYNQACKDQRWADAADSGY------------------------------ +>SRR5271166_4015565 +-ELMKFTHAVEGPIRWMYLDDPhdeagnhippRRVTVGSGFMIDPIESYVdQYVGQfadknthmpasrpaiiaeFNsvngqtGiygwpNFEAITQLRLTDEFIRKKAIEIAKSKEHALKYdpRvSRYFTNFEAFPPDAQLACVSRAYGRiEDPGSsiKA-RNYYEAIGAQDWFKASENVA------------------------------ +>GraSoiStandDraft_48_1057284.scaffolds.fasta_scaffold2147330_1 # 3 # 311 # -1 # ID=2147330_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.683 +-EARDMIVNFEVGGKSYYQRrlnkptwpgGASGVTVGFGYDLGYNTAQQ-IEKDWGQvvgkeelralknCSGkkgsagkyalsaAKYRVHVTWEEAQKVFDDsTLPRFTALTKR---AFLlSEDRLHPDCNGALVSLVFNRGSSMSGsRrteMRNIRDHIATGYAGRVPREITAmkRLWVGKglpgLLTRRNAEARLFSGGLSA----- +>SRR5258708_27803418 +--IRKFLSQYEGEVREMYLDNRGFVTTGVGNLLPSSAAANrfQWEpvggggpvgtpdvtAEFDrvssaetktkiRGWArmgggnfiaaakklGIVTLRLTTESYGKIFTETLAGLESTMKGT-PGFEDYESYPADAQMGVLSVIWANGAGGFVgpqrddrrlhkTWPKFTDACKRRAWLEIVDREHY-KWRNINRDRDTATQQVFR---------- +>SRR5580698_10414448 +---------------------------------------------------amgggnfinearrlGIVTLLLNDASFDRLFQRAISGLEGAMKAT-PGFEEYEKgfggFPADAQLGIISVIWANGTTPLgPagYLKDFSALCQARKWGQIAEQEKY-KWSNIRADRNRATKQVFQ---------- +>Laugrefabdmm15sn_1035127.scaffolds.fasta_scaffold336298_2 # 125 # 247 # -1 # ID=336298_2;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.740 +----DYNDQLDAIV------------QGAGF----------------------SDKVMNALRPA-RQAQATGGVSLMPTD-----DISALLKPQTQD------------------------NTDQSNA--------------------------A--------- +>tr|A0A2G2R589|A0A2G2R589_9PROT Uncharacterized protein OS=Rhodospirillaceae bacterium GN=COB59_05860 PE=4 SV=1 
+----KFIEDREDFKTDMYAlekdkqpDVKSGPTIGAGVDLGQMNKYDlkDLVDNHGlskdtadkvapylgkkgqdaTTFTKDNPLTLSNTEARELTDAKYDKIGKDLEKNYdrdmkakGSSKRFRDLDKDTKTIALSLAINLGPNlGReNAAPRSWSLLLNDDKAGLINELN--NFGGNSNLPrRKLEADYL-AGKYP----- +>SRR3569833_148936 +--------------------------------------------SWGvpeddiqkletlrgyrgehaLQFFQDHKDqmpQISDAGIAALDRGARDSIIGTVQRQYdatnp--PQRFADLPERYRTAIISVAYHHG-PYLaKATPKFWHQITTGDWSGAIDNLK--KFEDHYGPRREMDAYYMQy---------- +>SRR5438445_790261 +-----------------------------------------------ylglqgvpaQTFLAAHPLKLNPPDLDRLNAAVTASYFNKTATKFnaanqSGFANFAALPGEAQTVIADLA--YNLGSLaNSAPNFWDQLTHGRWDDAYQNLM--NFTNQQTNPWLFKRAQEDaailqraiqNGSLP----- +>SidTnscriptome_2_FD_contig_81_515956_length_1425_multi_7_in_0_out_0_1 # 117 # 1424 # -1 # ID=570278_1;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.521 +------------------------------------------------------------VGTQMHATSELAGTAEVVKGL-----CgTTQLSDNQLLSLSSFANHTGTNAFAK-SKVLGAVKNGQGDKVPNLMM--AHSTLKvggvptmqgdyYQRRQFEGELFQT--------- +>SRR5687768_4174629 +------------------------------------------------------------EDAMAVYRVrRLPSYIEDTRR---AFPGVAAMGPDVFGALTSLIYNCGAGLK--DKplkraafeVIRAGVRANDRDEVARGVRLMRaHHnlspsvrDGLNRRREAEA-------------- +>SRR6185437_16156792 +-----------------------------------------------------VDLEIVPASLAKLTEDTIAGYESALRS---PHHvgpAWDSLPGVAQLARFRTAWADGTA--SPWPKLDAALARGDWETAAQX-------------------------------- +>tr|A0A1B9V2B9|A0A1B9V2B9_RHIRD Uncharacterized protein OS=Rhizobium radiobacter GN=A6U97_02490 PE=4 SV=1 +--------------nvdpKMTAWCA-AFVNAalaangmpGTGSLAAKS--------------FLDYGTATNDpRQGDIVVLKRGGGngghvgFFEGYDE----NGNVRVFGGNQSDGVNTKSfkrddvLGYRSI--P--GAERSSL----------------------------SDT-LTDEL------ +>HubBroStandDraft_4_1064222.scaffolds.fasta_scaffold3816117_1 # 3 # 230 # 1 # ID=3816117_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.610 
+----GAITINSGYRsnerQAQLWQE-ALA--KYGSVAE-----------------------------ARKWVAPPGNsqHNKGNAAD-LGyaNDAARQW-AHQNASQFGLSFPLSN----------ENWHIEDAG----------------------------ARS------ +>VirMetMinimDraft_7_1064189.scaffolds.fasta_scaffold23729_1 # 1 # 120 # -1 # ID=23729_1;partial=10;start_type=ATG;rbs_motif=TAAAA;rbs_spacer=4bp;gc_cont=0.317 +----AITEILNGRI-KPM-----------------------------------------TRSGNDTWTPPDPSsFKNPP-RD-RA----AEK----------------------------------------------------------ADREREAVQ------ +>JI10StandDraft_1071094.scaffolds.fasta_scaffold190835_1 # 2 # 1201 # 1 # ID=190835_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.631 +----DQLDRRNGYS-----ED-FVYRPGQNGSKKTIDTsvEY-IR-------KFNEQLAL----SNRERAIAAET-Q----RI-LNdaSSQGASLTEAQARELAELAVARK-------------------E--------adSTAKGSAK-----ESQKEAEAIA------ +>GraSoiStandDraft_45_1057281.scaffolds.fasta_scaffold884747_2 # 224 # 541 # 1 # ID=884747_2;partial=01;start_type=GTG;rbs_motif=GGA/GAG/AGG;rbs_spacer=11-12bp;gc_cont=0.704 +----------AGQLPKSKWGL-PdnmdpdvleadarnDAAQGYGVVAA-------------------AKQ--AKADANRKmlseqgglyVNGQFQPVPGWVEN-----KANQAATSKGYETMVANDeaeaqkYAArvdaenrIQGMLD----IMQDFQTGAFADQRNKLV-------qkmndL-------------G-------- +>SRR5215471_10851391 +---IALIKRNESgTRREGYVPpdrsGRSGVTIAAGFDVGQHSLAElrgfglpeDIVRKLSPyaglkgqdarDALAKTPVSITAPEAEQINEAVLNSKLNAAGKAFddqnHRPGEFTKLPWQAQTVIADLWYNMGDLRDpKVAHVLWRQVTTGDWEGAYRNLR--NFSHRDptlAARARRDAKLLRDA-------- +>SRR5260370_28664349 +--------------------hnggDNRRGTIAAGFHLGQHKLAGrrsynlsaDLIKRLSPylgltgqdarDALAKQPVRITRDEATQISDAAFDSNMNSVAQTFdkaa-RAGALARLPWRAHNPFGGLCYITAELP------------------------------------------------------ +>SRR5882757_5419835 +--------------PQGSNKtiisPTSGVTIAAGFDLGNRTVESlralglpeDLVQRFKDylglkgeqadKYVMAHPLVVTTEEADLINHLGFEQYYNTVAGSYnaaaAaNGLRFQDLPQGYQTAIVDLAWHTGt-DLS-RLKDFWGQITKGQWVEAF---------------------------------- +>SRR5205814_1094589 
+----------------------------------------lknlglppDLIERFSDylgikgfdasTAVleghkkVGNPLRISPPEAELIDALLAVDFYKKVAASFnva-sKARRFQDLPQGAQTAIIDVAYQHGqDLAS-KTPQFWFRSRKDTGKTPSLNX------------------------------- +>SRR6266480_5762054 +--LYDFLKSVEAFVPRVYSDPVGIPTLGVGYALVVFDSAQsKWIlrdsrrAdlagivtltqgdddllqeaaDVKNgaggtnPffaWSpgetlaqseakrVTSFPLITDPQARTLFDRIIGGYFNRVRLM-LGQEEFDRLSgSSEMVALDSLAFNNVIKEnGSGSNLLRDAILNGNRAEAWFQIR------------------------------ +>SRR5262249_53145359 +--RYNFflmVEERGTLQLAPYDDHAvrPNMTIGMGMNLHDATVRAEVLrtfgvirnnsalskqgrmiengyiVRitnvinamFDGsigfdearrRvdaamrdrandtkfsaadLQVLGnrrttFAFYNEDEIKATFKRLMdTVYEPLVNR------FARNipggIPDsTERVVLASLAYNSRVHEQGhprqgiprtlG-LNLERAILNGDRAEAWYEIRYGTNPpgtnmqAGIAKRRYVEADTF-----------