diff --git a/README.md b/README.md index 92be359..8371723 100644 --- a/README.md +++ b/README.md @@ -196,4 +196,6 @@ If you use ProteinGym in your work, please cite the following paper: ``` ## Links -Website: https://www.proteingym.org/ +- Website: https://www.proteingym.org/ +- NeurIPS proceedings: [link to abstract](https://papers.nips.cc/paper_files/paper/2023/hash/cac723e5ff29f65e3fcbb0739ae91bee-Abstract-Datasets_and_Benchmarks.html) +- Preprint: [link to abstract](https://www.biorxiv.org/content/10.1101/2023.12.07.570727v1) diff --git a/proteingym/baselines/carp_mif/__init__.py b/proteingym/baselines/carp_mif/__init__.py new file mode 100644 index 0000000..e4bfb0a --- /dev/null +++ b/proteingym/baselines/carp_mif/__init__.py @@ -0,0 +1 @@ +from . import carp_mif_utils \ No newline at end of file diff --git a/proteingym/baselines/carp_mif/carp_mif_utils.py b/proteingym/baselines/carp_mif/carp_mif_utils.py new file mode 100644 index 0000000..3997336 --- /dev/null +++ b/proteingym/baselines/carp_mif/carp_mif_utils.py @@ -0,0 +1,38 @@ +import torch +from sequence_models.collaters import SimpleCollater, StructureCollater, BGCCollater +from sequence_models.pretrained import load_carp,load_gnn,MIF +from sequence_models.constants import PROTEIN_ALPHABET + +CARP_URL = 'https://zenodo.org/record/6564798/files/' +MIF_URL = 'https://zenodo.org/record/6573779/files/' +BIG_URL = 'https://zenodo.org/record/6857704/files/' + +def load_model_and_alphabet(model_name, model_dir=None): + if not model_name.endswith(".pt"): + if 'big' in model_name: + url = BIG_URL + '%s.pt?download=1' %model_name + elif 'carp' in model_name: + url = CARP_URL + '%s.pt?download=1' %model_name + elif 'mif' in model_name: + url = MIF_URL + '%s.pt?download=1' %model_name + model_data = torch.hub.load_state_dict_from_url(url, progress=False, map_location="cpu", model_dir=model_dir) + else: + model_data = torch.load(model_name, map_location="cpu") + if 'big' in model_data['model']: + pfam_to_domain = 
model_data['pfam_to_domain'] + tokens = model_data['tokens'] + collater = BGCCollater(tokens, pfam_to_domain) + else: + collater = SimpleCollater(PROTEIN_ALPHABET, pad=True) + if 'carp' in model_data['model']: + model = load_carp(model_data) + elif model_data['model'] in ['mif', 'mif-st']: + gnn = load_gnn(model_data) + cnn = None + if model_data['model'] == 'mif-st': + url = CARP_URL + '%s.pt?download=1' % 'carp_640M' + cnn_data = torch.hub.load_state_dict_from_url(url, progress=False, map_location="cpu") + cnn = load_carp(cnn_data) + collater = StructureCollater(collater, n_connections=30) + model = MIF(gnn, cnn=cnn) + return model, collater \ No newline at end of file diff --git a/proteingym/baselines/carp_mif/compute_fitness.py b/proteingym/baselines/carp_mif/compute_fitness.py index a58bd4c..7750605 100644 --- a/proteingym/baselines/carp_mif/compute_fitness.py +++ b/proteingym/baselines/carp_mif/compute_fitness.py @@ -10,15 +10,17 @@ import torch from torch.nn import CrossEntropyLoss -from sequence_models.pretrained import load_model_and_alphabet from sequence_models.constants import PROTEIN_ALPHABET, PAD, MASK from sequence_models.pdb_utils import parse_PDB, process_coords +from proteingym.baselines.carp_mif.carp_mif_utils import load_model_and_alphabet + def label_row(rows, sequence, token_probs, alphabet, offset_idx=1): rows = rows.split(":") score = 0 for row in rows: wt, idx, mt = row[0], int(row[1:-1]) - offset_idx, row[-1] + assert sequence[idx] == wt, "The listed wildtype does not match the provided sequence" wt_encoded, mt_encoded = alphabet.index(wt), alphabet.index(mt) @@ -46,7 +48,7 @@ def process_batch_mif(prot,pdb_file,tokenizer,device='cuda:0'): edge_mask = edge_mask.to(device) return input_ids,nodes,edges,connections,edge_mask -def calc_fitness(model, DMS_data, tokenizer, device='cuda:0', model_context_len=1024, mode="masked_marginals", alphabet=PROTEIN_ALPHABET, mutation_col='mutant', target_seq=None, pdb_file=None, model_name=None): +def 
calc_fitness(model, DMS_data, tokenizer, device='cuda:0', model_context_len=1024, mode="masked_marginals", alphabet=PROTEIN_ALPHABET, mutation_col='mutant', target_seq=None, pdb_file=None, model_name=None, offset_idx=1): if mode=="pseudo_likelihood": prots=np.array(DMS_data['mutated_sequence']) loss_fn = CrossEntropyLoss() @@ -85,6 +87,7 @@ def calc_fitness(model, DMS_data, tokenizer, device='cuda:0', model_context_len= target_seq, token_probs, PROTEIN_ALPHABET, + offset_idx ), axis=1, ) @@ -130,24 +133,35 @@ def main(): mapping_protein_seq_DMS = pd.read_csv(args.DMS_reference_file_path) list_DMS = mapping_protein_seq_DMS["DMS_id"] DMS_id=list_DMS[args.DMS_index] + if not os.path.exists(args.output_scores_folder): os.mkdir(args.output_scores_folder) + args.output_scores_folder = args.output_scores_folder + os.sep + args.model_name + if not os.path.exists(args.output_scores_folder): os.mkdir(args.output_scores_folder) + scoring_filename = args.output_scores_folder+os.sep+DMS_id+'.csv' print("Computing scores for: {} with model: {}".format(DMS_id, args.model_name)) + DMS_file_name = mapping_protein_seq_DMS["DMS_filename"][mapping_protein_seq_DMS["DMS_id"]==DMS_id].values[0] target_seq = mapping_protein_seq_DMS["target_seq"][mapping_protein_seq_DMS["DMS_id"]==DMS_id].values[0].upper() - pdb_file = args.structure_data_folder + os.sep + mapping_protein_seq_DMS["pdb_file"][mapping_protein_seq_DMS["DMS_id"]==DMS_id].values[0] - + DMS_data = pd.read_csv(args.DMS_data_folder + os.sep + DMS_file_name, low_memory=False) DMS_data['mutated_sequence'] = DMS_data['mutant'].apply(lambda x: get_mutated_sequence(target_seq, x)) if not args.indel_mode else DMS_data['mutant'] - model_scores = calc_fitness(model=model, DMS_data=DMS_data, tokenizer=tokenizer, mode=args.fitness_computation_mode, target_seq=target_seq, pdb_file=pdb_file, model_name=args.model_name) - + if 'mif' in args.model_name: + pdb_filenames = 
mapping_protein_seq_DMS["pdb_file"][mapping_protein_seq_DMS["DMS_id"]==DMS_id].values[0].split('|') #if sequence is large (eg., BRCA2_HUMAN) the structure is split in several chunks + pdb_ranges = mapping_protein_seq_DMS["pdb_range"][mapping_protein_seq_DMS["DMS_id"]==DMS_id].values[0].split('|') + model_scores=[] + for pdb_index, pdb_filename in enumerate(pdb_filenames): + pdb_file = args.structure_data_folder + os.sep + pdb_filename + pdb_range = [int(x) for x in pdb_ranges[pdb_index].split("-")] + target_seq_split = target_seq[pdb_range[0]-1:pdb_range[1]] #pdb_range is 1-indexed + DMS_data["mutated_position"] = DMS_data['mutant'].apply(lambda x: int(x.split(':')[0][1:-1])) #if multiple mutant, will extract position of first mutant + filtered_DMS_data = DMS_data[(DMS_data["mutated_position"] >= pdb_range[0]) & (DMS_data["mutated_position"] <= pdb_range[1])] + model_scores.append(calc_fitness(model=model, DMS_data=filtered_DMS_data, tokenizer=tokenizer, mode=args.fitness_computation_mode, target_seq=target_seq_split, pdb_file=pdb_file, model_name=args.model_name, offset_idx=pdb_range[0])) + model_scores = np.concatenate(model_scores) + else: + model_scores = calc_fitness(model=model, DMS_data=DMS_data, tokenizer=tokenizer, mode=args.fitness_computation_mode, target_seq=target_seq, pdb_file=None, model_name=args.model_name) + DMS_data[args.model_name+'_score']=model_scores - - if not os.path.exists(args.output_scores_folder): os.mkdir(args.output_scores_folder) - args.output_scores_folder = args.output_scores_folder + os.sep + args.model_name - if not os.path.exists(args.output_scores_folder): os.mkdir(args.output_scores_folder) - scoring_filename = args.output_scores_folder+os.sep+DMS_id+'.csv' DMS_data[['mutant',args.model_name+'_score','DMS_score']].to_csv(scoring_filename, index=False) - spearman, _ = spearmanr(DMS_data[args.model_name+'_score'], DMS_data['DMS_score']) if not os.path.exists(args.performance_file) or os.stat(args.performance_file).st_size==0: @@ 
-157,4 +171,4 @@ def main(): performance_file.write(",".join([DMS_id,str(spearman)])+"\n") if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/proteingym/utils/scoring_utils.py b/proteingym/utils/scoring_utils.py index 4a773e8..3286b05 100644 --- a/proteingym/utils/scoring_utils.py +++ b/proteingym/utils/scoring_utils.py @@ -6,6 +6,9 @@ unusual_AA ="OU" #Pyrrolysine O and selenocysteine U indeterminate_AA = "BJXZ" #B = Asparagine or Aspartic acid; J = leucine or isoleucine; X = Any/Unknown ; Z = Glutamine or glutamic acid +def standardize(x, epsilon = 1e-10): + return (x - x.mean()) / (x.std() + epsilon) + def nanmean(v, *args, inplace=False, **kwargs): if not inplace: v = v.clone() diff --git a/reference_files/DMS_substitutions.csv b/reference_files/DMS_substitutions.csv index 4c1cb06..709cde4 100644 --- a/reference_files/DMS_substitutions.csv +++ b/reference_files/DMS_substitutions.csv @@ -1,218 +1,218 @@ -DMS_id,DMS_filename,UniProt_ID,taxon,source_organism,target_seq,seq_len,includes_multiple_mutants,DMS_total_number_mutants,DMS_number_single_mutants,DMS_number_multiple_mutants,DMS_binarization_cutoff,DMS_binarization_method,first_author,title,year,jo,region_mutated,molecule_name,selection_assay,selection_type,MSA_filename,MSA_start,MSA_end,MSA_len,MSA_bitscore,MSA_theta,MSA_num_seqs,MSA_perc_cov,MSA_num_cov,MSA_N_eff,MSA_Neff_L,MSA_Neff_L_category,MSA_num_significant,MSA_num_significant_L,raw_DMS_filename,raw_DMS_phenotype_name,raw_DMS_directionality,raw_DMS_mutant_column,weight_file_name,pdb_file,ProteinGym_version,raw_mut_offset,coarse_selection_type -A0A140D2T1_ZIKV_Sourisseau_2019,A0A140D2T1_ZIKV_Sourisseau_2019.csv,A0A140D2T1_ZIKV,Virus,Zika 
virus,MKNPKKKSGGFRIVNMLKRGVARVNPLGGLKRLPAGLLLGHGPIRMVLAILAFLRFTAIKPSLGLINRWGSVGKKEAMEIIKKFKKDLAAMLRIINARKERKRRGADTSIGIIGLLLTTAMAAEITRRGSAYYMYLDRSDAGKAISFATTLGVNKCHVQIMDLGHMCDATMSYECPMLDEGVEPDDVDCWCNTTSTWVVYGTCHHKKGEARRSRRAVTLPSHSTRKLQTRSQTWLESREYTKHLIKVENWIFRNPGFALVAVAIAWLLGSSTSQKVIYLVMILLIAPAYSIRCIGVSNRDFVEGMSGGTWVDVVLEHGGCVTVMAQDKPTVDIELVTTTVSNMAEVRSYCYEASISDMASDSRCPTQGEAYLDKQSDTQYVCKRTLVDRGWGNGCGLFGKGSLVTCAKFTCSKKMTGKSIQPENLEYRIMLSVHGSQHSGMIVNDTGYETDENRAKVEVTPNSPRAEATLGGFGSLGLDCEPRTGLDFSDLYYLTMNNKHWLVHKEWFHDIPLPWHAGADTGTPHWNNKEALVEFKDAHAKRQTVVVLGSQEGAVHTALAGALEAEMDGAKGKLFSGHLKCRLKMDKLRLKGVSYSLCTAAFTFTKVPAETLHGTVTVEVQYAGTDGPCKIPVQMAVDMQTLTPVGRLITANPVITESTENSKMMLELDPPFGDSYIVIGVGDKKITHHWHRSGSTIGKAFEATVRGAKRMAVLGDTAWDFGSVGGVFNSLGKGIHQIFGAAFKSLFGGMSWFSQILIGTLLVWLGLNTKNGSISLTCLALGGVMIFLSTAVSADVGCSVDFSKKETRCGTGVFIYNDVEAWRDRYKYHPDSPRRLAAAVKQAWEEGICGISSVSRMENIMWKSVEGELNAILEENGVQLTVVVGSVKNPMWRGPQRLPVPVNELPHGWKAWGKSYFVRAAKTNNSFVVDGDTLKECPLEHRAWNSFLVEDHGFGVFHTSVWLKVREDYSLECDPAVIGTAVKGREAAHSDLGYWIESEKNDTWRLKRAHLIEMKTCEWPKSHTLWTDGVEESDLIIPKSLAGPLSHHNTREGYRTQVKGPWHSEELEIRFEECPGTKVYVEETCGTRGPSLRSTTASGRVIEEWCCRECTMPPLSFRAKDGCWYGMEIRPRKEPESNLVRSMVTAGSTDHMDHFSLGVLVILLMVQEGLKKRMTTKIIMSTSMAVLVVMILGGFSMSDLAKLVILMGATFAEMNTGGDVAHLALVAAFKVRPALLVSFIFRANWTPRESMLLALASCLLQTAISALEGDLMVLINGFALAWLAIRAMAVPRTDNIALPILAALTPLARGTLLVAWRAGLATCGGIMLLSLKGKGSVKKNLPFVMALGLTAVRVVDPINVVGLLLLTRSGKRSWPPSEVLTAVGLICALAGGFAKADIEMAGPMAAVGLLIVSYVVSGKSVDMYIERAGDITWEKDAEVTGNSPRLDVALDESGDFSLVEEDGPPMREIILKVVLMAICGMNPIAIPFAAGAWYVYVKTGKRSGALWDVPAPKEVKKGETTDGVYRVMTRRLLGSTQVGVGVMQEGVFHTMWHVTKGAALRSGEGRLDPYWGDVKQDLVSYCGPWKLDAAWDGLSEVQLLAVPPGERARNIQTLPGIFKTKDGDIGAVALDYPAGTSGSPILDKCGRVIGLYGNGVVIKNGSYVSAITQGKREEETPVECFEPSMLKKKQLTVLDLHPGAGKTRRVLPEIVREAIKKRLRTVILAPTRVVAAEMEEALRGLPVRYMTTAVNVTHSGTEIVDLMCHATFTSRLLQPIRVPNYNLYIMDEAHFTDPSSIAARGYISTRVEMGEAAAIFMTATPPGTRDAFPDSNSPIMDTEVEVPERAWSSGFDWVTDHSGKTVWFVPSVRNGNEIAACLTKAGKRVIQLSRKTFETEFQKTKNQEWDFVITTDISEMGANFKADRVIDSRRCLKPVILDGERVILAGPMPVTHASAAQRRGRIGRNPNKPGDEYMYGGGCAETDEGHAHWLEARM
LLDNIYLQDGLIASLYRPEADKVAAIEGEFKLRTEQRKTFVELMKRGDLPVWLAYQVASAGITYTDRRWCFDGTTNNTIMEDSVPAEVWTKYGEKRVLKPRWMDARVCSDHAALKSFKEFAAGKRGAALGVMEALGTLPGHMTERFQEAIDNLAVLMRAETGSRPYKAAAAQLPETLETIMLLGLLGTVSLGIFFVLMRNKGIGKMGFGMVTLGASAWLMWLSEIEPARIACVLIVVFLLLVVLIPEPEKQRSPQDNQMAIIIMVAVGLLGLITANELGWLERTKNDIAHLMGRREEGATMGFSMDIDLRPASAWAIYAALTTLITPAVQHAVTTSYNNYSLMAMATQAGVLFGMGKGMPFYAWDLGVPLLMMGCYSQLTPLTLIVAIILLVAHYMYLIPGLQAAAARAAQKRTAAGIMKNPVVDGIVVTDIDTMTIDPQVEKKMGQVLLIAVAISSAVLLRTAWGWGEAGALITAATSTLWEGSPNKYWNSSTATSLCNIFRGSYLAGASLIYTVTRNAGLVKRRGGGTGETLGEKWKARLNQMSALEFYSYKKSGITEVCREEARRALKDGVATGGHAVSRGSAKLRWLVERGYLQPYGKVVDLGCGRGGWSYYAATIRKVQEVRGYTKGGPGHEEPMLVQSYGWNIVRLKSGVDVFHMAAEPCDTLLCDIGESSSSPEVEETRTLRVLSMVGDWLEKRPGAFCIKVLCPYTSTMMETMERLQRRHGGGLVRVPLSRNSTHEMYWVSGAKSNIIKSVSTTSQLLLGRMDGPRRPVKYEEDVNLGSGTRAVASCAEAPNMKIIGRRIERIRNEHAETWFLDENHPYRTWAYHGSYEAPTQGSASSLVNGVVRLLSKPWDVVTGVTGIAMTDTTPYGQQRVFKEKVDTRVPDPQEGTRQVMNIVSSWLWKELGKRKRPRVCTKEEFINKVRSNAALGAIFEEEKEWKTAVEAVNDPRFWALVDREREHHLRGECHSCVYNMMGKREKKQGEFGKAKGSRAIWYMWLGARFLEFEALGFLNEDHWMGRENSGGGVEGLGLQRLGYILEEMNRAPGGKMYADDTAGWDTRISKFDLENEALITNQMEEGHRTLALAVIKYTYQNKVVKVLRPAEGGKTVMDIISRQDQRGSGQVVTYALNTFTNLVVQLIRNMEAEEVLEMQDLWLLRKPEKVTRWLQSNGWDRLKRMAVSGDDCVVKPIDDRFAHALRFLNDMGKVRKDTQEWKPSTGWSNWEEVPFCSHHFNKLYLKDGRSIVVPCRHQDELIGRARVSPGAGWSIRETACLAKSYAQMWQLLYFHRRDLRLMANAICSAVPVDWVPTGRTTWSIHGKGEWMTTEDMLMVWNRVWIEENDHMEDKTPVTKWTDIPYLGKREDLWCGSLIGHRPRTTWAENIKDTVNMVRRIIGDEEKYMDYLSTQVRYLGEEGSTPGVL,3423,False,9576,9576,0,0.04324892146,median,Sourisseau,Deep Mutational Scanning Comprehensively Maps How Zika Envelope Protein Mutations Affect Viral Growth and Antibody Escape,2019,10.1128/JVI.01291-19,291-794,Zika virus env,Viral replication,Growth,A0A140D2T1_ZIKV_theta0.99_281-804_11-26-2021_b02.a2m,281,804,524,0.2,0.01,16501,0.948,497.0,1357.9,2.732193159,medium,329.0,0.661971831,A0A140D2T1_ZIKV_Sourisseau_growth_2019.csv,effect,1,mutant,A0A140D2T1_ZIKV_theta_0.01.npy,A0A140D2T1_ZIKV.pdb,0.1,,OrganismalFitness 
-A0A192B1T2_9HIV1_Haddox_2018,A0A192B1T2_9HIV1_Haddox_2018.csv,A0A192B1T2_9HIV1,Virus,HIV,MRVKGIQMNSQHLLRWGIMILGMIMICSVAGNLWVTVYYGVPVWKDAETTLFCASDAKAYDAEVHNIWATHACVPTDPNPQEINLENVTEEFNMWKNNMVEQMHTDIISLWDQGLKPCVKLTPLCVTLDCHNVTYNITSDMKEEITNCSYNVTTVIRDKKQKVSSLFYKLDVVQIGGNNRTNSQYRLINCNTSAITQACPKVTFEPIPIHYCAPAGFAILKCKDEKFNGTGLCKNVSTVQCTHGIKPVVSTQLLLNGSLAEGEVRIRSENITNNAKNIIVQLASPVTINCIRPNNNTRKSVHLGPGQAFYATDGIIGEIRQAHCNVSKKEWNSTLQKVANQLRPYFKNNTIIKFANSSGGDLEITTHSFNCGGEFFYCNTSGLFNSTWEFNSTWNNSNSTENITLQCRIKQIINMWQRAGQAIYAPPIPGVIRCKSNITGLILTRDGGSNKNTSETFRPGGGDMRDNWRSELYKYKVVKIEPIGVAPTRAKRRVVEREKRAVGIGAVFIGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQSNLLRAIEAQQHLLKLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKSQDEIWGNMTWLQWDKEVSNYTQIIYTLIEESQNQQEKNEQDLLALDKWASLWNWFNISQWLWYIKIFIIIVGGLIGLRIVFAVLSVINRVRQGYSPLSFQTRTPNPGELDRPGRIEEEGGEQDRGRSIRLVSGFLALAWDDLRSLCLFSYHRLRDFILIATRTVELLGHSSLKGLRLGWESLKYLGNLLVYWGRELKISAINLCDTIAIAVAGWTDRVIELGQRLCRAILHIPRRIRQGFERALL,852,False,12577,12577,0,-2.2,manual,Haddox,Mapping mutational effects along the evolutionary landscape of HIV envelope,2018,10.7554/eLife.34420,30-691,HIV env (BF520),Viral replication,Growth,A0A192B1T2_9HIV1_theta0.99_full_11-26-2021_b09.a2m,1,852,852,0.9,0.01,74854,0.986,840.0,36319.9,43.23797619,medium,2382.0,2.835714286,A0A192B1T2_9HIV1_Haddox_2018.csv,fitness,1,mutant,A0A192B1T2_9HIV1_theta_0.01.npy,A0A192B1T2_9HIV1.pdb,0.1,,OrganismalFitness -A0A1I9GEU1_NEIME_Kennouche_2019,A0A1I9GEU1_NEIME_Kennouche_2019.csv,A0A1I9GEU1_NEIME,Prokaryote,Neisseria meningitidis,FTLIELMIVIAIVGILAAVALPAYQDYTARAQVSEAILLAEGQKSAVTEYYLNHGEWPGDNSSAGVATSADIKGKYVQSVTVANGVITAQMASSNVNNEIKSKKLSLWAKRQNGSVKWFCGQPVTRTTATATDVAAANGKTDDKINTKHLPSTCRDDSSAS,161,False,922,922,0,0.141,median,Kennouche,Deep mutational scanning of the Neisseria meningitidis major pilin reveals the importance of pilus tip-mediated adhesion,2019,10.15252/embj.2019102145,1-161,pilin (PilE),"piliation (20D9 anti-pilus monoclonal Ab), aggregation, adhesion (human umbilical vein endothelial cells 
(HUVECs))",,A0A1I9GEU1_NEIME_full_11-26-2021_b08.a2m,1,161,161,0.8,0.2,5553,0.857,138.0,2183.6,15.82318841,medium,72.0,0.5217391304,A0A1I9GEU1_NEIME_Kennouche_2019.csv,piliation_log2_ratio,1,mutants,A0A1I9GEU1_NEIME_theta_0.2.npy,A0A1I9GEU1_NEIME.pdb,0.1,,Activity -A0A247D711_LISMN_Stadelmann_2021,A0A247D711_LISMN_Stadelmann_2021.csv,A0A247D711_LISMN,Eukaryote,Listeria monocytogenes,MNINDLIREIKNKDYTVKLSGTDSNSITQLIIRVNNDGNEYVISESENESIVEKFISAFKNGWNQEYEDEEEFYNDMQTITLKSELN,87,False,1653,1653,0,-0.0155627327,median,Stadelmann,A deep mutational scanning platform to characterize the fitness landscape of anti-CRISPR proteins,2021,10.1101/2021.08.21.457204,1-87,Anti-CRISPR protein AcrIIA4,activity against SpyCas9 inducing an RFP reporter,Flow cytometry,A0A247D711_LISMN_full_b0.3.a2m,1,87,87,0.2,0.2,1316890,1.0,87.0,188739.9,2169.424138,High,209.0,2.402298851,A0A247D711_LISMN_Stadelmann_2021.csv,mean_prediction,1,mutant,A0A247D711_LISMN_b03_theta_0.2.npy,A0A247D711_LISMN.pdb,1.0,,Activity -A0A2Z5U3Z0_9INFA_Doud_2016,A0A2Z5U3Z0_9INFA_Doud_2016.csv,A0A2Z5U3Z0_9INFA,Virus,influenza H1N1,MKAKLLVLLYAFVATDADTICIGYHANNSTDTVDTILEKNVAVTHSVNLLEDSHNGKLCKLKGIAPLQLGKCNITGWLLGNPECDSLLPARSWSYIVETPNSENGACYPGDLIDYEELREQLSSVSSLERFEIFPKESSWPNHTFNGVTVSCSHRGKSSFYRNLLWLTKKGDSYPKLTNSYVNNKGKEVLVLWGVHHPSSSDEQQSLYSNGNAYVSVASSNYNRRFTPEIAARPKVRDQHGRMNYYWTLLEPGDTIIFEATGNLIAPWYAFALSRGFESGIITSNASMHECNTKCQTPQGAINSNLPFQNIHPVTIGECPKYVRSTKLRMVTGLRNIPSIQYRGLFGAIAGFIEGGWTGMIDGWYGYHHQNEQGSGYAADQKSTQNAINGITNKVNSVIEKMNTQFTAVGKEFNNLEKRMENLNKKVDDGFLDIWTYNAELLVLLENERTLDFHDLNVKNLYEKVKSQLKNNAKEIGNGCFEFYHKCDNECMESVRNGTYDYPKYSEESKLNREKIDGVKLESMGVYQILAIYSTVASSLVLLVSLGAISFWMCSNGSLQCRICI,565,False,10715,10715,0,-2.239942981,median,Doud,Accurate Measurement of the Effects of All Amino-Acid Mutations on Influenza Hemagglutinin,2016,10.3390/v8060155,2-565,Influenza hemagglutinin,viral 
replication,Growth,A0A2Z5U3Z0_9INFA_theta0.99_full_11-26-2021_b09.a2m,1,565,565,0.9,0.01,57581,0.968,547.0,9809.4,17.93308958,medium,925.0,1.691042048,A0A2Z5U3Z0_9INFA_Doud_2016.csv,transformed_pref,1,mutant,A0A2Z5U3Z0_9INFA_theta_0.01.npy,A0A2Z5U3Z0_9INFA.pdb,0.1,,OrganismalFitness -A0A2Z5U3Z0_9INFA_Wu_2014,A0A2Z5U3Z0_9INFA_Wu_2014.csv,A0A2Z5U3Z0_9INFA,Virus,Influenza A virus (A/WSN/1933(H1N1)),MKAKLLVLLYAFVATDADTICIGYHANNSTDTVDTILEKNVAVTHSVNLLEDSHNGKLCKLKGIAPLQLGKCNITGWLLGNPECDSLLPARSWSYIVETPNSENGACYPGDLIDYEELREQLSSVSSLERFEIFPKESSWPNHTFNGVTVSCSHRGKSSFYRNLLWLTKKGDSYPKLTNSYVNNKGKEVLVLWGVHHPSSSDEQQSLYSNGNAYVSVASSNYNRRFTPEIAARPKVRDQHGRMNYYWTLLEPGDTIIFEATGNLIAPWYAFALSRGFESGIITSNASMHECNTKCQTPQGAINSNLPFQNIHPVTIGECPKYVRSTKLRMVTGLRNIPSIQYRGLFGAIAGFIEGGWTGMIDGWYGYHHQNEQGSGYAADQKSTQNAINGITNKVNSVIEKMNTQFTAVGKEFNNLEKRMENLNKKVDDGFLDIWTYNAELLVLLENERTLDFHDLNVKNLYEKVKSQLKNNAKEIGNGCFEFYHKCDNECMESVRNGTYDYPKYSEESKLNREKIDGVKLESMGVYQILAIYSTVASSLVLLVSLGAISFWMCSNGSLQCRICI,565,False,2350,2350,0,0.0947955855,median,Wu,High-throughput profiling of influenza A virus hemagglutinin gene at single-nucleotide resolution,2014,10.1038/srep04942,6-560,Influenza hemagglutinin,Viral replication,Growth,A0A2Z5U3Z0_9INFA_theta0.99_full_11-26-2021_b09.a2m,1,565,565,0.9,0.01,57581,0.968,547.0,9809.4,17.93308958,medium,925.0,1.691042048,A0A2Z5U3Z0_9INFA_Wu_2014.csv,RF Index,1,mutant,A0A2Z5U3Z0_9INFA_theta_0.01.npy,A0A2Z5U3Z0_9INFA.pdb,0.1,,OrganismalFitness -A4_HUMAN_Seuma_2022,A4_HUMAN_Seuma_2022.csv,A4_HUMAN,Human,Homo 
sapiens,MLPGLALLLLAAWTARALEVPTDGNAGLLAEPQIAMFCGRLNMHMNVQNGKWDSDPSGTKTCIDTKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPHFVIPYRCLVGEFVSDALLVPDKCKFLHQERMDVCETHLHWHTVAKETCSEKSTNLHDYGMLLPCGIDKFRGVEFVCCPLAEESDNVDSADAEEDDSDVWWGGADTDYADGSEDKVVEVAEEEEVAEVEEEEADDDEDDEDGDEVEEEAEEPYEEATERTTSIATTTTTTTESVEEVVREVCSEQAETGPCRAMISRWYFDVTEGKCAPFFYGGCGGNRNNFDTEEYCMAVCGSAMSQSLLKTTQEPLARDPVKLPTTAASTPDAVDKYLETPGDENEHAHFQKAKERLEAKHRERMSQVMREWEEAERQAKNLPKADKKAVIQHFQEKVESLEQEAANERQQLVETHMARVEAMLNDRRRLALENYITALQAVPPRPRHVFNMLKKYVRAEQKDRQHTLKHFEHVRMVDPKKAAQIRSQVMTHLRVIYERMNQSLSLLYNVPAVAEEIQDEVDELLQKEQNYSDDVLANMISEPRISYGNDALMPSLTETKTTVELLPVNGEFSLDDLQPWHSFGADSVPANTENEVEPVDARPAADRGLTTRPGSGLTNIKTEEISEVKMDAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIATVIVITLVMLKKKQYTSIHHGVVEVDAAVTPEERHLSKMQQNGYENPTYKFFEQMQN,770,True,14811,796,14015,-2.0,manual,Seuma,"An atlas of amyloid aggregation: the impact of substitutions, insertions, deletions and truncations on amyloid beta fibril nucleation",2022,10.1038/s41467-022-34742-3,672-713,APP,aggregation,survival assessment assay,A4_HUMAN_2023-08-07_b01.a2m,1,770,770,0.1,0.2,5272,0.987,760.0,99.3,0.1306578947,Low,0.0,0.0,MS_BL_BB_indels_processed_data.tsv,nscore,1,mutant,A4_HUMAN_theta0.2_2023-08-07_b01.npy,A4_HUMAN.pdb,1.0,,Stability -A4D664_9INFA_Soh_2019,A4D664_9INFA_Soh_2019.csv,A4D664_9INFA,Virus,Influenza A 
virus,MERIKELRDLMSQSRTREILTKTTVDHMAIIKKYTSGRQEKNPALRMKWMMAMKYPITADKRIMEMIPERNEQGQTLWSKTNDAGSDRVMVSPLAVTWWNRNGPTTSTVHYPKVYKTYFEKVERLKHGTFGPVHFRNQVKIRRRVDINPGHADLSAKEAQDVIMEVVFPNEVGARILTSESQLTITREKKEELQDCKIAPLMVAYMLERELVRKTRFLPVAGGTSSVYIEVLHLTQGTCWEQMYTPGGEVRNDDVDQSLIIAARNIVRRATVSADPLASLLEMCHSTQIGGIRMVDILRQNPTEEQAVDICKAAMGLRISSSFSFGGFTFKRTSGSSVKREEEVLTGNLQTLKIRVHEGYEEFTMVGRRATAILRKATRRLIQLIVSGRDEQSIAEAIIVALVFSQEDCMIKAVRGDLNFVNRANQRLNPMHQLLRHFQKDAKVLFQNWGIEPIDNVMGMIGILPDMTPSTEMSLRGIRVSKMGVDEYSSTERVVVSIDRFLRVRDQRGNVLLSPEEVSETQGTEKLTITYSSSMMWEINGPESVLVNTYQWIIRNWETVKIQWSQDPTMLYNKMEFEPFQSLVPKAARGQYSGFVRTLFQQMRDVLGTFDTVQIIKLLPFAAAPPEQSRMQFSSLTVNVRGSGMRILVRGNSPVFNYNKATKRLTVLGKDAGALTEDPDEGTAGVESAVLRGFLILGKEDKRYGPALSINELSNLAKGEKANVLIGQGDVVLVMKRKRDSSILTDSQTATKRIRMAIN,759,False,14421,14421,0,0.2170105627,median,Soh,Comprehensive mapping of adaptation of the avian influenza polymerase protein PB2 to humans,2019,10.7554/eLife.45079,1-759,Influenza polymerase basic protein 2,Viral replication (avian cells: CCL141 (duck)),Growth,A4D664_9INFA_theta0.99_full_11-26-2021_b09.a2m,1,759,759,0.9,0.01,26683,1.0,759.0,1730.2,2.279578393,medium,3736.0,4.92226614,A4D664_9INFA_Soh_2019.csv,effectCCL141,1,mutant,A4D664_9INFA_theta_0.01.npy,A4D664_9INFA.pdb,0.1,,OrganismalFitness -A4GRB6_PSEAI_Chen_2020,A4GRB6_PSEAI_Chen_2020.csv,A4GRB6_PSEAI,Prokaryote,Pseudomonas aeruginosa,MFKLLSKLLVYLTASIMAIASPLAFSVDSSGEYPTVSEIPVGEVRLYQIADGVWSHIATQSFDGAVYPSNGLIVRDGDELLLIDTAWGAKNTAALLAEIEKQIGLPVTRAVSTHFHDDRVGGVDVLRAAGVATYASPSTRRLAEVEGNEIPTHSLEGLSSSGDAVRFGPVELFYPGAAHSTDNLIVYVPSASVLYGGCAIYELSRTSAGNVADADLAEWPTSIERIQQHYPEAQFVIPGHGLPGGLDLLKHTTNVVKAHTNRSVVE,266,False,5004,5004,0,-2.1,manual,Chen,"Comprehensive exploration of the translocation, stability and substrate recognition requirements in VIM-2 lactamase",2020,10.7554/eLife.56707,1-266,Beta-lactamase VIM-2,"drug resistance (128/16/2.0 ug/mL ampicillin, 4.0/0.5 ug/mL cefotaxime, 0.031 ug/mL meropenem @ 25C, 37C)",Antibiotics 
resistance,A4GRB6_PSEAI_full_11-26-2021_b03.a2m,1,266,266,0.3,0.2,108496,0.726,193.0,31234.2,161.8352332,high,317.0,1.642487047,A4GRB6_PSEAI_Chen_2020.csv,0.031ug_mL_MEM_37C,1,mutant,A4GRB6_PSEAI_theta_0.2.npy,A4GRB6_PSEAI.pdb,0.1,,OrganismalFitness -AACC1_PSEAI_Dandage_2018,AACC1_PSEAI_Dandage_2018.csv,AACC1_PSEAI,Prokaryote,Pseudomonas aeruginosa,MLRSSNDVTQQGSRPKTKLGGSSMGIIRTCRLGPDQVKSMRAALDLFGREFGDVATYSQHQPDSDYLGNLLRSKTFIALAAFDQEAVVGALAAYVLPKFEQPRSEIYIYDLAVSGEHRRQGIATALINLLKHEANALGAYVIYVQADYGDDPAVALYTKLGIREEVMHFDIDPSTAT,177,False,1801,1801,0,0.7172234411,median,Dandage,Differential strengths of molecular determinants guide environment specific mutational fates,2018,10.1371/journal.pgen.1007419,12-172,GMR (aacC1),"Antibiotic resistance under: heat/cold resistance (32C, 37C (ref), 42C), chemical stability (chemical chaperones TMAO, glycerol), antibiotic resistance (gentamicin), or combo",Antibiotics resistance,AACC1_PSEAI_full_04-29-2022_b03.a2m,1,177,177,0.3,0.2,539868,0.746,132.0,170256.3,1289.820455,high,235.0,1.78030303,AACC1_PSEAI_Dandage_2018.csv,30C,1,Mutation,AACC1_PSEAI_theta_0.2.npy,AACC1_PSEAI.pdb,0.1,,OrganismalFitness -ACE2_HUMAN_Chan_2020,ACE2_HUMAN_Chan_2020.csv,ACE2_HUMAN,Human,Homo 
sapiens,MSSSSWLLLSLVAVTAAQSTIEEQAKTFLDKFNHEAEDLFYQSSLASWNYNTNITEENVQNMNNAGDKWSAFLKEQSTLAQMYPLQEIQNLTVKLQLQALQQNGSSVLSEDKSKRLNTILNTMSTIYSTGKVCNPDNPQECLLLEPGLNEIMANSLDYNERLWAWESWRSEVGKQLRPLYEEYVVLKNEMARANHYEDYGDYWRGDYEVNGVDGYDYSRGQLIEDVEHTFEEIKPLYEHLHAYVRAKLMNAYPSYISPIGCLPAHLLGDMWGRFWTNLYSLTVPFGQKPNIDVTDAMVDQAWDAQRIFKEAEKFFVSVGLPNMTQGFWENSMLTDPGNVQKAVCHPTAWDLGKGDFRILMCTKVTMDDFLTAHHEMGHIQYDMAYAAQPFLLRNGANEGFHEAVGEIMSLSAATPKHLKSIGLLSPDFQEDNETEINFLLKQALTIVGTLPFTYMLEKWRWMVFKGEIPKDQWMKKWWEMKREIVGVVEPVPHDETYCDPASLFHVSNDYSFIRYYTRTLYQFQFQEALCQAAKHEGPLHKCDISNSTEAGQKLFNMLRLGKSEPWTLALENVVGAKNMNVRPLLNYFEPLFTWLKDQNKNSFVGWSTDWSPYADQSIKVRISLKSALGDKAYEWNDNEMYLFRSSVAYAMRQYFLKVKNQMILFGEEDVRVANLKPRISFNFFVTAPKNVSDIIPRTEVEKAIRMSRSRINDAFRLNDNSLEFLGIQPTLGPPNQPPVSIWLIVFGVVMGVIVVGIVILIFTGIRDRKKKNKARSGENPYASIDISKGENNPGFQNTDDVQTSF,805,False,2223,2223,0,-0.266564268,median,Chan,Engineering human ACE2 to optimize binding to the spike protein of SARS coronavirus 2,2020,10.1126/science.abc0870,19-518,ACE2,Binding affinity,Flow Cytometry Assay,ACE2_HUMAN_2023-10-12_b05.a2m,1,805,805,0.5,0.2,11106,0.743,598.0,1506.7,2.519565217,Medium,349.0,0.5836120401,,score,1,mutant,ACE2_HUMAN_theta0.2_2023-10-12_b05.npy,ACE2_HUMAN.pdb,1.0,,Binding -ADRB2_HUMAN_Jones_2020,ADRB2_HUMAN_Jones_2020.csv,ADRB2_HUMAN,Human,Homo sapiens,MGQPGNGSAFLLAPNGSHAPDHDVTQERDEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTNYFITSLACADLVMGLAVVPFGAAHILMKMWTFGNFWCEFWTSIDVLCVTASIETLCVIAVDRYFAITSPFKYQSLLTKNKARVIILMVWIVSGLTSFLPIQMHWYRATHQEAINCYANETCCDFFTNQAYAIASSIVSFYVPLVIMVFVYSRVFQEAKRQLQKIDKSEGRFHVQNLSQVEQDGRTGHGLRRSSKFCLKEHKALKTLGIIMGTFTLCWLPFFIVNIVHVIQDNLIRKEVYILLNWIGYVNSGFNPLIYCRSPDFRIAFQELLCLRRSSLKAYGNGYSSNGNTGEQSGYHVEQEKENKLLCEDLPGTEDFVGHQGTVPSDNIDSQGRNCSTNDSLL,413,False,7800,7800,0,1.859961867,median,Jones,Structural and Functional Characterization of G Protein-Coupled Receptors with Deep Mutational Scanning,2020,10.7554/eLife.54895,2-413,ADRB2,"transcription (luciferase reporter, isoproterenol (beta2AR agonist)-induced)",Receptor 
activity,ADRB2_HUMAN_full_11-26-2021_b03.a2m,1,413,413,0.3,0.2,204722,0.712,294.0,25459.6,86.59727891,medium,234.0,0.7959183673,ADRB2_HUMAN_Jones_2020.csv,0.625,1,mutant_id,ADRB2_HUMAN_theta_0.2.npy,ADRB2_HUMAN.pdb,0.1,,Activity -AICDA_HUMAN_Gajula_2014_3cycles,AICDA_HUMAN_Gajula_2014_3cycles.csv,AICDA_HUMAN,Human,Homo sapiens,MDSLLMNRRKFLYQFKNVRWAKGRRETYLCYVVKRRDSATSFSLDFGYLRNKNGCHVELLFLRYISDWDLDPGRCYRVTWFTSWSPCYDCARHVADFLRGNPNLSLRIFTARLYFCEDRKAEPEGLRRLHRAGVQIAIMTFKDYFYCWNTFVENHERTFKAWEGLHENSVRLSRQLRRILLPLYEVDDLRDAFRTLGL,198,False,209,209,0,1.0,manual,Gajula,High-throughput mutagenesis reveals functional determinants for DNA targeting by activation-induced deaminase,2014,10.1093/nar/gku689,113-123,AID,Enzymatic activity,bulk RNA-sequencing,AICDA_HUMAN_2023-08-07_b01.a2m,1,198,198,0.1,0.2,18148,0.879,174.0,3340.0,19.1954023,Medium,101.0,0.5804597701,urn_mavedb_00000106-c-1_scores.csv,DMS_score,1,mutant,AICDA_HUMAN_theta0.2_2023-08-07_b01.npy,AICDA_HUMAN.pdb,1.0,,Activity -AMFR_HUMAN_Tsuboyama_2023_4G3O,AMFR_HUMAN_Tsuboyama_2023_4G3O.csv,AMFR_HUMAN,Human,Homo sapiens,YFQGQLNAMAHQIQEMFPQVPYHLVLQDLQLTRSVEITTDNILEGRI,47,True,2972,820,2152,-1.504736022,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-6,1-47,E3 ubiquitin-protein ligase AMFR,Stability,cDNA display proteolysis,AMFR_HUMAN_2023-08-07_b04.a2m,1,47,47,0.4,0.2,17787,0.872,41.0,1166.9,28.46097561,Medium,12.0,0.2926829268,Tsuboyama2023_Dataset2_Dataset3,ddG_ML_float,1,mut_type,AMFR_HUMAN_theta0.2_2023-08-07_b04.npy,AMFR_HUMAN.pdb,1.0,,Stability -AMIE_PSEAE_Wrenbeck_2017,AMIE_PSEAE_Wrenbeck_2017.csv,AMIE_PSEAE,Prokaryote,Pseudomonas 
aeruginosa,MRHGDISSSNDTVGVAVVNYKMPRLHTAAEVLDNARKIAEMIVGMKQGLPGMDLVVFPEYSLQGIMYDPAEMMETAVAIPGEETEIFSRACRKANVWGVFSLTGERHEEHPRKAPYNTLVLIDNNGEIVQKYRKIIPWCPIEGWYPGGQTYVSEGPKGMKISLIICDDGNYPEIWRDCAMKGAELIVRCQGYMYPAKDQQVMMAKAMAWANNCYVAVANAAGFDGVYSYFGHSAIIGFDGRTLGECGEEEMGIQYAQLSLSQIRDARANDQSQNHLFKILHRGYSGLQASGDGDRGLAECPFEFYRTWVTDAEKARENVERLTRSTTGVAQCPVGRLPYEGLEKEA,346,False,6227,6227,0,-0.2222,median,Wrenbeck,Single-mutation fitness landscapes for an enzyme on multiple substrates reveal specificity is globally encoded,2017,10.1038/ncomms15695,1-341,Aliphatic amidase,Enzyme function,Growth,AMIE_PSEAE_full_11-26-2021_b02.a2m,1,346,346,0.2,0.2,140703,0.725,251.0,29959.3,119.359761,high,557.0,2.219123506,AMIE_PSEAE_Wrenbeck_2017.csv,isobutyramide_normalized_fitness,1,mutant,AMIE_PSEAE_theta_0.2.npy,AMIE_PSEAE.pdb,0.1,,Activity -ANCSZ_Hobbs_2022,ANCSZ_Hobbs_2022.csv,ANCSZ,Eukaryote,reconstructed ancestor,MADSANHLPYFYGSITREEAEDYLKQGGMSDGLFLLRQSLNSLGGYVLSVVYDRQCHHYTIERQLNGTYAIAGGKPHSGPAELCEYHSQDSDGLVCLLKKPCNRPPGVQPKVGPFEDLKDQLIREYVRQTWNLEGEALEQAIISQRPQLEKLIATTAHEKMPWFHGKISREESERRLLSGAQPNGKFLIRERDENGSYALSLLYEKKVYHYRIDRDKSGKLSIPDGKKFDTLWQLVEHYSHKPDGLLCVLTEPCPNPDSPAGALGAPAPPLPGSHPKLETAGGIISRIKSYSFPKPGFKKKPPSERPKSALNVNGYVPRPKPLGAEGGSRRAMPMDTNVYESPYSDPEELKDKKLYLKREQLMLEEGELGSGNFGTVKKGVYKMRKKEIPVAVKVLKSENDPAVKDELMKEAEFMHQLDNPYIVRMIGICEAESLMLVMELAPLGPLNKFLQKHKDQITVENIVELMHQVSMGMKYLEEKNFVHRDLAARNVLLVNQHYAKISDFGLSKALGADDNYYKAKTAGKWPLKWYAPECINFHKFSSKSDVWSFGVTMWEAFSYGQKPYKGMKGQEVLPFIENGERMECPAECPEEMYELMKDCWTYKADDRPGFVAVELRLRDYYYDISK,627,False,4670,4670,0,-0.0574121626,median,Hobbs,Saturation mutagenesis of a predicted ancestral Syk-family kinase,2022,10.1002/pro.4411,352-627,ancestral spleen tyrosine kinase,successful phosphorylation of bait peptide,enzymatic activity,ANCSZ_b0.4.a2m,1,627,627,0.4,0.2,7424,1.0,627.0,1036.7,1.653429027,Medium,109.0,0.1738437002,ANCSZ_Hobbs_2022.csv,DMS_value,1,mutant,ANCSZ_theta_0.2.npy,ANCSZ.pdb,1.0,,Activity 
-ARGR_ECOLI_Tsuboyama_2023_1AOY,ARGR_ECOLI_Tsuboyama_2023_1AOY.csv,ARGR_ECOLI,Prokaryote,Escherichia coli (strain K12),QEELVKAFKALLKEEKFSSQGEIVAALQEQGFDNINQSKVSRMLTKFGAVRTRNAKMEMVYCLPAELGV,69,False,1287,1287,0,-0.4541373765,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-7,1-69,Arginine repressor,Stability,cDNA display proteolysis,ARGR_ECOLI_2023-08-07_b04.a2m,1,69,69,0.4,0.2,21443,0.913,63.0,3719.2,59.03492063,Medium,29.0,0.4603174603,Tsuboyama2023_Dataset2_Dataset4,ddG_ML_float,1,mut_type,ARGR_ECOLI_theta0.2_2023-08-07_b04.npy,ARGR_ECOLI.pdb,1.0,,Stability -B2L11_HUMAN_Dutta_2010_binding-Mcl-1,B2L11_HUMAN_Dutta_2010_binding-Mcl-1.csv,B2L11_HUMAN,Human,Homo sapiens,MAKQPSDVSSECDREGRQLQPAERPPQLRPGAPTSLQTEPQGNPEGNHGGEGDSCPHGSPQGPLAPPASPGPFATRSPLFIFMRRSSLLSRSSSGYFSFDTDRSPAPMSCDKSTQTPSPPCQAFNHYLSAMASMRQAEPADMRPEIWIAQELRRIGDEFNAYYARRVFLNNYQAAEDHPRMVILRLLRYIVRLVWRMH,198,False,170,170,0,16002529.37,median,Dutta,Determinants of BH3 Binding Specificity for Mcl-1 versus Bcl-xL,2010,10.1016/j.jmb.2010.03.058,148-159,BCL2L11,Binding to Mcl-1 (FACS; yeast-displayed and antibody stained for binding partner),FACS,B2L11_HUMAN_2023-08-07_b04.a2m,1,198,198,0.4,0.2,660,0.995,197.0,88.5,0.4492385787,Low,2.0,0.01015228426,,score,1,mut_proteingym,B2L11_HUMAN_theta0.2_2023-08-07_b04.npy,B2L11_HUMAN.pdb,1.0,147.0,Binding -BBC1_YEAST_Tsuboyama_2023_1TG0,BBC1_YEAST_Tsuboyama_2023_1TG0.csv,BBC1_YEAST,Eukaryote,Saccharomyces cerevisiae (strain ATCC 204508 / S288c) (Baker's yeast),EVPFKVVAQFPYKSDYEDDLNFEKDQEIIVTSVEDAEWYFGEYQDSNGDVIEGIFPKSFVAVQG,64,True,2069,1084,985,-1.271998543,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-8,1-64,Myosin tail region-interacting protein MTI1,Stability,cDNA display 
proteolysis,BBC1_YEAST_2023-08-07_b05.a2m,1,64,64,0.5,0.2,604824,0.844,54.0,17529.2,324.6148148,High,55.0,1.018518519,Tsuboyama2023_Dataset2_Dataset5,ddG_ML_float,1,mut_type,BBC1_YEAST_theta0.2_2023-08-07_b05.npy,BBC1_YEAST.pdb,1.0,,Stability -BCHB_CHLTE_Tsuboyama_2023_2KRU,BCHB_CHLTE_Tsuboyama_2023_2KRU.csv,BCHB_CHLTE,Prokaryote,Chlorobaculum tepidum,ELSWTAEAEKMLGKVPFFVRKKVRKNTDNYAREIGEPVVTADVFRKAKEHLG,52,True,1572,890,682,-0.9540616602,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-9,1-52,Light-independent protochlorophyllide reductase subunit B,Stability,cDNA display proteolysis,BCHB_CHLTE_2023-08-07_b04.a2m,1,52,52,0.4,0.2,12079,0.923,48.0,2630.8,54.80833333,Medium,18.0,0.375,Tsuboyama2023_Dataset2_Dataset6,ddG_ML_float,1,mut_type,BCHB_CHLTE_theta0.2_2023-08-07_b04.npy,BCHB_CHLTE.pdb,1.0,,Stability -BLAT_ECOLX_Deng_2012,BLAT_ECOLX_Deng_2012.csv,BLAT_ECOLX,Prokaryote,Escherichia coli,MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW,286,False,4996,4996,0,-2.913548,median,Deng,Deep Sequencing of Systematic Combinatorial Libraries Reveals Œ≤-Lactamase Sequence Constraints at High Resolution,2012,10.1016/j.jmb.2012.09.014,24-286,Beta-lactamase TEM,"antibiotic resistance, MIC",Amp resistance,BLAT_ECOLX_full_11-26-2021_b02.a2m,1,286,286,0.2,0.2,209644,0.752,215.0,47605.0,221.4186047,high,446.0,2.074418605,BLAT_ECOLX_Deng_2012.csv,ddG_stat,-1,mutant,BLAT_ECOLX_theta_0.2.npy,BLAT_ECOLX.pdb,0.1,,OrganismalFitness -BLAT_ECOLX_Firnberg_2014,BLAT_ECOLX_Firnberg_2014.csv,BLAT_ECOLX,Prokaryote,Escherichia 
coli,MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW,286,False,4783,4783,0,0.4257,median,Firnberg,"A Comprehensive, High-Resolution Map of a Gene's Fitness Landscape",2014,10.1093/molbev/msu081,24-286,Beta-lactamase TEM,Growth (0.25-1024 ug/mL ampicillin) doubling,Growth,BLAT_ECOLX_full_11-26-2021_b02.a2m,1,286,286,0.2,0.2,209644,0.752,215.0,47605.0,221.4186047,high,446.0,2.074418605,BLAT_ECOLX_Firnberg_2014.csv,linear,1,mutant,BLAT_ECOLX_theta_0.2.npy,BLAT_ECOLX.pdb,0.1,,OrganismalFitness -BLAT_ECOLX_Jacquier_2013,BLAT_ECOLX_Jacquier_2013.csv,BLAT_ECOLX,Prokaryote,Escherichia coli,MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW,286,False,989,989,0,-0.666666667,median,Jacquier,Capturing the mutational landscape of the beta-lactamase TEM-1,2013,10.1073/pnas.1215206110,24-286,Beta-lactamase TEM,MIC,Amoxicillin resistance,BLAT_ECOLX_full_11-26-2021_b02.a2m,1,286,286,0.2,0.2,209644,0.752,215.0,47605.0,221.4186047,high,446.0,2.074418605,BLAT_ECOLX_Jacquier_2013.csv,MIC_score,1,mutant,BLAT_ECOLX_theta_0.2.npy,BLAT_ECOLX.pdb,0.1,,OrganismalFitness -BLAT_ECOLX_Stiffler_2015,BLAT_ECOLX_Stiffler_2015.csv,BLAT_ECOLX,Prokaryote,Escherichia coli,MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW,286,False,4996,4996,0,-1.159498916,median,Stiffler,Evolvability as a Function of 
Purifying Selection in TEM-1 β-lactamase,2015,10.1016/j.cell.2015.01.035,24-286,Beta-lactamase TEM,Growth (10-2500 ug/mL ampicillin),Growth,BLAT_ECOLX_full_11-26-2021_b02.a2m,1,286,286,0.2,0.2,209644,0.752,215.0,47605.0,221.4186047,high,446.0,2.074418605,BLAT_ECOLX_Stiffler_2015.csv,2500,1,mutant,BLAT_ECOLX_theta_0.2.npy,BLAT_ECOLX.pdb,0.1,,OrganismalFitness -BRCA1_HUMAN_Findlay_2018,BRCA1_HUMAN_Findlay_2018.csv,BRCA1_HUMAN,Human,Homo sapiens,MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRSLQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPENPSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEISLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENSSLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPCSENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLLASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKRKRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEKESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSEEIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKEFVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLGKAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVSKRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKDKPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSMSPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKLNAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDDLLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELPCFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLFSSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEAASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALEDLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHSCSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYL
PRQDLEGTPYLESGISLFSDDPESDPSEDRAPESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSGLTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKMLNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTLGTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY,1863,False,1837,1837,0,-1.0,manual,Findlay,Accurate classification of BRCA1 variants with saturation genome editing,2018,10.1038/s41586-018-0461-z,1-1855,BRCA1,Growth,Growth,BRCA1_HUMAN_full_11-26-2021_b02.a2m,1,1863,1863,0.2,0.2,1008,0.769,1432.0,108.4,0.07569832402,low,0.0,0.0,BRCA1_HUMAN_Findlay_2018.csv,function_score,1,mutant,BRCA1_HUMAN_theta_0.2.npy,BRCA1_HUMAN.pdb,0.1,,OrganismalFitness -BRCA2_HUMAN_Erwood_2022_HEK293T,BRCA2_HUMAN_Erwood_2022_HEK293T.csv,BRCA2_HUMAN,Human,Homo sapiens,MPIGSKERPTFFEIFKTRCNKADLGPISLNWFEELSSEAPPYNSEPAEESEHKNNNYEPNLFKTPQRKPSYNQLASTPIIFKEQGLTLPLYQSPVKELDKFKLDLGRNVPNSRHKSLRTVKTKMDQADDVSCPLLNSCLSESPVVLQCTHVTPQRDKSVVCGSLFHTPKFVKGRQTPKHISESLGAEVDPDMSWSSSLATPPTLSSTVLIVRNEEASETVFPHDTTANVKSYFSNHDESLKKNDRFIASVTDSENTNQREAASHGFGKTSGNSFKVNSCKDHIGKSMPNVLEDEVYETVVDTSEEDSFSLCFSKCRTKNLQKVRTSKTRKKIFHEANADECEKSKNQVKEKYSFVSEVEPNDTDPLDSNVANQKPFESGSDKISKEVVPSLACEWSQLTLSGLNGAQMEKIPLLHISSCDQNISEKDLLDTENKRKKDFLTSENSLPRISSLPKSEKPLNEETVVNKRDEEQHLESHTDCILAVKQAISGTSPVASSFQGIKKSIFRIRESPKETFNASFSGHMTDPNFKKETEASESGLEIHTVCSQKEDSLCPNLIDNGSWPATTTQNSVALKNAGLISTLKKKTNKFIYAIHDETSYKGKKIPKDQKSELINCSAQFEANAFEAPLTFANADSGLLHSSVKRSCSQNDSEEPTLSLTSSFGTILRKCSRNETCSNNTVISQDLDYKEAKCNKEKLQLFITPEADSLSCLQEGQCENDPKSKKVSDIKEEVLAAACHPVQHSKVEYSDTDFQSQKSLLYDHENASTLILTPTSKDVLSNLVMISRGKESYKMSDKLKGNNYESDVELTKNIPMEKNQDVCALNENYKNVELLPPEKYMRVASPSRKVQFNQNTNLRVIQKNQEETTSISKITVNPDSEELFSDNENNFVFQVANERNNLALGNTKELHETDLTCVNEPIFKNSTMVLYGDTGDKQATQVSIKKDLVYVLAEENKNSVKQHIKMTLGQDLKSDISLNIDKIPEKNNDYMNKWAGLLGPISNHSFGGSFRTASNKEIKLSEHNIKKSKMFFKDIEEQYPTSLACVEIVNTLALDNQKKLSKPQSINTVSAHLQSSVVVSDCKNSHITPQMLFSKQDFNSNHNLTPSQKAEITELSTILEESGSQFEFTQFRKPSYILQKSTFEVPENQMTILKTTSEECRDADLHVIMNAPSIGQVDSSKQFEGTVEIKRK
FAGLLKNDCNKSASGYLTDENEVGFRGFYSAHGTKLNVSTEALQKAVKLFSDIENISEETSAEVHPISLSSSKCHDSVVSMFKIENHNDKTVSEKNNKCQLILQNNIEMTTGTFVEEITENYKRNTENEDNKYTAASRNSHNLEFDGSDSSKNDTVCIHKDETDLLFTDQHNICLKLSGQFMKEGNTQIKEDLSDLTFLEVAKAQEACHGNTSNKEQLTATKTEQNIKDFETSDTFFQTASGKNISVAKESFNKIVNFFDQKPEELHNFSLNSELHSDIRKNKMDILSYEETDIVKHKILKESVPVGTGNQLVTFQGQPERDEKIKEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQGTSEITSFSHQWAKTLKYREACKDLELACETIEITAAPKCKEMQNSLNNDKNLVSIETVVPPKLLSDNLCRQTENLKTSKSIFLKVKVHENVEKETAKSPATCYTNQSPYSVIENSALAFYTSCSRKTSVSQTSLLEAKKWLREGIFDGQPERINTADYVGNYLYENNSNSTIAENDKNHLSEKQDTYLSNSSMSNSYSYHSDEVYNDSGYLSKNKLDSGIEPVLKNVEDQKNTSFSKVISNVKDANAYPQTVNEDICVEELVTSSSPCKNKNAAIKLSISNSNNFEVGPPAFRIASGKIVCVSHETIKKVKDIFTDSFSKVIKENNENKSKICQTKIMAGCYEALDDSEDILHNSLDNDECSTHSHKVFADIQSEEILQHNQNMSGLEKVSKISPCDVSLETSDICKCSIGKLHKSVSSANTCGIFSTASGKSVQVSDASLQNARQVFSEIEDSTKQVFSKVLFKSNEHSDQLTREENTAIRTPEHLISQKGFSYNVVNSSAFSGFSTASGKQVSILESSLHKVKGVLEEFDLIRTEHSLHYSPTSRQNVSKILPRVDKRNPEHCVNSEMEKTCSKEFKLSNNLNVEGGSSENNHSIKVSPYLSQFQQDKQQLVLGTKVSLVENIHVLGKEQASPKNVKMEIGKTETFSDVPVKTNIEVCSTYSKDSENYFETEAVEIAKAFMEDDELTDSKLPSHATHSLFTCPENEEMVLSNSRIGKRRGEPLILVGEPSIKRNLLNEFDRIIENQEKSLKASKSTPDGTIKDRRLFMHHVSLEPITCVPFRTTKERQEIQNPNFTAPGQEFLSKSHLYEHLTLEKSSSNLAVSGHPFYQVSATRNEKMRHLITTGRPTKVFVPPFKTKSHFHRVEQCVRNINLEENRQKQNIDGHGSDDSKNKINDNEIHQFNKNNSNQAVAVTFTKCEEEPLDLITSLQNARDIQDMRIKKKQRQRVFPQPGSLYLAKTSTLPRISLKAAVGGQVPSACSHKQLYTYGVSKHCIKINSKNAESFQFHTEDYFGKESLWTGKGIQLADGGWLIPSNDGKAGKEEFYRALCDTPGVDPKLISRIWVYNHYRWIIWKLAAMECAFPKEFANRCLSPERVLLQLKYRYDTEIDRSRRSAIKKIMERDDTAAKTLVLCVSDIISLSANISETSSNKTSSADTQKVAIIELTDGWYAVKAQLDPPLLAVLKNGRLTVGQKIILHGAELVGSPDACTPLEAPESLMLKISANSTRPARWYTKLGFFPDPRPFPLPLSSLFSDGGNVGCVDVIIQRAYPIQWMEKTSSGLYIFRNEREEEKEAAKYVEAQQKRLEALFTKIQEEFEEHEENTTKPYLPSRALTRQQVRALQDGAELYEAVKNAADPAYLEGYFSEEQLRALNNHRQMLNDKKQAQIQLEIRKAMESAEQKEQGLSRDVTTVWKLRIVSYSKKEKDSVILSIWRPSSDLYSLLTEGKRYRIYHLATSKSKSKSERANIQLAATKKTQYQQLPVSDEILFQIYQPREPLHFSKFLDPDFQPSCSEVDLIGFVVSVVKKTGLAPFVYLSDECYNLLAIKFWIDLNEDIIKPHMLIAASNLQWRPESKSGLLTLFAGDFSVFSASPKEGHFQETFNKMKNTVENIDILCNEAENKLMHILHANDPKW
STPTKDCTSGPYTAQIIPGTGNKLLMSSPNCEIYYQSPLSLCMAKRKSVSTPVSAQMTSKSCKGEKEIDDQKNCKKRRALDFLSRLPLPPPVSPICTFVSPAAQKAFQPPRSCGTKYETPIKKKELNSPQMTPFKKFNEISLLESNSIADEELALINTQALLSGSTGEKQFISVSESTRTAPTSSEDYLRLKRRCTTSLIKEQESSQASTEECEKNKQDTITTKKYI,3418,False,265,265,0,0.8,manual,Erwood,Saturation variant interpretation using CRISPR prime editing,2022,10.1038/s41587-021-01201-1,388-2654,BRCA2,Fitness,Growth,BRCA2_HUMAN_2023-10-12_b01.a2m,1,3418,3418,0.1,0.2,933,,,,,,,,41587_2021_1201_MOESM3_ESM.xlsx,Function Score,1,Protein Annotation,BRCA2_HUMAN_theta0.2_2023-10-12_b01.npy,BRCA2_HUMAN_1-1000.pdb,1.0,,OrganismalFitness -C6KNH7_9INFA_Lee_2018,C6KNH7_9INFA_Lee_2018.csv,C6KNH7_9INFA,Virus,Influenza A virus (A/Perth/16/2009(H3N2)),MKTIIALSYILCLVFAQKLPGNDNSTATLCLGHHAVPNGTIVKTITNDQIEVTNATELVQSSSTGEICDSPHQILDGKNCTLIDALLGDPQCDDFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSKNSFFSRLNWLTHLNFKYPALNVTMPNNEQFDKLYIWGVLHPGTDKDQIFLYAQASGRITVSTKRSQQIVSPNIGSRPRVRNIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCNSECITPNGSIPNDKPFQNVNRITYGACPRYVKQNTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDWILWISFAISCFLLCVALLGFIMWACQKGNIRCNICI,566,False,10754,10754,0,-1.720276237,median,Lee,Deep mutational scanning of hemagglutinin helps predict evolutionary fates of human H3N2 influenza variants,2018,10.1073/pnas.1806133115,1-566,Influenza hemagglutinin,Viral replication,Growth,C6KNH7_9INFA_theta0.99_full_11-26-2021_b09.a2m,1,566,566,0.9,0.01,57453,0.977,553.0,10569.8,19.11356239,medium,964.0,1.743218807,C6KNH7_9INFA_Lee_2018.csv,log_fitness_by_syn_mut_fitness,1,mutant,C6KNH7_9INFA_theta_0.01.npy,C6KNH7_9INFA.pdb,0.1,,OrganismalFitness -CALM1_HUMAN_Weile_2017,CALM1_HUMAN_Weile_2017.csv,CALM1_HUMAN,Human,Homo 
sapiens,MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTIDFPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREADIDGDGQVNYEEFVQMMTAK,149,False,1813,1813,0,0.872790117,median,Weile,A framework for exhaustively mapping functional missense variants,2017,10.15252/msb.20177908,2-149,CALM1,Yeast growth,complementation,CALM1_HUMAN_full_11-26-2021_b03.a2m,1,149,149,0.3,0.2,177633,0.893,133.0,28985.1,217.9330827,high,96.0,0.7218045113,CALM1_HUMAN_Weile_2017.csv,screenscore,1,mutant,CALM1_HUMAN_theta_0.2.npy,CALM1_HUMAN.pdb,0.1,,OrganismalFitness -CAPSD_AAV2S_Sinai_2021,CAPSD_AAV2S_Sinai_2021.csv,CAPSD_AAV2S,Virus,Adeno-associated virus 2,MAADGYLPDWLEDTLSEGIRQWWKLKPGPPPPKPAERHKDDSRGLVLPGYKYLGPFNGLDKGEPVNEADAAALEHDKAYDRQLDSGDNPYLKYNHADAEFQERLKEDTSFGGNLGRAVFQAKKRVLEPLGLVEEPVKTAPGKKRPVEHSPVEPDSSSGTGKAGQQPARKRLNFGQTGDADSVPDPQPLGQPPAAPSGLGTNTMATGSGAPMADNNEGADGVGNSSGNWHCDSTWMGDRVITTSTRTWALPTYNNHLYKQISSQSGASNDNHYFGYSTPWGYFDFNRFHCHFSPRDWQRLINNNWGFRPKRLNFKLFNIQVKEVTQNDGTTTIANNLTSTVQVFTDSEYQLPYVLGSAHQGCLPPFPADVFMVPQYGYLTLNNGSQAVGRSSFYCLEYFPSQMLRTGNNFTFSYTFEDVPFHSSYAHSQSLDRLMNPLIDQYLYYLSRTNTPSGTTTQSRLQFSQAGASDIRDQSRNWLPGPCYRQQRVSKTSADNNNSEYSWTGATKYHLNGRDSLVNPGPAMASHKDDEEKFFPQSGVLIFGKQGSEKTNVDIEKVMITDEEEIRTTNPVATEQYGSVSTNLQRGNRQAATADVNTQGVLPGMVWQDRDVYLQGPIWAKIPHTDGHFHPSPLMGGFGLKHPPPQILIKNTPVPANPSTTFSAAKFASFITQYSTGQVSVEIEWELQKENSKRWNPEIQYTSNYNKSVNVDFTVDTNGVYSEPRPIGTRYLTRNL,735,True,42328,532,41796,-1.2,manual,Sinai,Generative AAV capsid diversification by latent interpolation,2021,10.1101/2021.04.16.440236,561-588,AAV,viability for AAV capsid production,,CAPSD_AAV2S_uniprot_t099_msc70_mcc70_b0.8.a2m,1,735,735,0.8,0.01,604,0.782,575.0,213.8,0.371826087,low,1943.0,3.379130435,CAPSD_AAV2S_Sinai_substitutions_2021.csv,viral_selection,1,mutant,CAPSD_AAV2S_theta_0.01.npy,CAPSD_AAV2S.pdb,0.1,,OrganismalFitness -CAR11_HUMAN_Meitlis_2020_gof,CAR11_HUMAN_Meitlis_2020_gof.csv,CAR11_HUMAN,Human,Homo 
sapiens,MPGGGPEMDDYMETLKDEEDALWENVECNRHMLSRYINPAKLTPYLRQCKVIDEQDEDEVLNAPMLPSKINRAGRLLDILHTKGQRGYVVFLESLEFYYPELYKLVTGKEPTRRFSTIVVEEGHEGLTHFLMNEVIKLQQQMKAKDLQRCELLARLRQLEDEKKQMTLTRVELLTFQERYYKMKEERDSYNDELVKVKDDNYNLAMRYAQLSEEKNMAVMRSRDLQLEIDQLKHRLNKMEEECKLERNQSLKLKNDIENRPKKEQVLELERENEMLKTKNQELQSIIQAGKRSLPDSDKAILDILEHDRKEALEDRQELVNRIYNLQEEARQAEELRDKYLEEKEDLELKCSTLGKDCEMYKHRMNTVMLQLEEVERERDQAFHSRDEAQTQYSQCLIEKDKYRKQIRELEEKNDEMRIEMVRREACIVNLESKLRRLSKDSNNLDQSLPRNLPVTIISQDFGDASPRTNGQEADDSSTSEESPEDSKYFLPYHPPQRRMNLKGIQLQRAKSPISLKRTSDFQAKGHEEEGTDASPSSCGSLPITNSFTKMQPPRSRSSIMSITAEPPGNDSIVRRYKEDAPHRSTVEEDNDSGGFDALDLDDDSHERYSFGPSSIHSSSSSHQSEGLDAYDLEQVNLMFRKFSLERPFRPSVTSVGHVRGPGPSVQHTTLNGDSLTSQLTLLGGNARGSFVHSVKPGSLAEKAGLREGHQLLLLEGCIRGERQSVPLDTCTKEEAHWTIQRCSGPVTLHYKVNHEGYRKLVKDMEDGLITSGDSFYIRLNLNISSQLDACTMSLKCDDVVHVRDTMYQDRHEWLCARVDPFTDHDLDMGTIPSYSRAQQLLLVKLQRLMHRGSREEVDGTHHTLRALRNTLQPEEALSTSDPRVSPRLSRASFLFGQLLQFVSRSENKYKRMNSNERVRIISGSPLGSLARSSLDATKLLTEKQEELDPESELGKNLSLIPYSLVRAFYCERRRPVLFTPTVLAKTLVQRLLNSGGAMEFTICKSDIVTRDEFLRRQKTETIIYSREKNPNAFECIAPANIEAVAAKNKHCLLEAGIGCTRDLIKSNIYPIVLFIRVCEKNIKRFRKLLPRPETEEEFLRVCRLKEKELEALPCLYATVEPDMWGSVEELLRVVKDKIGEEQRKTIWVDEDQL,1154,False,2374,2374,0,0.14475,manual,Meitlis,Multiplexed Functional Assessment of Genetic Variants in CARD11,2020,10.1016/j.ajhg.2020.10.015.,4-146,CARD11,Signaling (in presence of ibrutinib),survival assessment assay,CAR11_HUMAN_2023-10-12_b02.a2m,1,1154,1154,0.2,0.2,1352,0.998,1152.0,53.7,0.04661458333,Low,0.0,0.0,mmc2.xlsx,log2_score,1,mutant,CAR11_HUMAN_theta0.2_2023-10-12_b02.npy,CAR11_HUMAN.pdb,1.0,,OrganismalFitness -CAR11_HUMAN_Meitlis_2020_lof,CAR11_HUMAN_Meitlis_2020_lof.csv,CAR11_HUMAN,Human,Homo 
sapiens,MPGGGPEMDDYMETLKDEEDALWENVECNRHMLSRYINPAKLTPYLRQCKVIDEQDEDEVLNAPMLPSKINRAGRLLDILHTKGQRGYVVFLESLEFYYPELYKLVTGKEPTRRFSTIVVEEGHEGLTHFLMNEVIKLQQQMKAKDLQRCELLARLRQLEDEKKQMTLTRVELLTFQERYYKMKEERDSYNDELVKVKDDNYNLAMRYAQLSEEKNMAVMRSRDLQLEIDQLKHRLNKMEEECKLERNQSLKLKNDIENRPKKEQVLELERENEMLKTKNQELQSIIQAGKRSLPDSDKAILDILEHDRKEALEDRQELVNRIYNLQEEARQAEELRDKYLEEKEDLELKCSTLGKDCEMYKHRMNTVMLQLEEVERERDQAFHSRDEAQTQYSQCLIEKDKYRKQIRELEEKNDEMRIEMVRREACIVNLESKLRRLSKDSNNLDQSLPRNLPVTIISQDFGDASPRTNGQEADDSSTSEESPEDSKYFLPYHPPQRRMNLKGIQLQRAKSPISLKRTSDFQAKGHEEEGTDASPSSCGSLPITNSFTKMQPPRSRSSIMSITAEPPGNDSIVRRYKEDAPHRSTVEEDNDSGGFDALDLDDDSHERYSFGPSSIHSSSSSHQSEGLDAYDLEQVNLMFRKFSLERPFRPSVTSVGHVRGPGPSVQHTTLNGDSLTSQLTLLGGNARGSFVHSVKPGSLAEKAGLREGHQLLLLEGCIRGERQSVPLDTCTKEEAHWTIQRCSGPVTLHYKVNHEGYRKLVKDMEDGLITSGDSFYIRLNLNISSQLDACTMSLKCDDVVHVRDTMYQDRHEWLCARVDPFTDHDLDMGTIPSYSRAQQLLLVKLQRLMHRGSREEVDGTHHTLRALRNTLQPEEALSTSDPRVSPRLSRASFLFGQLLQFVSRSENKYKRMNSNERVRIISGSPLGSLARSSLDATKLLTEKQEELDPESELGKNLSLIPYSLVRAFYCERRRPVLFTPTVLAKTLVQRLLNSGGAMEFTICKSDIVTRDEFLRRQKTETIIYSREKNPNAFECIAPANIEAVAAKNKHCLLEAGIGCTRDLIKSNIYPIVLFIRVCEKNIKRFRKLLPRPETEEEFLRVCRLKEKELEALPCLYATVEPDMWGSVEELLRVVKDKIGEEQRKTIWVDEDQL,1154,False,2395,2395,0,-0.4635,manual,Meitlis,Multiplexed Functional Assessment of Genetic Variants in CARD11,2020,10.1016/j.ajhg.2020.10.015.,4-146,CARD11,Signaling,survival assessment assay,CAR11_HUMAN_2023-10-12_b02.a2m,1,1154,1154,0.2,0.2,1352,0.998,1152.0,53.7,0.04661458333,Low,0.0,0.0,mmc3.xlsx,log2_score,1,mutant,CAR11_HUMAN_theta0.2_2023-10-12_b02.npy,CAR11_HUMAN.pdb,1.0,,OrganismalFitness -CAS9_STRP1_Spencer_2017_positive,CAS9_STRP1_Spencer_2017_positive.csv,CAS9_STRP1,Eukaryote,Streptococcus pyogenes serotype 
M1,MDKKYSIGLDIGTNSVGWAVITDEYKVPSKKFKVLGNTDRHSIKKNLIGALLFDSGETAEATRLKRTARRRYTRRKNRICYLQEIFSNEMAKVDDSFFHRLEESFLVEEDKKHERHPIFGNIVDEVAYHEKYPTIYHLRKKLVDSTDKADLRLIYLALAHMIKFRGHFLIEGDLNPDNSDVDKLFIQLVQTYNQLFEENPINASGVDAKAILSARLSKSRRLENLIAQLPGEKKNGLFGNLIALSLGLTPNFKSNFDLAEDAKLQLSKDTYDDDLDNLLAQIGDQYADLFLAAKNLSDAILLSDILRVNTEITKAPLSASMIKRYDEHHQDLTLLKALVRQQLPEKYKEIFFDQSKNGYAGYIDGGASQEEFYKFIKPILEKMDGTEELLVKLNREDLLRKQRTFDNGSIPHQIHLGELHAILRRQEDFYPFLKDNREKIEKILTFRIPYYVGPLARGNSRFAWMTRKSEETITPWNFEEVVDKGASAQSFIERMTNFDKNLPNEKVLPKHSLLYEYFTVYNELTKVKYVTEGMRKPAFLSGEQKKAIVDLLFKTNRKVTVKQLKEDYFKKIECFDSVEISGVEDRFNASLGTYHDLLKIIKDKDFLDNEENEDILEDIVLTLTLFEDREMIEERLKTYAHLFDDKVMKQLKRRRYTGWGRLSRKLINGIRDKQSGKTILDFLKSDGFANRNFMQLIHDDSLTFKEDIQKAQVSGQGDSLHEHIANLAGSPAIKKGILQTVKVVDELVKVMGRHKPENIVIEMARENQTTQKGQKNSRERMKRIEEGIKELGSQILKEHPVENTQLQNEKLYLYYLQNGRDMYVDQELDINRLSDYDVDHIVPQSFLKDDSIDNKVLTRSDKNRGKSDNVPSEEVVKKMKNYWRQLLNAKLITQRKFDNLTKAERGGLSELDKAGFIKRQLVETRQITKHVAQILDSRMNTKYDENDKLIREVKVITLKSKLVSDFRKDFQFYKVREINNYHHAHDAYLNAVVGTALIKKYPKLESEFVYGDYKVYDVRKMIAKSEQEIGKATAKYFFYSNIMNFFKTEITLANGEIRKRPLIETNGETGEIVWDKGRDFATVRKVLSMPQVNIVKKTEVQTGGFSKESILPKRNSDKLIARKKDWDPKKYGGFDSPTVAYSVLVVAKVEKGKSKKLKSVKELLGITIMERSSFEKNPIDFLEAKGYKEVKKDLIIKLPKYSLFELENGRKRMLASAGELQKGNELALPSKYVNFLYLASHYEKLKGSPEDNEQKQLFVEQHKHYLDEIIEQISEFSKRVILADANLDKVLSAYNKHRDKPIREQAENIIHLFTLTNLGAPAAFKYFDTTIDRKRYTSTKEVLDATLIHQSITGLYETRIDLSQLGGD,1368,False,8117,8117,0,-0.2654328586,median,Spencer,Deep mutational scanning of S. 
pyogenes Cas9 reveals important functional domains,2017,10.1038/s41598-017-17081-y,1-1368,Streptococcus pyogenes Cas9,count of mutation where survival depends on expression of Cas9 and correct cleavage,Flow cytometry,CAS9_STRP1_2023-08-07_b01.a2m,1,1368,1368,0.1,0.2,5349,0.992,1357.0,1532.3,1.12918,Medium,241.0,0.17759764,SPCAS9_Spencer_positive_2022.csv,Log2 Fold Change after Positive Selection,1,mutant,CAS9_STRP1_theta0.2_2023-08-07_b01.npy,CAS9_STRP1.pdb,1.0,,Activity -CASP3_HUMAN_Roychowdhury_2020,CASP3_HUMAN_Roychowdhury_2020.csv,CASP3_HUMAN,Human,Homo sapiens,MSGISLDNSYKMDYPEMGLCIIINNKNFHKSTGMTSRSGTDVDAANLRETFRNLKYEVRNKNDLTREEIVELMRDVSKEDHSKRSSFVCVLLSHGEEGIIFGTNGPVDLKKITNFFRGDRCRSLTGKPKLFIIQACRGTELDCGIETDSGVDDDMACHKIPVEADFLYAYSTAPGYYSWRNSKDGSWFIQSLCAMLKQYADKLEFMHILTRVNRKVATEFESFSFDATFHAKKQIPCIVSMLTKELYFYHLEHHHHHH,258,False,1567,1567,0,0.03725973017,median,Roychowdhury,Microfluidic deep mutational scanning of the human executioner caspases reveals differences in structure and regulation,2022,10.1038/s41420-021-00799-0,2-258,CASP3,Fluorescence measurement,,CASP3_HUMAN_2023-08-07_b01.a2m,1,258,258,0.1,0.2,86012,0.884,228.0,28096.2,123.2289474,High,307.0,1.346491228,CASP3_HUMAN_Roychowdhury_2020.csv,coef,1,mutant,CASP3_HUMAN_theta0.2_2023-08-07_b01.npy,CASP3_HUMAN.pdb,1.0,,Activity -CASP7_HUMAN_Roychowdhury_2020,CASP7_HUMAN_Roychowdhury_2020.csv,CASP7_HUMAN,Human,Homo sapiens,MAKPDRSSFVPSLFSKKKKNVTMRSIKTTRDRVPTYQYNMNFEKLGKCIIINNKNFDKVTGMGVRNGTDKDAEALFKCFRSLGFDVIVYNDCSCAKMQDLLKKASEEDHTNAACFACILLSHGEENVIYGKDGVTPIKDLTAHFRGDRCKTLLEKPKLFFIQACRGTELDDGIQADSGPINDTDANPRYKIPVEADFLFAYSTVPGYYSWRSPGRGSWFVQALCSILEEHGKDLEIMQILTRVNDRVARHFESQSDDPHFHEKKQIPCVVSMLTKELYFSQ,281,False,1680,1680,0,-0.3340768074,median,Roychowdhury,Microfluidic deep mutational scanning of the human executioner caspases reveals differences in structure and regulation,2022,10.1038/s41420-021-00799-0,2-281,CASP7,Fluorescence 
measurement,,CASP7_HUMAN_2023-08-07_b01.a2m,1,281,281,0.1,0.2,71075,0.854,240.0,21588.4,89.95166667,Medium,298.0,1.241666667,CASP7_HUMAN_Roychowdhury_2020.csv,coef,1,mutant,CASP7_HUMAN_theta0.2_2023-08-07_b01.npy,CASP7_HUMAN.pdb,1.0,,Activity -CATR_CHLRE_Tsuboyama_2023_2AMI,CATR_CHLRE_Tsuboyama_2023_2AMI.csv,CATR_CHLRE,Eukaryote,Chlamydomonas reinhardtii,GLTEEQKQEIREAFDLFDTDGSGTIDAKELKVAMRALGFEPKKEEIKKMISEIDKDGSGTIDFEEFLTMMTA,72,True,1903,1340,563,-0.5681612987,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-10,1-72,Caltractin,Stability,cDNA display proteolysis,CATR_CHLRE_2023-08-07_b03.a2m,1,72,72,0.3,0.2,551057,0.903,65.0,75596.9,1163.029231,High,57.0,0.8769230769,Tsuboyama2023_Dataset2_Dataset7,ddG_ML_float,1,mut_type,CATR_CHLRE_theta0.2_2023-08-07_b03.npy,CATR_CHLRE.pdb,1.0,,Stability -CBPA2_HUMAN_Tsuboyama_2023_1O6X,CBPA2_HUMAN_Tsuboyama_2023_1O6X.csv,CBPA2_HUMAN,Human,Homo sapiens,VGDQVLEIVPSNEEQIKNLLQLEAQEHLQLDFWKSPTTPGETAHVRVPFVNVQAVKVFLESQGIAYSIMIED,72,True,2068,1357,711,-1.221174658,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-11,1-72,Carboxypeptidase A2,Stability,cDNA display proteolysis,CBPA2_HUMAN_2023-08-07_b03.a2m,1,72,72,0.3,0.2,12711,0.986,71.0,3086.5,43.47183099,Medium,34.0,0.4788732394,Tsuboyama2023_Dataset2_Dataset8,ddG_ML_float,1,mut_type,CBPA2_HUMAN_theta0.2_2023-08-07_b03.npy,CBPA2_HUMAN.pdb,1.0,,Stability -CBS_HUMAN_Sun_2020,CBS_HUMAN_Sun_2020.csv,CBS_HUMAN,Human,Homo 
sapiens,MPSETPQAEVGPTGCPHRSGPHSAKGSLEKGSPEDKEAKEPLWIRPDAPSRCTWQLGRPASESPHHHTAPAKSPKILPDILKKIGDTPMVRINKIGKKFGLKCELLAKCEFFNAGGSVKDRISLRMIEDAERDGTLKPGDTIIEPTSGNTGIGLALAAAVRGYRCIIVMPEKMSSEKVDVLRALGAEIVRTPTNARFDSPESHVGVAWRLKNEIPNSHILDQYRNASNPLAHYDTTADEILQQCDGKLDMLVASVGTGGTITGIARKLKEKCPGCRIIGVDPEGSILAEPEELNQTEQTTYEVEGIGYDFIPTVLDRTVVDKWFKSNDEEAFTFARMLIAQEGLLCGGSAGSTVAVAVKAAQELQEGQRCVVILPDSVRNYMTKFLSDRWMLQKGFLKEEDLTEKKPWWWHLRVQELGLSAPLTVLPTITCGHTIEILREKGFDQAPVVDEAGVILGMVTLGNMLSSLLAGKVQPSDQVGKVIYKQFKQIRLTDTLGRLSHILEMDHFALVVHEQIQYHSTGKSSQRQMVFGVVTAIDLLNFVAAQERDQK,551,False,7217,7217,0,0.3753910128,median,Sun,A proactive genotype-to-patient-phenotype map for cystathionine beta-synthase,2020,10.1186/s13073-020-0711-1,2-551,cystathionine beta-synthase,Yeast Growth,Growth,CBS_HUMAN_2023-10-12_b08.a2m,1,551,551,0.8,0.2,19563,0.833,459.0,1886.0,4.108932462,Medium,289.0,0.6296296296,,score,1,mutant,CBS_HUMAN_theta0.2_2023-10-12_b08.npy,CBS_HUMAN.pdb,1.0,,OrganismalFitness -CBX4_HUMAN_Tsuboyama_2023_2K28,CBX4_HUMAN_Tsuboyama_2023_2K28.csv,CBX4_HUMAN,Human,Homo sapiens,AVESIEKKRIRKGRVEYLVKWRGWSPKYNTWEPEENILDPRLLIAFQNRE,50,True,2282,917,1365,-1.635037732,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-12,1-50,E3 SUMO-protein ligase CBX4,Stability,cDNA display proteolysis,CBX4_HUMAN_2023-08-07_b03.a2m,1,50,50,0.3,0.2,108263,0.96,48.0,13404.4,279.2583333,High,23.0,0.4791666667,Tsuboyama2023_Dataset2_Dataset9,ddG_ML_float,1,mut_type,CBX4_HUMAN_theta0.2_2023-08-07_b03.npy,CBX4_HUMAN.pdb,1.0,,Stability -CCDB_ECOLI_Adkar_2012,CCDB_ECOLI_Adkar_2012.csv,CCDB_ECOLI,Prokaryote,Escherichia coli,MQFKVYTYKRESRYRLFVDVQSDIIDTPGRRMVIPLASARLLSDKVSRELYPVVHIGDESWRMMTTDMASVPVSVIGEEVADLSHRENDIKNAINLMFWGI,101,False,1176,1176,0,-19.0,median,Adkar,Protein model discrimination using mutational sensitivity derived from deep sequencing,2012,10.1016/j.str.2011.11.021,2-101,Toxin CcdB,Protein toxicity (negative effect on cell 
growth),toxin activity,CCDB_ECOLI_full_11-26-2021_b02.a2m,1,101,101,0.2,0.2,43564,0.851,86.0,16821.5,195.5988372,high,61.0,0.7093023256,CCDB_ECOLI_Adkar_2012.csv,score,-1,mutant,CCDB_ECOLI_theta_0.2.npy,CCDB_ECOLI.pdb,0.1,,Activity -CCDB_ECOLI_Tripathi_2016,CCDB_ECOLI_Tripathi_2016.csv,CCDB_ECOLI,Prokaryote,Escherichia coli,MQFKVYTYKRESRYRLFVDVQSDIIDTPGRRMVIPLASARLLSDKVSRELYPVVHIGDESWRMMTTDMASVPVSVIGEEVADLSHRENDIKNAINLMFWGI,101,False,1663,1663,0,-3.5,manual,Tripathi,"Molecular Determinants of Mutant Phenotypes, Inferred from Saturation Mutagenesis Data",2016,10.1093/molbev/msw182,2-101,Toxin CcdB,growth (surrogate for toxicity/activity of CCDB),Growth,CCDB_ECOLI_full_11-26-2021_b02.a2m,1,101,101,0.2,0.2,43564,0.851,86.0,16821.5,195.5988372,high,61.0,0.7093023256,CCDB_ECOLI_Tripathi_2016.csv,score,-1,mutant,CCDB_ECOLI_theta_0.2.npy,CCDB_ECOLI.pdb,0.1,,OrganismalFitness -CCR5_HUMAN_Gill_2023,CCR5_HUMAN_Gill_2023.csv,CCR5_HUMAN,Human,Homo sapiens,MDYQVSSPIYDINYYTSEPCQKINVKQIAARLLPPLYSLVFIFGFVGNMLVILILINCKRLKSMTDIYLLNLAISDLFFLLTVPFWAHYAAAQWDFGNTMCQLLTGLYFIGFFSGIFFIILLTIDRYLAVVHAVFALKARTVTFGVVTSVITWVVAVFASLPGIIFTRSQKEGLHYTCSSHFPYSQYQFWKNFQTLKIVILGLVLPLLVMVICYSGILKTLLRCRNEKKRHRAVRLIFTIMIVYFLFWAPYNIVLLLNTFQEFFGLNNCSSSNRLDQAMQVTETLGMTHCCINPIIYAFVGEKFRNYLLVFFQKHIAKRFCKCCSIFQQEAPERASSVYTRSTGEQEISVGL,352,False,6137,6137,0,-0.06,median,Gill,Multiple mechanisms of self-association of chemokine receptors CXCR4 and CCR5 demonstrated by deep mutagenesis,2023,10.1101/2023.03.25.534231,2-344,CCR5,"binding affinity, surface expression",FACS,CCR5_HUMAN_2023-08-07_b03.a2m,1,352,352,0.3,0.2,632074,0.83,292.0,63056.0,215.9452055,High,309.0,1.058219178,,avg_score,1,mutant,CCR5_HUMAN_theta0.2_2023-08-07_b03.npy,CCR5_HUMAN.pdb,1.0,,Binding -CD19_HUMAN_Klesmith_2019_FMC_singles,CD19_HUMAN_Klesmith_2019_FMC_singles.csv,CD19_HUMAN,Human,Homo 
sapiens,MPPPRLLFFLLFLTPMEVRPEEPLVVKVEEGDNAVLQCLKGTSDGPTQQLTWSRESPLKPFLKLSLGLPGLGIHMRPLAIWLFIFNVSQQMGGFYLCQPGPPSEKAWQPGWTVNVEGSGELFRWNVSDLGGLGCGLKNRSSEGPSSPSGKLMSPKLYVWAKDRPEIWEGEPPCLPPRDSLNQSLSQDLTMAPGSTLWLSCGVPPDSVSRGPLSWTHVHPKGPKSLLSLELKDDRPARDMWVMETGLLLPRATAQDAGKYYCHRGNLTMSFHLEITARPVLWHWLLRTGGWKVSAVTLAYLIFCLCSLVGILHLQRALVLRRKRKRMTDPTRRFFKVTPPPGSGPQNQYGNVLSLPTPTSGLGRAQRWAAGLGGTAPSYGNPSSDVQADGALGSRSPPGVGPEEEEGEGYEEPDSEEDSEFYENDSNLGQDQLSQDGSGYENPEDEPLGPEDEDSFSNAESYENEDEELTQPVARTMDFLSPHGSAWDPSREATSLGSQSYEDMRGILYAAPQLRSIRGQPGPNHEEDADSYENMDNPDGPDPAWGGGGRMGTWSTR,556,False,3761,3761,0,0.0,manual,Klesmith,Retargeting CD19 Chimeric Antigen Receptor T Cells via Engineered CD19-Fusion Proteins,2019,10.1021/acs.molpharmaceut.9b00418,20-291,CD19,Binding affinity,FACS,CD19_HUMAN_2023-10-12_b01.a2m,1,556,556,0.1,0.2,1183,1.0,556.0,275.2,0.4949640288,Low,11.0,0.01978417266,single-site/Clinical_FMC_T1_Fitness.tsv,Fitness,1,mutant_offset,CD19_HUMAN_theta0.2_2023-10-12_b01.npy,CD19_HUMAN.pdb,1.0,,Binding -CP2C9_HUMAN_Amorosi_2021_abundance,CP2C9_HUMAN_Amorosi_2021_abundance.csv,CP2C9_HUMAN,Human,Homo sapiens,MDSLVVLVLCLSCLLLLSLWRQSSGRGKLPPGPTPLPVIGNILQIGIKDISKSLTNLSKVYGPVFTLYFGLKPIVVLHGYEAVKEALIDLGEEFSGRGIFPLAERANRGFGIVFSNGKKWKEIRRFSLMTLRNFGMGKRSIEDRVQEEARCLVEELRKTKASPCDPTFILGCAPCNVICSIIFHKRFDYKDQQFLNLMEKLNENIKILSSPWIQICNNFSPIIDYFPGTHNKLLKNVAFMKSYILEKVKEHQESMDMNNPQDFIDCFLMKMEKEKHNQPSEFTIESLENTAVDLFGAGTETTSTTLRYALLLLLKHPEVTAKVQEEIERVIGRNRSPCMQDRSHMPYTDAVVHEVQRYIDLLPTSLPHAVTCDIKFRNYLIPKGTTILISLTSVLHDNKEFPNPEMFDPHHFLDEGGNFKKSKYFMPFSAGKRICVGEALAGMELFLFLTSILQNFNLKSLVDPKNLDTTPVVNGFASVPPFYQLCFIPV,490,False,6370,6370,0,0.7723244345,median,Amorosi,Massively parallel characterization of CYP2C9 variant enzyme activity and abundance,2021,10.1016/j.ajhg.2021.07.001,2-490,Cytochrome P450 2C9,protein abundance,protein 
abundance,CP2C9_HUMAN_full_11-26-2021_b04.a2m,1,490,490,0.4,0.2,264279,0.886,434.0,81212.1,187.1246544,high,1092.0,2.516129032,CP2C9_HUMAN_Amorosi_2021.csv,abundance_score,1,variant,CP2C9_HUMAN_theta_0.2.npy,CP2C9_HUMAN.pdb,0.1,,Expression -CP2C9_HUMAN_Amorosi_2021_activity,CP2C9_HUMAN_Amorosi_2021_activity.csv,CP2C9_HUMAN,Human,Homo sapiens,MDSLVVLVLCLSCLLLLSLWRQSSGRGKLPPGPTPLPVIGNILQIGIKDISKSLTNLSKVYGPVFTLYFGLKPIVVLHGYEAVKEALIDLGEEFSGRGIFPLAERANRGFGIVFSNGKKWKEIRRFSLMTLRNFGMGKRSIEDRVQEEARCLVEELRKTKASPCDPTFILGCAPCNVICSIIFHKRFDYKDQQFLNLMEKLNENIKILSSPWIQICNNFSPIIDYFPGTHNKLLKNVAFMKSYILEKVKEHQESMDMNNPQDFIDCFLMKMEKEKHNQPSEFTIESLENTAVDLFGAGTETTSTTLRYALLLLLKHPEVTAKVQEEIERVIGRNRSPCMQDRSHMPYTDAVVHEVQRYIDLLPTSLPHAVTCDIKFRNYLIPKGTTILISLTSVLHDNKEFPNPEMFDPHHFLDEGGNFKKSKYFMPFSAGKRICVGEALAGMELFLFLTSILQNFNLKSLVDPKNLDTTPVVNGFASVPPFYQLCFIPV,490,False,6142,6142,0,0.5476104185,median,Amorosi,Massively parallel characterization of CYP2C9 variant enzyme activity and abundance,2021,10.1016/j.ajhg.2021.07.001,1-490,Cytochrome P450 2C9,"activity, binding (to fluorescent CYP probe)","Activity, binding",CP2C9_HUMAN_full_11-26-2021_b04.a2m,1,490,490,0.4,0.2,264279,0.886,434.0,81212.1,187.1246544,high,1092.0,2.516129032,CP2C9_HUMAN_Amorosi_2021.csv,activity_score,1,variant,CP2C9_HUMAN_theta_0.2.npy,CP2C9_HUMAN.pdb,0.1,,Binding -CSN4_MOUSE_Tsuboyama_2023_1UFM,CSN4_MOUSE_Tsuboyama_2023_1UFM.csv,CSN4_MOUSE,Eukaryote,Mus musculus,SSGGSSILDRAVIEHNLLSASKLYNNITFEELGALLEIPAAKAEKIASQMITEGRMNGFIDQIDGIVHFETR,72,True,3295,1353,1942,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-13,1-72,COP9 signalosome complex subunit 4,Stability,cDNA display proteolysis,CSN4_MOUSE_2023-08-07_b03.a2m,1,72,72,0.3,0.2,39217,0.889,64.0,3492.9,54.5765625,Medium,9.0,0.140625,Tsuboyama2023_Dataset2_Dataset10,ddG_ML_float,1,mut_type,CSN4_MOUSE_theta0.2_2023-08-07_b03.npy,CSN4_MOUSE.pdb,1.0,,Stability 
-CUE1_YEAST_Tsuboyama_2023_2MYX,CUE1_YEAST_Tsuboyama_2023_2MYX.csv,CUE1_YEAST,Eukaryote,Saccharomyces cerevisiae,GGHPVTTQMVETVQNLAPNLHPEQIRYSLENTGSVEETVERYLRGDEFSFPP,52,True,1580,955,625,-1.319713733,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-14,1-52,Coupling of ubiquitin conjugation to ER degradation protein 1,Stability,cDNA display proteolysis,CUE1_YEAST_2023-08-07_b08.a2m,1,52,52,0.8,0.2,3213,0.923,48.0,387.1,8.064583333,Medium,10.0,0.2083333333,Tsuboyama2023_Dataset2_Dataset11,ddG_ML_float,1,mut_type,CUE1_YEAST_theta0.2_2023-08-07_b08.npy,CUE1_YEAST.pdb,1.0,,Stability -D7PM05_CLYGR_Somermeyer_2022,D7PM05_CLYGR_Somermeyer_2022.csv,D7PM05_CLYGR,Eukaryote,Clytia gregaria,MTALTEGAKLFEKEIPYITELEGDVEGMKFIIKGEGTGDATTGTIKAKYICTTGDLPVPWATILSSLSYGVFCFAKYPRHIADFFKSTQPDGYSQDRIISFDNDGQYDVKAKVTYENGTLYNRVTVKGTGFKSNGNILGMRVLYHSPPHAVYILPDRKNGGMKIEYNKAFDVMGGGHQMARHAQFNKPLGAWEEDYPLYHHLTVWTSFGKDPDDDETDHLTIVEVIKAVDLETYR,235,True,24515,1169,23346,12500.0,manual,Somermeyer,Heterogeneity of the GFP fitness landscape and data-driven protein design,2022,10.7554/eLife.75842,2-235,Green fluorescent protein cgreGFP,Fluorescence,FACS,D7PM05_CLYGR_full_b0.2.a2m,1,235,235,0.2,0.2,694,1.0,235.0,137.6,0.5855319149,Low,4.0,0.0170212766,D7PM05_CLYGR_Somermeyer_2022.csv,replicates_mean_brightness,1,mutant,D7PM05_CLYGR_theta_0.2.npy,D7PM05_CLYGR.pdb,1.0,,Activity -DLG4_HUMAN_Faure_2021,DLG4_HUMAN_Faure_2021.csv,DLG4_HUMAN,Human,Homo 
sapiens,MDCLCIVTTKKYRYQDEDTPPLEHSPAHLPNQANSPPVIVNTDTLEAPGYELQVNGTEGEMEYEEITLERGNSGLGFSIAGGTDNPHIGDDPSIFITKIIPGGAAAQDGRLRVNDSILFVNEVDVREVTHSAAVEALKEAGSIVRLYVMRRKPPAEKVMEIKLIKGPKGLGFSIAGGVGNQHIPGDNSIYVTKIIEGGAAHKDGRLQIGDKILAVNSVGLEDVMHEDAVAALKNTYDVVYLKVAKPSNAYLSDSYAPPDITTSYSQHLDNEISHSSYLGTDYPTAMTPTSPRRYSPVAKDLLGEEDIPREPRRIVIHRGSTGLGFNIVGGEDGEGIFISFILAGGPADLSGELRKGDQILSVNGVDLRNASHEQAAIALKNAGQTVTIIAQYKPEEYSRFEAKIHDLREQLMNSSLGSGTASLRSNPKRGFYIRALFDYDKTKDCGFLSQALSFRFGDVLHVIDASDEEWWQARRVHSDSETDDIGFIPSKRRVERREWSRLKAKDWGSSSGSQGREDSVLSYETVTQMEVHYARPIIILGPTKDRANDDLLSEFPDKFGSCVPHTTRPKREYEIDGRDYHFVSSREKMEKDIQAHKFIEAGQYNSHLYGTSVQSVREVAEQGKHCILDVSANAVRRLQAAHLHPIAIFIRPRSLENVLEINKRITEEQARKAFDRATKLEQEFTECFSAIVEGDSFEEIYHKVKRVIEDLSGPYIWVPARERL,724,True,6976,1280,5696,-0.5602585328,median,Faure,Mapping the energetic and allosteric landscapes of protein binding domains,2022,10.1038/s41586-022-04586-4,311-394,PSD95-PDZ3,Yeast growth,Growth,DLG4_HUMAN_full_11-26-2021_b02.a2m,1,724,724,0.2,0.2,25338,0.825,597.0,354.3,0.5934673367,low,7.0,0.01172529313,DLG4_HUMAN_Faure_2021.csv,fitness,1,mutant,DLG4_HUMAN_theta_0.2.npy,DLG4_HUMAN.pdb,0.1,,OrganismalFitness -DLG4_RAT_McLaughlin_2012,DLG4_RAT_McLaughlin_2012.csv,DLG4_RAT,Eukaryote,Rattus 
norvegicus,MDCLCIVTTKKYRYQDEDTPPLEHSPAHLPNQANSPPVIVNTDTLEAPGYELQVNGTEGEMEYEEITLERGNSGLGFSIAGGTDNPHIGDDPSIFITKIIPGGAAAQDGRLRVNDSILFVNEVDVREVTHSAAVEALKEAGSIVRLYVMRRKPPAEKVMEIKLIKGPKGLGFSIAGGVGNQHIPGDNSIYVTKIIEGGAAHKDGRLQIGDKILAVNSVGLEDVMHEDAVAALKNTYDVVYLKVAKPSNAYLSDSYAPPDITTSYSQHLDNEISHSSYLGTDYPTAMTPTSPRRYSPVAKDLLGEEDIPREPRRIVIHRGSTGLGFNIVGGEDGEGIFISFILAGGPADLSGELRKGDQILSVNGVDLRNASHEQAAIALKNAGQTVTIIAQYKPEEYSRFEAKIHDLREQLMNSSLGSGTASLRSNPKRGFYIRALFDYDKTKDCGFLSQALSFRFGDVLHVIDAGDEEWWQARRVHSDSETDDIGFIPSKRRVERREWSRLKAKDWGSSSGSQGREDSVLSYETVTQMEVHYARPIIILGPTKDRANDDLLSEFPDKFGSCVPHTTRPKREYEIDGRDYHFVSSREKMEKDIQAHKFIEAGQYNSHLYGTSVQSVREVAEQGKHCILDVSANAVRRLQAAHLHPIAIFIRPRSLENVLEINKRITEEQARKAFDRATKLEQEFTECFSAIVEGDSFEEIYHKVKRVIEDLSGPYIWVPARERL,724,False,1576,1576,0,-0.25,manual,McLaughlin,The spatial architecture of protein function and adaptation,2012,10.1038/nature11500,311-393,"Dlg4, (PSD95_PDZ3)",peptide binding - natural ligand,Binding,DLG4_RAT_full_11-26-2021_b03.a2m,1,724,724,0.3,0.2,24705,0.841,609.0,283.9,0.4661740558,low,6.0,0.009852216749,DLG4_RAT_McLaughlin_2012.csv,CRIPT,1,mutant,DLG4_RAT_theta_0.2.npy,DLG4_RAT.pdb,0.1,,Binding -DN7A_SACS2_Tsuboyama_2023_1JIC,DN7A_SACS2_Tsuboyama_2023_1JIC.csv,DN7A_SACS2,Prokaryote,Saccharolobus solfataricus,TVKFKYKGEEKQVDISKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQ,55,False,1008,1008,0,-0.472754253,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-15,1-55,DNA-binding protein 7a,Stability,cDNA display proteolysis,DN7A_SACS2_2023-08-07_b02.a2m,1,55,55,0.2,0.2,42895,0.764,42.0,1248.1,29.71666667,Medium,13.0,0.3095238095,Tsuboyama2023_Dataset2_Dataset12,ddG_ML_float,1,mut_type,DN7A_SACS2_theta0.2_2023-08-07_b02.npy,DN7A_SACS2.pdb,1.0,,Stability -DNJA1_HUMAN_Tsuboyama_2023_2LO1,DNJA1_HUMAN_Tsuboyama_2023_2LO1.csv,DNJA1_HUMAN,Human,Homo 
sapiens,TTYYDVLGVKPNATQEELKKAYRKLALKYHPDKNPNEGEKFKQISQAYEVLSDAKKRELYDKGGE,65,True,2264,1216,1048,-2.239788161,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-16,1-65,DnaJ homolog subfamily A member 1,Stability,cDNA display proteolysis,DNJA1_HUMAN_2023-08-07_b07.a2m,1,65,65,0.7,0.2,280284,0.969,63.0,35361.9,561.3,High,52.0,0.8253968254,Tsuboyama2023_Dataset2_Dataset13,ddG_ML_float,1,mut_type,DNJA1_HUMAN_theta0.2_2023-08-07_b07.npy,DNJA1_HUMAN.pdb,1.0,,Stability -DOCK1_MOUSE_Tsuboyama_2023_2M0Y,DOCK1_MOUSE_Tsuboyama_2023_2M0Y.csv,DOCK1_MOUSE,Eukaryote,Mus musculus,WVPTKREEKYGVAFYNYDARGADELSLQIGDTVHILETYEGWYRGYTLRKKSKKGIFPASYIHLKE,66,True,2915,1213,1702,-1.104437518,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-17,1-66,Dedicator of cytokinesis protein 1,Stability,cDNA display proteolysis,DOCK1_MOUSE_2023-08-07_b03.a2m,1,66,66,0.3,0.2,705447,0.848,56.0,22172.3,395.9339286,High,55.0,0.9821428571,Tsuboyama2023_Dataset2_Dataset14,ddG_ML_float,1,mut_type,DOCK1_MOUSE_theta0.2_2023-08-07_b03.npy,DOCK1_MOUSE.pdb,1.0,,Stability -DYR_ECOLI_Nguyen_2023,DYR_ECOLI_Nguyen_2023.csv,DYR_ECOLI,Prokaryote,Escherichia coli,MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGRHTWESIGRPLPGRKNIILSSQPGTDDRVTWVKSVDEAIAACGDVPEIMVIGGGRVYEQFLPKAQKLYLTHIDAEVEGDTHFPDYEPDDWESVFSEFHDADAQNSHSYCFEILERR,159,False,2916,2916,0,0.8,manual,Nguyen,The Genetic Landscape of a Metabolic Interaction,2023,10.1101/2023.05.28.542639,2-159,DHFR,cell growth in ∆DHFR bacteria,Growth,DYR_ECOLI_2023-08-07_b01.a2m,1,159,159,0.1,0.2,188828,0.969,154.0,47685.7,309.6474026,High,337.0,2.188311688,542639_file03.xlsx,Avg Growth - WT TYMS,1,Mutation,DYR_ECOLI_theta0.2_2023-08-07_b01.npy,DYR_ECOLI.pdb,1.0,,OrganismalFitness -DYR_ECOLI_Thompson_2019,DYR_ECOLI_Thompson_2019.csv,DYR_ECOLI,Prokaryote,Escherichia 
coli,MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGRHTWESIGRPLPGRKNIILSSQPGTDDRVTWVKSVDEAIAACGDVPEIMVIGGGRVYEQFLPKAQKLYLTHIDAEVEGDTHFPDYEPDDWESVFSEFHDADAQNSHSYCFEILERR,159,False,2363,2363,0,-0.5,manual,Thompson,Altered expression of a quality control protease in E. coli reshapes the in vivo mutational landscape of a model enzyme,2019,10.7554/eLife.53476,2-159,DHFR reductase,"growth (turbidostat; -Lon for natural absence of Lon protease in E. coli, +Lon for exogenous protease)",Growth,DYR_ECOLI_full_11-26-2021_b08.a2m,1,159,159,0.8,0.2,41921,0.981,156.0,12203.2,78.22564103,medium,265.0,1.698717949,DYR_ECOLI_Thompson_plusLon_2019.csv,PlusLon_selection_coefficient,1,mutant,DYR_ECOLI_theta_0.2.npy,DYR_ECOLI.pdb,0.1,,OrganismalFitness -ENV_HV1B9_DuenasDecamp_2016,ENV_HV1B9_DuenasDecamp_2016.csv,ENV_HV1B9,Virus,Human immunodeficiency virus type 1 group M subtype B (strain 89.6) (HIV-1),MRVKEIRKNWQHLRGGILLLGMLMICSAAKEKTWVTIYYGVPVWREATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNNMVDQMHEDIISLWDESLKPCVKLTPLCVTLNCTNLNITKNTTNPTSSSWGMMEKGEIKNCSFYITTSIRNKVKKEYALFNRLDVVPIENTNNTKYRLISCNTSVITQACPKVSFQPIPIHYCVPAGFAMLKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDIVIRSENFTDNAKTIIVQLNESVVINCTRPNNNTRRRLSIGPGRAFYARRNIIGDIRQAHCNISRAKWNNTLQQIVIKLREKFRNKTIAFNQPSGGDPEIVRHSFNCGGEFFYCNTAQLFNSTWNVTGGTNGTEGNDIITLQCRIKQIINMWQKVGKAMYAPPITGQIRCSSNITGLLLTRDGGNSTETETEIFRPGGGDMRDNWRSELYKYKVVRIEPIGVAPTRAKRRTVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLALERYLRDQQLMGIWGCSGKLICTTSVPWNVSWSNKSVDDIWNNMTWMEWEREIDNYTDYIYDLLEKSQTQQEKNEKELLELDKWASLWNWFDITNWLWYIRLFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSFQTLLPASRGPDRPEGTEEEGGERDRDRSGPLVNGFLALFWVDLRNLCLFLYHLLRNLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIKIVQRACRAIRNIPTRIRQGLERALL,853,False,375,375,0,-0.8,manual,Duenas-Decamp,Saturation Mutagenesis of the HIV-1 Envelope CD4 Binding Loop Reveals Residues Controlling Distinct Trimer Conformations,2016,10.1371/journal.ppat.1005988,361-380,HIV env,Viral 
replication,Growth,ENV_HV1B9_S364P-M373R_b0.3.a2m,1,853,853,0.3,0.01,87271,0.989,844.0,11807.8,13.99028436,medium,947.0,1.122037915,ENV_HV1B9_DuenasDecamp_2016.csv,Fitness_Effect,1,mutant,ENV_HV1B9_theta_0.01.npy,ENV_HV1B9.pdb,0.1,,OrganismalFitness -ENV_HV1BR_Haddox_2016,ENV_HV1BR_Haddox_2016.csv,ENV_HV1BR,Virus,Human immunodeficiency virus type 1 group M subtype B (isolate BRU/LAI) (HIV-1),MRVKEKYQHLWRWGWKWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL,861,False,12863,12863,0,0.0191127558,median,Haddox,Experimental Estimation of the Effects of All Amino-Acid Mutations to HIV‚Äôs Envelope Protein on Viral Replication in Cell Culture,2016,10.1371/journal.ppat.1006114,31-707,HIV env,Viral replication,Growth,ENV_HV1BR_theta0.99_full_11-26-2021_b09.a2m,1,861,861,0.9,0.01,74844,0.98,844.0,36809.8,43.61350711,medium,2359.0,2.795023697,ENV_HV1BR_Haddox_2016.csv,score,1,mutant,ENV_HV1BR_theta_0.01.npy,ENV_HV1BR.pdb,0.1,,OrganismalFitness -ENVZ_ECOLI_Ghose_2023,ENVZ_ECOLI_Ghose_2023.csv,ENVZ_ECOLI,Prokaryote,Escherichia coli,LADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEECNAIIEQFIDYLR,60,False,1121,1121,0,3.5,manual,Ghose,Marginal specificity in protein interactions constrains evolution of 
a paralogous family,2023,10.1073/pnas.2221163120,1-60,EnvZ kinase,fluorescent reporter,FACS,ENVZ_ECOLI_2023-08-07_b02.a2m,1,60,60,0.2,0.2,1879223,0.933,56.0,254652.1,4547.358929,High,55.0,0.9821428571,ENVZ_ECOLI_Ghose_2023.csv,mean_on,1,mutant,ENVZ_ECOLI_theta0.2_2023-08-07_b02.npy,ENVZ_ECOLI.pdb,1.0,,Activity -EPHB2_HUMAN_Tsuboyama_2023_1F0M,EPHB2_HUMAN_Tsuboyama_2023_1F0M.csv,EPHB2_HUMAN,Human,Homo sapiens,SFNTVDEWLEAIKMGQYKESFANAGFTSFDVVSQMMMEDILRVGVTLAGHQKKILNSIQVMRAQMN,66,True,1960,1239,721,-1.932053964,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-18,1-66,Ephrin type-B receptor 2,Stability,cDNA display proteolysis,EPHB2_HUMAN_2023-08-07_b04.a2m,1,66,66,0.4,0.2,212234,0.894,59.0,8426.3,142.8186441,High,29.0,0.4915254237,Tsuboyama2023_Dataset2_Dataset15,ddG_ML_float,1,mut_type,EPHB2_HUMAN_theta0.2_2023-08-07_b04.npy,EPHB2_HUMAN.pdb,1.0,,Stability -ERBB2_HUMAN_Elazar_2016,ERBB2_HUMAN_Elazar_2016.csv,ERBB2_HUMAN,Human,Homo 
sapiens,MELAALCRWGLLLALLPPGAASTQVCTGTDMKLRLPASPETHLDMLRHLYQGCQVVQGNLELTYLPTNASLSFLQDIQEVQGYVLIAHNQVRQVPLQRLRIVRGTQLFEDNYALAVLDNGDPLNNTTPVTGASPGGLRELQLRSLTEILKGGVLIQRNPQLCYQDTILWKDIFHKNNQLALTLIDTNRSRACHPCSPMCKGSRCWGESSEDCQSLTRTVCAGGCARCKGPLPTDCCHEQCAAGCTGPKHSDCLACLHFNHSGICELHCPALVTYNTDTFESMPNPEGRYTFGASCVTACPYNYLSTDVGSCTLVCPLHNQEVTAEDGTQRCEKCSKPCARVCYGLGMEHLREVRAVTSANIQEFAGCKKIFGSLAFLPESFDGDPASNTAPLQPEQLQVFETLEEITGYLYISAWPDSLPDLSVFQNLQVIRGRILHNGAYSLTLQGLGISWLGLRSLRELGSGLALIHHNTHLCFVHTVPWDQLFRNPHQALLHTANRPEDECVGEGLACHQLCARGHCWGPGPTQCVNCSQFLRGQECVEECRVLQGLPREYVNARHCLPCHPECQPQNGSVTCFGPEADQCVACAHYKDPPFCVARCPSGVKPDLSYMPIWKFPDEEGACQPCPINCTHSCVDLDDKGCPAEQRASPLTSIISAVVGILLVVVLGVVFGILIKRRQQKIRKYTMRRLLQETELVEPLTPSGAMPNQAQMRILKETELRKVKVLGSGAFGTVYKGIWIPDGENVKIPVAIKVLRENTSPKANKEILDEAYVMAGVGSPYVSRLLGICLTSTVQLVTQLMPYGCLLDHVRENRGRLGSQDLLNWCMQIAKGMSYLEDVRLVHRDLAARNVLVKSPNHVKITDFGLARLLDIDETEYHADGGKVPIKWMALESILRRRFTHQSDVWSYGVTVWELMTFGAKPYDGIPAREIPDLLEKGERLPQPPICTIDVYMIMVKCWMIDSECRPRFRELVSEFSRMARDPQRFVVIQNEDLGPASPLDSTFYRSLLEDDDMGDLVDAEEYLVPQQGFFCPDPAPGAGGMVHHRHRSSSTRSGGGDLTLGLEPSEEEAPRSPLAPSEGAGSDVFDGDLGMGAAKGLQSLPTHDPSPLQRYSEDPTVPLPSETDGYVAPLTCSPQPEYVNQPDVRPQPPSPREGPLPAARPAGATLERPKTLSPGKNGVVKDVFAFGGAVENPEYLTPQGGAAPQPHPPPAFSPAFDNLYYWDQDPPERGAPPSTFKGTPTAENPEYLGLDVPV,1255,False,326,326,0,0.0678339381,median,Elazar,Mutational scanning reveals the determinants of protein insertion and association energetics in the plasma membrane,2016,10.7554/eLife.12125,651-674,ErbB2 membrane domain,Membrane-protein insertion,TOXCAT-Beta-lactamase (TbL) screen,ERBB2_HUMAN_2023-10-12_b02.a2m,1,1255,1255,0.2,0.2,8311,0.981,1231.0,447.9,0.363850528,Low,187.0,0.1519090171,urn_mavedb_00000051-b-1_scores.csv,score,-1,mutant,ERBB2_HUMAN_theta0.2_2023-10-12_b02.npy,ERBB2_HUMAN.pdb,1.0,650.0,Expression -ESTA_BACSU_Nutschel_2020,ESTA_BACSU_Nutschel_2020.csv,ESTA_BACSU,Prokaryote,Bacillus 
subtilis,MKFVKRRIIALVTILMLSVTSLFALQPSAKAAEHNPVVMVHGIGGASFNFAGIKSYLVSQGWSRDKLYAVDFWDKTGTNYNNGPVLSRFVQKVLDETGAKKVDIVAHSMGGANTLYYIKNLDGGNKVANVVTLGGANRLTTGKALPGTDPNQKILYTSIYSSADMIVMNYLSRLDGARNVQIHGVGHIGLLYSSQVNSLIKEGLNGGGQNTN,212,False,2172,2172,0,46.34,median,Nutschel,Systematically Scrutinizing the Impact of Substitution Sites on Thermostability and Detergent Tolerance for Bacillus subtilis Lipase A,2020,10.1021/acs.jcim.9b00954,32-205,estA,thermostability,thermostability,ESTA_BACSU_full_11-26-2021_b03.a2m,1,212,212,0.3,0.2,234310,0.774,164.0,64492.5,393.2469512,high,292.0,1.780487805,ESTA_BACSU_Nutschel_2020.csv,T50,1,Variants of BsLipA,ESTA_BACSU_theta_0.2.npy,ESTA_BACSU.pdb,0.1,,Stability -F7YBW7_MESOW_Ding_2023,F7YBW7_MESOW_Ding_2023.csv,F7YBW7_MESOW,Prokaryote,M. opportunism,MANVEKMSVAVTPQQAAVMREAVEAGEYATASEIVREAVRDWLAKRELRHDDIRRLRQLWDEGKASGRPEPVDFDALRKEARQKLTEVPPNGR,93,True,7922,80,7842,0.3,manual,Ding,Protein design using structure-based residue preferences,2023,10.1101/2022.10.31.514613,48-82,Antitoxin ParD3,growth enrichment,,F7YBW8_MESOW_full_01-07-2022_b02.a2m,1,93,93,0.2,0.2,38613,0.774,72.0,16262.4,225.8666667,high,31.0,0.4305555556,df_at_10pos.csv,DMS_score,1,mutant,F7YBW8_MESOW_theta_0.2.npy,F7YBW7_MESOW.pdb,1.0,,OrganismalFitness -F7YBW8_MESOW_Aakre_2015,F7YBW8_MESOW_Aakre_2015.csv,F7YBW8_MESOW,Prokaryote,Mesorhizobium opportunistum (strain LMG 24607 / HAMBI 3007 / WSM2075),MANVEKMSVAVTPQQAAVMREAVEAGEYATASEIVREAVRDWLAKRELRHDDIRRLRQLWDEGKASGRPEPVDFDALRKEARQKLTEVPPNGR,93,True,9192,37,9155,-0.001724,median,Aakre,Evolving New Protein-Protein Interaction Specificity through Promiscuous Intermediates,2015,10.1016/j.cell.2015.09.055,59-64,Antitoxin ParD3,fitness,Growth (antitoxin neutralization of ParE3),F7YBW8_MESOW_full_01-07-2022_b02.a2m,1,93,93,0.2,0.2,38613,0.774,72.0,16262.4,225.8666667,high,31.0,0.4305555556,F7YBW8_MESOW_Aakre_2015.csv,fitness,1,mutant,F7YBW8_MESOW_theta_0.2.npy,F7YBW8_MESOW.pdb,0.1,,OrganismalFitness 
-FECA_ECOLI_Tsuboyama_2023_2D1U,FECA_ECOLI_Tsuboyama_2023_2D1U.csv,FECA_ECOLI,Eukaryote,Escherichia coli,QVNIAPGSLDKALNQYAAHSGFTLSVDASLTRGKQSNGLHGDYDVESGLQQLLDGSGLQVKPLGNNSWTLEP,72,True,1886,1219,667,-0.813576222,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-19,1-72,Fe(3+) dicitrate transport protein FecA,Stability,cDNA display proteolysis,FECA_ECOLI_2023-08-07_b06.a2m,1,72,72,0.6,0.2,74248,0.986,71.0,9949.9,140.1394366,High,63.0,0.8873239437,Tsuboyama2023_Dataset2_Dataset16,ddG_ML_float,1,mut_type,FECA_ECOLI_theta0.2_2023-08-07_b06.npy,FECA_ECOLI.pdb,1.0,,Stability -FKBP3_HUMAN_Tsuboyama_2023_2KFV,FKBP3_HUMAN_Tsuboyama_2023_2KFV.csv,FKBP3_HUMAN,Human,Homo sapiens,VPQRAWTVEQLRSEQLPKKDIIKFLQEHGSDSFLAEHKLLGNIKNVAKTANKDHLVTAYNHLFETKRFK,69,False,1237,1237,0,-0.1631252002,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-20,1-69,Peptidyl-prolyl cis-trans isomerase FKBP3,Stability,cDNA display proteolysis,FKBP3_HUMAN_2023-08-07_b03.a2m,1,69,69,0.3,0.2,3216,0.957,66.0,132.0,2.0,Medium,7.0,0.1060606061,Tsuboyama2023_Dataset2_Dataset17,ddG_ML_float,1,mut_type,FKBP3_HUMAN_theta0.2_2023-08-07_b03.npy,FKBP3_HUMAN.pdb,1.0,,Stability -GAL4_YEAST_Kitzman_2015,GAL4_YEAST_Kitzman_2015.csv,GAL4_YEAST,Eukaryote,Saccharomyces cerevisiae 
S288C,MKLLSSIEQACDICRLKKLKCSKEKPKCAKCLKNNWECRYSPKTKRSPLTRAHLTEVESRLERLEQLFLLIFPREDLDMILKMDSLQDIKALLTGLFVQDNVNKDAVTDRLASVETDMPLTLRQHRISATSSSEESSNKGQRQLTVSIDSAAHHDNSTIPLDFMPRDALHGFDWSEEDDMSDGLPFLKTDPNNNGFFGDGSLLCILRSIGFKPENYTNSNVNRLPTMITDRYTLASRSTTSRLLQSYLNNFHPYCPIVHSPTLMMLYNNQIEIASKDQWQILFNCILAIGAWCIEGESTDIDVFYYQNAKSHLTSKVFESGSIILVTALHLLSRYTQWRQKTNTSYNFHSFSIRMAISLGLNRDLPSSFSDSSILEQRRRIWWSVYSWEIQLSLLYGRSIQLSQNTISFPSSVDDVQRTTTGPTIYHGIIETARLLQVFTKIYELDKTVTAEKSPICAKKCLMICNEIEEVSRQAPKFLQMDISTTALTNLLKEHPWLSFTRFELKWKQLSLIIYVLRDFFTNFTQKKSQLEQDQNDHQSYEVKRCSIMLSDAAQRTVMSVSSYMDNHNVTPYFAWNCSYYLFNAVLVPIKTLLSNSKSNAENNETAQLLQQINTVLMLLKKLATFKIQTCEKYIQVLEEVCAPFLLSQCAIPLPHISYNNSNGSAIKNIVGSATIAQYPTLPEENVNNISVKYVSPGSVGPSPVPLKSGASFSDLVKLLSNRPPSRNSPVTIPRSTPSHRSVTPFLGQQQQLQSLVPLTPSALFGGANFNQSGNIADSSLSFTFTNSSNGPNLITTQTNSQALSQPIASSNVHDNFMNNEITASKIDDGNNSKPLSPGWTDQTAYNAFGITTGMFNTTTMDDVYNYLFDDEDTPPNPKKE,881,False,1195,1195,0,-8.0,manual,Kitzman,Massively parallel single-amino-acid mutagenesis,2015,10.1038/nmeth.3223,2-65,GAL4,"Growth (no selection, 24h)",Growth,GAL4_YEAST_full_11-26-2021_b02.a2m,1,881,881,0.2,0.2,16159,0.707,623.0,7942.3,12.74847512,medium,163.0,0.2616372392,GAL4_YEAST_Kitzman_2015.csv,SEL_C_64h,1,mutant,GAL4_YEAST_theta_0.2.npy,GAL4_YEAST.pdb,0.1,,OrganismalFitness -GCN4_YEAST_Staller_2018,GCN4_YEAST_Staller_2018.csv,GCN4_YEAST,Eukaryote,Saccharomyces cerevisiae,MSEYQPSLFALNPMGFSPLDGSKSTNENVSASTSTAKPMVGQLIFDKFIKTEEDPIIKQDTPSNLDFDFALPQTATAPDAKTVLPIPELDDAVVESFFSSSTDSTPMFEYENLEDNSKEWTSLFDNDIPVTTDDVSLADKAIESTEEVSLVPSNLEVSTTSFLPTPVLEDAKLTQTRKVKKPNSVVKKSHHVGKDDESRLDHLGVVAYNRKQRSIPLSPIVPESSDPAALKRARNTEAARRSRARKLQRMKQLEDKVEELLSKNYHLENEVARLKKLVGER,281,True,2638,33,2605,1.293757864,median,Staller,A High-Throughput Mutational Scan of an Intrinsically Disordered Acidic Transcriptional Activation 
Domain,2018,10.1016/j.cels.2018.01.015,101-144,Gcn4,Binding,FACS,GCN4_YEAST_full_24-02-2022_b03.a2m,1,281,281,0.3,0.2,350,0.719,202.0,177.9,0.8806930693,low,1.0,0.00495049505,GCN4_YEAST_Staller_2018.csv,Induction,1,mutant,GCN4_YEAST_theta_0.2.npy,GCN4_YEAST.pdb,0.1,,Binding -GDIA_HUMAN_Silverstein_2021,GDIA_HUMAN_Silverstein_2021.csv,GDIA_HUMAN,Human,Homo sapiens,MDEEYDVIVLGTGLTECILSGIMSVNGKKVLHMDRNPYYGGESSSITPLEELYKRFQLLEGPPESMGRGRDWNVDLIPKFLMANGQLVKMLLYTEVTRYLDFKVVEGSFVYKGGKIYKVPSTETEALASNLMGMFEKRRFRKFLVFVANFDENDPKTFEGVDPQTTSMRDVYRKFDLGQDVIDFTGHALALYRTDDYLDQPCLETVNRIKLYSESLARYGKSPYLYPLYGLGELPQGFARLSAIYGGTYMLNKPVDDIIMENGKVVGVKSEGEVARCKQLICDPSYIPDRVRKAGQVIRIICILSHPIKNTNDANSCQIIIPQNQVNRKSDIYVCMISYAHNVAAQGKYIAIASTTVETTDPEKEVEPALELLEPIDQKFVAISDLYEPIDDGCESQVFCSCSYDATTHFETTCNDIKDIYKRMAGTAFDFENMKRKQNDVFGEAEQ,447,False,1154,1154,0,0.8425936955,median,Silverstein,A systematic genotype-phenotype map for missense variants in the human intellectual disability-associated gene GDI1,2021,10.1101/2021.10.06.463360,2-447,GDI1,Yeast Growth,Growth,GDIA_HUMAN_2023-10-12_b05.a2m,1,447,447,0.5,0.2,5196,0.996,445.0,398.1,0.8946067416,Low,86.0,0.193258427,media-1.xlsx,fitness,1,mutant,GDIA_HUMAN_theta0.2_2023-10-12_b05.npy,GDIA_HUMAN.pdb,1.0,,OrganismalFitness -GFP_AEQVI_Sarkisyan_2016,GFP_AEQVI_Sarkisyan_2016.csv,GFP_AEQVI,Eukaryote,Aequorea victoria,MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK,238,True,51714,1084,50630,2.5,manual,Sarkisyan,Local fitness landscape of the green fluorescent protein,2016,10.1038/nature17995,3-237,GFP,Fluorescence,FACS,GFP_AEQVI_full_04-29-2022_b08.a2m,1,238,238,0.8,0.2,396,0.975,232.0,14.9,0.06422413793,low,0.0,0.0,GFP_AEQVI_Sarkisyan_2016.csv,mean_medianBrightness_per_aaseq,1,mutant,GFP_AEQVI_theta_0.2.npy,GFP_AEQVI.pdb,0.1,,Activity 
-GLPA_HUMAN_Elazar_2016,GLPA_HUMAN_Elazar_2016.csv,GLPA_HUMAN,Human,Homo sapiens,MYGKIIFVLLLSEIVSISASSTTGVAMHTSTSSSVTKSYISSQTNDTHKRDTYAATPRAHEVSEISVRTVYPPEEETGERVQLAHHFSEPEITLIIFGVMAGVIGTILLISYGIRRLIKKSPSDVKPLPSPDTDVPLSSVEIENPETSDQ,150,False,245,245,0,0.9321105779,median,Elazar,Mutational scanning reveals the determinants of protein insertion and association energetics in the plasma membrane,2016,10.7554/eLife.12125,94-108,Glycophorin A membrane domain,Membrane-protein insertion,TOXCAT-Beta-lactamase (TbL) screen,GLPA_HUMAN_2023-10-12_b03.a2m,1,150,150,0.3,0.2,273,1.0,150.0,81.0,0.54,Low,1.0,0.006666666667,urn_mavedb_00000051-c-1_scores.csv,score,-1,mutant,GLPA_HUMAN_theta0.2_2023-10-12_b03.npy,GLPA_HUMAN.pdb,1.0,93.0,Expression -GRB2_HUMAN_Faure_2021,GRB2_HUMAN_Faure_2021.csv,GRB2_HUMAN,Human,Homo sapiens,MEAIAKYDFKATADDELSFKRGDILKVLNEECDQNWYKAELNGKDGFIPKNYIEMKPHPWFFGKIPRAKAEEMLSKQRHDGAFLIRESESAPGDFSLSVKFGNDVQHFKVLRDGAGKYFLWVVKFNSLNELVDYHRSTSVSRNQQIFLRDIEQVPQQPTYVQALFDFDPQEDGELGFRRGDFIHVMDNSDPNWWKGACHGQTGMFPRNYVTPVNRNV,217,True,63366,1034,62332,-0.7,manual,Faure,Mapping the energetic and allosteric landscapes of protein binding domains,2022,10.1038/s41586-022-04586-4,159-214,GRB2-SH3,Yeast growth,Growth,GRB2_HUMAN_full_11-26-2021_b05.a2m,1,217,217,0.5,0.2,33228,0.816,177.0,1485.9,8.394915254,medium,42.0,0.2372881356,GRB2_HUMAN_Faure_2021.csv,fitness,1,mutant,GRB2_HUMAN_theta_0.2.npy,GRB2_HUMAN.pdb,0.1,,OrganismalFitness -HCP_LAMBD_Tsuboyama_2023_2L6Q,HCP_LAMBD_Tsuboyama_2023_2L6Q.csv,HCP_LAMBD,Virus,Escherichia phage lambda,VRQEELAAARAALHDLMTGKRVATVQKDGRRVEFTATSVSDLKKYIAELEVQTGM,55,False,1040,1040,0,-0.350614016,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-21,1-55,Head completion protein,Stability,cDNA display 
proteolysis,HCP_LAMBD_2023-08-07_b05.a2m,1,55,55,0.5,0.01,2128,0.945,52.0,606.5,11.66346154,Medium,15.0,0.2884615385,Tsuboyama2023_Dataset2_Dataset18,ddG_ML_float,1,mut_type,HCP_LAMBD_theta0.01_2023-08-07_b05.npy,HCP_LAMBD.pdb,1.0,,Stability -HECD1_HUMAN_Tsuboyama_2023_3DKM,HECD1_HUMAN_Tsuboyama_2023_3DKM.csv,HECD1_HUMAN,Human,Homo sapiens,NLYFQGLKYMVPGARVTRGLDWKWRDQDGSPQGEGTVTGELHNGWIDVTWDAGGSNSYRMGAEGKFDLKLAP,72,True,5586,1244,4342,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-22,1-72,E3 ubiquitin-protein ligase HECTD1,Stability,cDNA display proteolysis,HECD1_HUMAN_2023-08-07_b03.a2m,1,72,72,0.3,0.2,18660,0.903,65.0,1192.3,18.34307692,Medium,24.0,0.3692307692,Tsuboyama2023_Dataset2_Dataset19,ddG_ML_float,1,mut_type,HECD1_HUMAN_theta0.2_2023-08-07_b03.npy,HECD1_HUMAN.pdb,1.0,,Stability -HEM3_HUMAN_Loggerenberg_2023,HEM3_HUMAN_Loggerenberg_2023.csv,HEM3_HUMAN,Human,Homo sapiens,MSGNGNAAATAEENSPKMRVIRVGTRKSQLARIQTDSVVATLKASYPGLQFEIIAMSTTGDKILDTALSKIGEKSLFTKELEHALEKNEVDLVVHSLKDLPTVLPPGFTIGAICKRENPHDAVVFHPKFVGKTLETLPEKSVVGTSSLRRAAQLQRKFPHLEFRSIRGNLNTRLRKLDEQQEFSAIILATAGLQRMGWHNRVGQILHPEECMYAVGQGALGVEVRAKDQDILDLVGVLHDPETLLRCIAERAFLRHLEGGCSVPVAVHTAMKDGQLYLTGGVWSLDGSDSIQETMQATIHVPAQHEDGPEDDPQLVGITARNIPRGPQLAAQNLGISLANLLLSKGAKNILDVARQLNDAH,361,False,5689,5689,0,0.6142990455,median,van Loggerenberg,Systematically testing human HMBS missense variants to reveal mechanism and pathogenic variation,2023,10.1101/2023.02.06.527353,19-360,hydroxymethylbilane synthase,activity,Yeast complementation,HEM3_HUMAN_2023-08-07_b02.a2m,1,361,361,0.2,0.2,59544,0.85,307.0,11510.2,37.49250814,Medium,500.0,1.628664495,,score,1,mutant,HEM3_HUMAN_theta0.2_2023-08-07_b02.npy,HEM3_HUMAN.pdb,1.0,,Activity -HIS7_YEAST_Pokusaeva_2019,HIS7_YEAST_Pokusaeva_2019.csv,HIS7_YEAST,Eukaryote,Saccharomyces 
cerevisiae,MTEQKALVKRITNETKIQIAISLKGGPLAIEHSIFPEKEAEAVAEQATQSQVINVHTGIGFLDHMIHALAKHSGWSLIVECIGDLHIDDHHTTEDCGIALGQAFKEALGAVRGVKRFGSGFAPLDEALSRAVVDLSNRPYAVVELGLQREKVGDLSCEMIPHFLESFAEASRITLHVDCLRGKNDHHRSESAFKALAVAIREATSPNGTNDVPSTKGVLM,220,True,496137,168,495969,0.3,manual,Pokusaeva,An experimental assay of the interactions of amino acids from orthologous sequences shaping a complex fitness landscape,2019,10.1371/journal.pgen.1008079,6-211,IGP dehydratase (HIS3),Growth,Growth,HIS7_YEAST_full_11-26-2021_b09.a2m,1,220,220,0.9,0.2,40154,0.873,192.0,5191.3,27.03802083,medium,318.0,1.65625,HIS7_YEAST_Pokusaeva_2019.csv,selection,1,mutant,HIS7_YEAST_theta_0.2.npy,HIS7_YEAST.pdb,0.1,,OrganismalFitness -HMDH_HUMAN_Jiang_2019,HMDH_HUMAN_Jiang_2019.csv,HMDH_HUMAN,Human,Homo sapiens,MLSRLFRMHGLFVASHPWEVIVGTVTLTICMMSMNMFTGNNKICGWNYECPKFEEDVLSSDIIILTITRCIAILYIYFQFQNLRQLGSKYILGIAGLFTIFSSFVFSTVVIHFLDKELTGLNEALPFFLLLIDLSRASTLAKFALSSNSQDEVRENIARGMAILGPTFTLDALVECLVIGVGTMSGVRQLEIMCCFGCMSVLANYFVFMTFFPACVSLVLELSRESREGRPIWQLSHFARVLEEEENKPNPVTQRVKMIMSLGLVLVHAHSRWIADPSPQNSTADTSKVSLGLDENVSKRIEPSVSLWQFYLSKMISMDIEQVITLSLALLLAVKYIFFEQTETESTLSLKNPITSPVVTQKKVPDNCCRREPMLVRNNQKCDSVEEETGINRERKVEVIKPLVAETDTPNRATFVVGNSSLLDTSSVLVTQEPEIELPREPRPNEECLQILGNAEKGAKFLSDAEIIQLVNAKHIPAYKLETLMETHERGVSIRRQLLSKKLSEPSSLQYLPYRDYNYSLVMGACCENVIGYMPIPVGVAGPLCLDEKEFQVPMATTEGCLVASTNRGCRAIGLGGGASSRVLADGMTRGPVVRLPRACDSAEVKAWLETSEGFAVIKEAFDSTSRFARLQKLHTSIAGRNLYIRFQSRSGDAMGMNMISKGTEKALSKLHEYFPEMQILAVSGNYCTDKKPAAINWIEGRGKSVVCEAVIPAKVVREVLKTTTEAMIEVNINKNLVGSAMAGSIGGYNAHAANIVTAIYIACGQDAAQNVGSSNCITLMEASGPTNEDLYISCTMPSIEIGTVGGGTNLLPQQACLQMLGVQGACKDNPGENARQLARIVCGTVMAGELSLMAALAAGHLVKSHMIHNRSKINLQDLQGACTKKTA,888,False,16853,16853,0,0.48275,median,Jiang,Exhaustive mapping of missense variation in coronary heart disease-related genes,2019,https://hdl.handle.net/1807/98076,2-888,3-hydroxy-3-methylglutaryl-coenzyme A reductase,Fitness,Resistance to statin 
inhibition,HMDH_HUMAN_2023-10-12_b05.a2m,1,888,888,0.5,0.2,3153,0.995,884.0,554.6,0.6273755656,Low,778.0,0.8800904977,urn_mavedb_00000035-a-1_scores.csv,score,1,mutant,HMDH_HUMAN_theta0.2_2023-10-12_b05.npy,HMDH_HUMAN.pdb,1.0,,OrganismalFitness -HSP82_YEAST_Cote-Hammarlof_2020_growth-H2O2,HSP82_YEAST_Cote-Hammarlof_2020_growth-H2O2.csv,HSP82_YEAST,Eukaryote,Saccharomyces cerevisiae,MASETFEFQAEITQLMSLIINTVYSNKEIFLRELISNASDALDKIRYKSLSDPKQLETEPDLFIRITPKPEQKVLEIRDSGIGMTKAELINNLGTIAKSGTKAFMEALSAGADVSMIGQFGVGFYSLFLVADRVQVISKSNDDEQYIWESNAGGSFTVTLDEVNERIGRGTILRLFLKDDQLEYLEEKRIKEVIKRHSEFVAYPIQLVVTKEVEKEVPIPEEEKKDEEKKDEEKKDEDDKKPKLEEVDEEEEKKPKTKKVKEEVQEIEELNKTKPLWTRNPSDITQEEYNAFYKSISNDWEDPLYVKHFSVEGQLEFRAILFIPKRAPFDLFESKKKKNNIKLYVRRVFITDEAEDLIPEWLSFVKGVVDSEDLPLNLSREMLQQNKIMKVIRKNIVKKLIEAFNEIAEDSEQFEKFYSAFSKNIKLGVHEDTQNRAALAKLLRYNSTKSVDELTSLTDYVTRMPEHQKNIYYITGESLKAVEKSPFLDALKAKNFEVLFLTDPIDEYAFTQLKEFEGKTLVDITKDFELEETDEEKAEREKEIKEYEPLTKALKEILGDQVEKVVVSYKLLDAPAAIRTGQFGWSANMERIMKAQALRDSSMSSYMSSKKTFEISPKSPIIKELKKRVDEGGAQDKTVKDLTKLLYETALLTSGFSLDEPTSFASRINRLISLGLNIDEDEETETAPEASTAAPVEEVPADTEMEEVD,709,False,2252,2252,0,-0.0020874765,median,Cote-Hammarlof,The Adaptive Potential of the Middle Domain of Yeast Hsp90,2020,10.1093/molbev/msaa211,291-409,HSP82,Growth (H2O2 stress),,HSP82_YEAST_2023-08-07_b01.a2m,1,709,709,0.1,0.2,48695,0.917,650.0,4395.2,6.761846154,Medium,531.0,0.8169230769,,score,1,mut_proteingym,HSP82_YEAST_theta0.2_2023-08-07_b01.npy,HSP82_YEAST.pdb,1.0,290.0,OrganismalFitness -HSP82_YEAST_Flynn_2019,HSP82_YEAST_Flynn_2019.csv,HSP82_YEAST,Eukaryote,Saccharomyces 
cerevisiae,MASETFEFQAEITQLMSLIINTVYSNKEIFLRELISNASDALDKIRYKSLSDPKQLETEPDLFIRITPKPEQKVLEIRDSGIGMTKAELINNLGTIAKSGTKAFMEALSAGADVSMIGQFGVGFYSLFLVADRVQVISKSNDDEQYIWESNAGGSFTVTLDEVNERIGRGTILRLFLKDDQLEYLEEKRIKEVIKRHSEFVAYPIQLVVTKEVEKEVPIPEEEKKDEEKKDEEKKDEDDKKPKLEEVDEEEEKKPKTKKVKEEVQEIEELNKTKPLWTRNPSDITQEEYNAFYKSISNDWEDPLYVKHFSVEGQLEFRAILFIPKRAPFDLFESKKKKNNIKLYVRRVFITDEAEDLIPEWLSFVKGVVDSEDLPLNLSREMLQQNKIMKVIRKNIVKKLIEAFNEIAEDSEQFEKFYSAFSKNIKLGVHEDTQNRAALAKLLRYNSTKSVDELTSLTDYVTRMPEHQKNIYYITGESLKAVEKSPFLDALKAKNFEVLFLTDPIDEYAFTQLKEFEGKTLVDITKDFELEETDEEKAEREKEIKEYEPLTKALKEILGDQVEKVVVSYKLLDAPAAIRTGQFGWSANMERIMKAQALRDSSMSSYMSSKKTFEISPKSPIIKELKKRVDEGGAQDKTVKDLTKLLYETALLTSGFSLDEPTSFASRINRLISLGLNIDEDEETETAPEASTAAPVEEVPADTEMEEVD,709,False,13294,13294,0,-0.3,manual,Flynn,Comprehensive fitness maps of Hsp90 show widespread environmental dependence,2019,10.7554/eLife.53810,2-709,HSP82,"growth, nitrogen depletion (0.0125% ammonium sulfate), hyperosmotic shock (0.8 M NaCl), alcohol stress (7.5% ethanol), sulfhydryl-oxidation (0.85 mM diamide), temperature shock (37C)",,HSP82_YEAST_full_11-26-2021_b01.a2m,1,709,709,0.1,0.2,38923,0.862,611.0,3684.8,6.030769231,medium,433.0,0.7086743044,HSP82_YEAST_Flynn_2019.csv,s (37°C),1,mutant,HSP82_YEAST_theta_0.2.npy,HSP82_YEAST.pdb,1.0,,OrganismalFitness -HSP82_YEAST_Mishra_2016,HSP82_YEAST_Mishra_2016.csv,HSP82_YEAST,Eukaryote,Saccharomyces cerevisiae 
S288C,MASETFEFQAEITQLMSLIINTVYSNKEIFLRELISNASDALDKIRYKSLSDPKQLETEPDLFIRITPKPEQKVLEIRDSGIGMTKAELINNLGTIAKSGTKAFMEALSAGADVSMIGQFGVGFYSLFLVADRVQVISKSNDDEQYIWESNAGGSFTVTLDEVNERIGRGTILRLFLKDDQLEYLEEKRIKEVIKRHSEFVAYPIQLVVTKEVEKEVPIPEEEKKDEEKKDEEKKDEDDKKPKLEEVDEEEEKKPKTKKVKEEVQEIEELNKTKPLWTRNPSDITQEEYNAFYKSISNDWEDPLYVKHFSVEGQLEFRAILFIPKRAPFDLFESKKKKNNIKLYVRRVFITDEAEDLIPEWLSFVKGVVDSEDLPLNLSREMLQQNKIMKVIRKNIVKKLIEAFNEIAEDSEQFEKFYSAFSKNIKLGVHEDTQNRAALAKLLRYNSTKSVDELTSLTDYVTRMPEHQKNIYYITGESLKAVEKSPFLDALKAKNFEVLFLTDPIDEYAFTQLKEFEGKTLVDITKDFELEETDEEKAEREKEIKEYEPLTKALKEILGDQVEKVVVSYKLLDAPAAIRTGQFGWSANMERIMKAQALRDSSMSSYMSSKKTFEISPKSPIIKELKKRVDEGGAQDKTVKDLTKLLYETALLTSGFSLDEPTSFASRINRLISLGLNIDEDEETETAPEASTAAPVEEVPADTEMEEVD,709,False,4323,4323,0,-0.4,manual,Mishra,Systematic Mutant Analyses Elucidate General and Client-Specific Aspects of Hsp90 Function,2016,10.1016/j.celrep.2016.03.046,2-231,HSP82,Growth,Growth,HSP82_YEAST_full_11-26-2021_b01.a2m,1,709,709,0.1,0.2,38923,0.862,611.0,3684.8,6.030769231,medium,433.0,0.7086743044,HSP82_YEAST_Mishra_2016.csv,selection_coefficient,1,mutant,HSP82_YEAST_theta_0.2.npy,HSP82_YEAST.pdb,0.1,,OrganismalFitness -HXK4_HUMAN_Gersing_2022_activity,HXK4_HUMAN_Gersing_2022_activity.csv,HXK4_HUMAN,Human,Homo sapiens (Human),MLDDRARMEAAKKEKVEQILAEFQLQEEDLKKVMRRMQKEMDRGLRLETHEEASVKMLPTYVRSTPEGSEVGDFLSLDLGGTNFRVMLVKVGEGEEGQWSVKTKHQMYSIPEDAMTGTAEMLFDYISECISDFLDKHQMKHKKLPLGFTFSFPVRHEDIDKGILLNWTKGFKASGAEGNNVVGLLRDAIKRRGDFEMDVVAMVNDTVATMISCYYEDHQCEVGMIVGTGCNACYMEEMQNVELVEGDEGRMCVNTEWGAFGDSGELDEFLLEYDRLVDESSANPGQQLYEKLIGGKYMGELVRLVLLRLVDENLLFHGEASEQLRTRGAFETRFVSQVESDTGDRKQIYNILSTLGLRPSTTDCDIVRRACESVSTRAAHMCSAGLAGVINRMRESRSEDVMRITVGVDGSVYKLHPSFKERFHASVRRLTPSCEITFIESEEGSGRGAALVSAVACKKACMLGQ,465,False,8570,8570,0,0.5631652235,median,Gersing,A comprehensive map of human glucokinase variant activity,2022,10.1101/2022.05.04.490571,2-465,glucokinase regulatory protein,functional complementation to reduced growth on glucose medium,enzymatic 
activity,HXK4_HUMAN_b0.1.a2m,1,465,465,0.1,0.2,23354,1.0,465.0,2336.1,5.023870968,Medium,181.0,0.3892473118,HXK4_HUMAN_Gersing_2022.csv,score,1,mutant,HXK4_HUMAN_theta_0.2.npy,HXK4_HUMAN.pdb,1.0,,OrganismalFitness -HXK4_HUMAN_Gersing_2023_abundance,HXK4_HUMAN_Gersing_2023_abundance.csv,HXK4_HUMAN,Human,Homo sapiens,MLDDRARMEAAKKEKVEQILAEFQLQEEDLKKVMRRMQKEMDRGLRLETHEEASVKMLPTYVRSTPEGSEVGDFLSLDLGGTNFRVMLVKVGEGEEGQWSVKTKHQMYSIPEDAMTGTAEMLFDYISECISDFLDKHQMKHKKLPLGFTFSFPVRHEDIDKGILLNWTKGFKASGAEGNNVVGLLRDAIKRRGDFEMDVVAMVNDTVATMISCYYEDHQCEVGMIVGTGCNACYMEEMQNVELVEGDEGRMCVNTEWGAFGDSGELDEFLLEYDRLVDESSANPGQQLYEKLIGGKYMGELVRLVLLRLVDENLLFHGEASEQLRTRGAFETRFVSQVESDTGDRKQIYNILSTLGLRPSTTDCDIVRRACESVSTRAAHMCSAGLAGVINRMRESRSEDVMRITVGVDGSVYKLHPSFKERFHASVRRLTPSCEITFIESEEGSGRGAALVSAVACKKACMLGQ,465,False,8396,8396,0,0.6,manual,Gersing,Characterizing glucokinase variant mechanisms using a multiplexed abundance assay,2023,10.1101/2023.05.24.542036,2-465,GCK,abundance,Growth,HXK4_HUMAN_2023-08-07_b01.a2m,1,465,465,0.1,0.2,24177,0.966,449.0,2626.4,5.849443207,Medium,170.0,0.3786191537,HXK4_HUMAN_Gersing_2022.csv,score,1,mutant,HXK4_HUMAN_theta0.2_2023-08-07_b01.npy,HXK4_HUMAN.pdb,1.0,,Expression -I6TAH8_I68A0_Doud_2015,I6TAH8_I68A0_Doud_2015.csv,I6TAH8_I68A0,Virus,"Influenza A virus (strain A/Puerto Rico/8/1934 H1N1), Influenza A virus (strain A/Aichi/2/1968 H3N2)",MASQGTKRSYEQMETDGERQNATEIRASVGKMIDGIGRFYIQMCTELKLSDYEGRLIQNSLTIERMVLSAFDERRNKYLEEHPSAGKDPKKTGGPIYKRVDRKWMRELVLYDKEEIRRIWRQANNGDDATAGLTHMMIWHSNLNDTTYQRTRALVRTGMDPRMCSLMQGSTLPRRSGAAGAAVKGVGTMVMELIRMIKRGINDRNFWRGENGRKTRSAYERMCNILKGKFQTAAQRAMMDQVRESRNPGNAEIEDLIFLARSALILRGSVAHKSCLPACVYGPAVASGYDFEKEGYSLVGIDPFKLLQNSQVYSLIRPNENPAHKSQLVWMACNSAAFEDLRVLSFIRGTKVSPRGKLSTRGVQIASNENMDAMESSTLELRSRYWAIRTRSGGNTNQQRASAGQISVQPAFSVQRNLPFDKPTIMAAFTGNTEGRTSDMRAEIIRMMEGAKPEEMSFQGRGVFELSDERAANPIVPSFDMSNEGSYFFGDNAEEYDN,498,False,9462,9462,0,-2.329469119,median,Doud,Site-Specific Amino Acid Preferences Are Mostly Conserved in Two Closely Related Protein 
Homologs,2015,10.1093/molbev/msv167,1-498,Influenza nucleoprotein,,Growth,I6TAH8_I68A0_theta0.99_full_11-26-2021_b09.a2m,1,498,498,0.9,0.01,15390,1.0,498.0,1493.3,2.998594378,medium,2118.0,4.253012048,I6TAH8_I68A0_Doud_2015.csv,log_fitness_by_syn_mut_fitness,1,mutant,I6TAH8_I68A0_theta_0.01.npy,I6TAH8_I68A0.pdb,0.1,,OrganismalFitness -IF1_ECOLI_Kelsic_2016,IF1_ECOLI_Kelsic_2016.csv,IF1_ECOLI,Prokaryote,Escherichia coli,MAKEDNIEMQGTVLETLPNTMFRVELENGHVVTAHISGKMRKNYIRILTGDKVTVELTPYDLSKGRIVFRSR,72,False,1367,1367,0,0.8,manual,Kelsic,RNA Structural Determinants of Optimal Codons Revealed by MAGE-Seq,2016,10.1016/j.cels.2016.11.004,1-72,infA,Growth,Growth,IF1_ECOLI_full_11-26-2021_b02.a2m,1,72,72,0.2,0.2,361806,0.806,58.0,38189.0,658.4310345,high,46.0,0.7931034483,IF1_ECOLI_Kelsic_2016.csv,fitness_rich,1,mutant,IF1_ECOLI_theta_0.2.npy,IF1_ECOLI.pdb,0.1,,OrganismalFitness -ILF3_HUMAN_Tsuboyama_2023_2L33,ILF3_HUMAN_Tsuboyama_2023_2L33.csv,ILF3_HUMAN,Human,Homo sapiens,MLTKHGKNPVMELNEKRRGLKYELISETGGSHDKRFVMEVEVDGQKFQGAGSNKKVAKAYAALAALEKLFP,71,False,1329,1329,0,-0.4,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-23,1-71,Interleukin enhancer-binding factor 3,Stability,cDNA display proteolysis,ILF3_HUMAN_2023-08-07_b03.a2m,1,71,71,0.3,0.2,145438,0.915,65.0,21228.0,326.5846154,High,57.0,0.8769230769,Tsuboyama2023_Dataset2_Dataset20,ddG_ML_float,1,mut_type,ILF3_HUMAN_theta0.2_2023-08-07_b03.npy,ILF3_HUMAN.pdb,1.0,,Stability -ISDH_STAAW_Tsuboyama_2023_2LHR,ISDH_STAAW_Tsuboyama_2023_2LHR.csv,ISDH_STAAW,Prokaryote,Staphylococcus aureus,YNLQKLLAPYHKAKTLERQVYELEKLQEKLPEKYKAEYKKKLDQTRVELADQVKS,55,True,1944,940,1004,-0.7942702247,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-24,1-55,Iron-regulated surface determinant protein H,Stability,cDNA display 
proteolysis,ISDH_STAAW_2023-08-07_b01.a2m,1,55,55,0.1,0.2,115488,0.818,45.0,38123.1,847.18,High,6.0,0.1333333333,Tsuboyama2023_Dataset2_Dataset21,ddG_ML_float,1,mut_type,ISDH_STAAW_theta0.2_2023-08-07_b01.npy,ISDH_STAAW.pdb,1.0,,Stability -KCNE1_HUMAN_Muhammad_2023_expression,KCNE1_HUMAN_Muhammad_2023_expression.csv,KCNE1_HUMAN,Human,Homo sapiens,MILSNTTAVTPFLTKLWQETVQQGGNMSGLARRSPRSGDGKLEALYVLMVLGFFGFFTLGIMLSYIRSKKLEHSNDPFNVYIESDAWQEKDKAYVQARVLESYRSCYVVENHLAIEQPNTHLPETKPSP,129,False,2339,2339,0,0.75,manual,Muhammad,"High-throughput functional mapping of variants in an arrhythmia gene, KCNE1, reveals novel biology",2023,10.1101/2023.04.28.538612,1-128,KCNE1,cell surface expression,FACS,KCNE1_HUMAN_2023-08-07_b02.a2m,1,129,129,0.2,0.2,2118,0.969,125.0,213.7,1.7096,Medium,5.0,0.04,KCNE1_HUMAN_Muhammad_2023.csv,TrafScore,1,mutant,KCNE1_HUMAN_theta0.2_2023-08-07_b02.npy,KCNE1_HUMAN.pdb,1.0,,Expression -KCNE1_HUMAN_Muhammad_2023_function,KCNE1_HUMAN_Muhammad_2023_function.csv,KCNE1_HUMAN,Human,Homo sapiens,MILSNTTAVTPFLTKLWQETVQQGGNMSGLARRSPRSGDGKLEALYVLMVLGFFGFFTLGIMLSYIRSKKLEHSNDPFNVYIESDAWQEKDKAYVQARVLESYRSCYVVENHLAIEQPNTHLPETKPSP,129,False,2315,2315,0,0.9043514345,median,Muhammad,"High-throughput functional mapping of variants in an arrhythmia gene, KCNE1, reveals novel biology",2023,10.1101/2023.04.28.538612,1-128,KCNE1,potassium channel function,Growth,KCNE1_HUMAN_2023-08-07_b02.a2m,1,129,129,0.2,0.2,2118,0.969,125.0,213.7,1.7096,Medium,5.0,0.04,KCNE1_HUMAN_Muhammad_2023.csv,funcScore,1,mutant,KCNE1_HUMAN_theta0.2_2023-08-07_b02.npy,KCNE1_HUMAN.pdb,1.0,,Activity -KCNH2_HUMAN_Kozek_2020,KCNH2_HUMAN_Kozek_2020.csv,KCNH2_HUMAN,Human,Homo 
sapiens,MPVRRGHVAPQNTFLDTIIRKFEGQSRKFIIANARVENCAVIYCNDGFCELCGYSRAEVMQRPCTCDFLHGPRTQRRAAAQIAQALLGAEERKVEIAFYRKDGSCFLCLVDVVPVKNEDGAVIMFILNFEVVMEKDMVGSPAHDTNHRGPPTSWLAPGRAKTFRLKLPALLALTARESSVRSGGAGGAGAPGAVVVDVDLTPAAPSSESLALDEVTAMDNHVAGLGPAEERRALVGPGSPPRSAPGQLPSPRAHSLNPDASGSSCSLARTRSRESCASVRRASSADDIEAMRAGVLPPPPRHASTGAMHPLRSGLLNSTSDSDLVRYRTISKIPQITLNFVDLKGDPFLASPTSDREIIAPKIKERTHNVTEKVTQVLSLGADVLPEYKLQAPRIHRWTILHYSPFKAVWDWLILLLVIYTAVFTPYSAAFLLKETEEGPPATECGYACQPLAVVDLIVDIMFIVDILINFRTTYVNANEEVVSHPGRIAVHYFKGWFLIDMVAAIPFDLLIFGSGSEELIGLLKTARLLRLVRVARKLDRYSEYGAAVLFLLMCTFALIAHWLACIWYAIGNMEQPHMDSRIGWLHNLGDQIGKPYNSSGLGGPSIKDKYVTALYFTFSSLTSVGFGNVSPNTNSEKIFSICVMLIGSLMYASIFGNVSAIIQRLYSGTARYHTQMLRVREFIRFHQIPNPLRQRLEEYFQHAWSYTNGIDMNAVLKGFPECLQADICLHLNRSLLQHCKPFRGATKGCLRALAMKFKTTHAPPGDTLVHAGDLLTALYFISRGSIEILRGDVVVAILGKNDIFGEPLNLYARPGKSNGDVRALTYCDLHKIHRDDLLEVLDMYPEFSDHFWSSLEITFNLRDTNMIPGSPGSTELEGGFSRQRKRKLSFRRRTDKDTEQPGEVSALGPGRAGAGPSSRGRPGGPWGESPSSGPSSPESSEDEGPGRSSSPLRLVPFSSPRPPGEPPGGEPLMEDCEKSSDTCNPLSGAFSGVSNIFSFWGDSRGRQYQELPRCPAPTPSLLNIPLSSPGRRPRGDVESRLDALQRQLNRLETRLSADMATVLQLLQRQMTLVPPAYSAVTTPGPGPTSTSPLLPVSPLPTLTLDSLSQVSQFMACEELPPGAPELPQEGPTRRLSLPGQLGALTSQPLHRHGSDPGS,1159,False,200,200,0,58.87492867,median,Kozek,High-throughput discovery of trafficking-deficient variants in the cardiac potassium channel KCNH2: Deep mutational scan of KCNH2 trafficking,2020,10.1016/j.hrthm.2020.05.041,545-555,KCNH2,Voltage,Voltage,KCNH2_HUMAN_535-565_11-26-2021_b05.a2m,535,565,31,0.5,0.2,13907,1.0,31.0,186.6,6.019354839,medium,1.0,0.03225806452,KCNH2_HUMAN_Kozek_2020.csv,score.ave,1,var,KCNH2_HUMAN_theta_0.2.npy,KCNH2_HUMAN.pdb,0.1,,Activity -KCNJ2_MOUSE_Coyote-Maestas_2022_function,KCNJ2_MOUSE_Coyote-Maestas_2022_function.csv,KCNJ2_MOUSE,Human,Homo 
sapiens,MGSVRTNRYSIVSSEEDGMKLATMAVANGFGNGKSKVHTRQQCRSRFVKKDGHCNVQFINVGEKGQRYLADIFTTCVDIRWRWMLVIFCLAFVLSWLFFGCVFWLIALLHGDLDTSKVSKACVSEVNSFTAAFLFSIETQTTIGYGFRCVTDECPIAVFMVVFQSIVGCIIDAFIIGAVMAKMAKPKKRNETLVFSHNAVIAMRDGKLCLMWRVGNLRKSHLVEAHVRAQLLKSRITSEGEYIPLDQIDINVGFDSGIDRIFLVSPITIVHEIDEDSPLYDLSKQDIDNADFEIVVILEGMVEATAMTTQCRSSYLANEILWGHRYEPVLFEEKHYYKVDYSRFHKTYEVPNTPLCSARDLAEKKYILSNANSFCYENEVALTSKEEEEDSENGVPESTSTDSPPGIDLHNQASVPLEPRPLRRESEI,428,False,6963,6963,0,0.039,median,Coyote-Maestas,"Determinants of trafficking, conduction, and disease within a K+ channel revealed through multiparametric deep mutational scanning",2022,10.7554/eLife.76903,2-392,Kir2.1,Ion conduction,FACS,KCNJ2_MOUSE_b01.a2m,1,428,428,0.1,0.2,20953,0.86,370.0,986.7,2.666756757,Medium,94.0,0.2540540541,,function_score,1,mutant_noflag,KCNJ2_MOUSE_b01_theta_0.2.npy,KCNJ2_MOUSE.pdb,1.0,,Activity -KCNJ2_MOUSE_Coyote-Maestas_2022_surface,KCNJ2_MOUSE_Coyote-Maestas_2022_surface.csv,KCNJ2_MOUSE,Human,Homo sapiens,MGSVRTNRYSIVSSEEDGMKLATMAVANGFGNGKSKVHTRQQCRSRFVKKDGHCNVQFINVGEKGQRYLADIFTTCVDIRWRWMLVIFCLAFVLSWLFFGCVFWLIALLHGDLDTSKVSKACVSEVNSFTAAFLFSIETQTTIGYGFRCVTDECPIAVFMVVFQSIVGCIIDAFIIGAVMAKMAKPKKRNETLVFSHNAVIAMRDGKLCLMWRVGNLRKSHLVEAHVRAQLLKSRITSEGEYIPLDQIDINVGFDSGIDRIFLVSPITIVHEIDEDSPLYDLSKQDIDNADFEIVVILEGMVEATAMTTQCRSSYLANEILWGHRYEPVLFEEKHYYKVDYSRFHKTYEVPNTPLCSARDLAEKKYILSNANSFCYENEVALTSKEEEEDSENGVPESTSTDSPPGIDLHNQASVPLEPRPLRRESEI,428,False,6917,6917,0,-0.157352583,median,Coyote-Maestas,"Determinants of trafficking, conduction, and disease within a K+ channel revealed through multiparametric deep mutational scanning",2022,10.7554/eLife.76903,2-392,Kir2.1,Surface trafficking,FACS,KCNJ2_MOUSE_b01.a2m,1,428,428,0.1,0.2,20953,0.86,370.0,986.7,2.666756757,Medium,94.0,0.2540540541,,surface_score,1,mutant_noflag,KCNJ2_MOUSE_b01_theta_0.2.npy,KCNJ2_MOUSE.pdb,1.0,,Expression -KKA2_KLEPN_Melnikov_2014,KKA2_KLEPN_Melnikov_2014.csv,KKA2_KLEPN,Prokaryote,Klebsiella 
pneumoniae,MIEQDGLHAGSPAAWVERLFGYDWAQQTIGCSDAAVFRLSAQGRPVLFVKTDLSGALNELQDEAARLSWLATTGVPCAAVLDVVTEAGRDWLLLGEVPGQDLLSSHLAPAEKVSIMADAMRRLHTLDPATCPFDHQAKHRIERARTRMEAGLVDQDDLDEEHQGLAPAELFARLKARMPDGEDLVVTHGDACLPNIMVENGRFSGFIDCGRLGVADRYQDIALATRDIAEELGGEWADRFLVLYGIAAPDSQRIAFYRLLDEFF,264,False,4960,4960,0,0.5,manual,Melnikov,Comprehensive mutational scanning of a kinasein vivoreveals substrate-dependent fitness landscapes,2014,10.1093/nar/gku511,1-264,"APH(3’)II, neo","Growth (225 ug/mL kanamycin) 1:1, 1:2, 1:4, 1:8 dilutions",Growth,KKA2_KLEPN_full_11-26-2021_b02.a2m,1,264,264,0.2,0.2,234760,0.795,210.0,76876.7,366.0795238,high,377.0,1.795238095,KKA2_KLEPN_Melnikov_2014.csv,Kan18_avg,1,mutant,KKA2_KLEPN_theta_0.2.npy,KKA2_KLEPN.pdb,0.1,,OrganismalFitness -LGK_LIPST_Klesmith_2015,LGK_LIPST_Klesmith_2015.csv,LGK_LIPST,Eukaryote,Lipomyces starkeyi (Oleaginous yeast),MPIATSTGDNVLDFTVLGLNSGTSMDGIDCALCHFYQKTPDAPMEFELLEYGEVPLAQPIKQRVMRMILEDTTSPSELSEVNVILGEHFADAVRQFAAERNVDLSTIDAIASHGQTIWLLSMPEEGQVKSALTMAEGAIIAARTGITSITDFRISDQAAGRQGAPLIAFFDALLLHHPTKLRACQNIGGIANVCFIPPDVDGRRTDEYYDFDTGPGNVFIDAVVRHFTNGEQEYDKDGAMGKRGKVDQELVDDFLKMPYFQLDPPKTTGREVFRDTLAHDLIRRAEAKGLSPDDIVATTTRITAQAIVDHYRRYAPSQEIDEIFMCGGGAYNPNIVEFIQQSYPNTKIMMLDEAGVPAGAKEAITFAWQGMECLVGRSIPVPTRVETRQHYVLGKVSPGLNYRSVMKKGMAFGGDAQQLPWVSEMIVKKKGKVITNNWA,439,False,7890,7890,0,-0.6245,median,Klesmith,Comprehensive Sequence-Flux Mapping of a Levoglucosan Utilization Pathway in E. 
coli,2015,10.1021/acssynbio.5b00131,1-439,LGK (levoglucosan kinase),Growth,Growth,LGK_LIPST_full_11-26-2021_b03.a2m,1,439,439,0.3,0.2,31069,0.813,357.0,7971.0,22.32773109,medium,588.0,1.647058824,B3VI55_LIPST_Klesmith_2015.csv,SelectionTwo,1,mutant,LGK_LIPST_theta_0.2.npy,LGK_LIPST.pdb,0.1,,Activity -LYAM1_HUMAN_Elazar_2016,LYAM1_HUMAN_Elazar_2016.csv,LYAM1_HUMAN,Human,Homo sapiens,MIFPWKCQSTQRDLWNIFKLWGWTMLCCDFLAHHGTDCWTYHYSEKPMNWQRARRFCRDNYTDLVAIQNKAEIEYLEKTLPFSRSYYWIGIRKIGGIWTWVGTNKSLTEEAENWGDGEPNNKKNKEDCVEIYIKRNKDAGKWNDDACHKLKAALCYTASCQPWSCSGHGECVEIINNYTCNCDVGYYGPQCQFVIQCEPLEAPELGTMDCTHPLGNFSFSSQCAFSCSEGTNLTGIEETTCGPFGNWSSPEPTCQVIQCEPLSAPDLGIMNCSHPLASFSFTSACTFICSEGTELIGKKKTICESSGIWSNPSPICQKLDKSFSMIKEGDYNPLFIPVAVMVTAFSGLAFIIWLARRLKKGKKSKRSMNDPY,372,False,359,359,0,1.306138768,median,Elazar,Mutational scanning reveals the determinants of protein insertion and association energetics in the plasma membrane,2016,10.7554/eLife.12125,333-355,L-selectin,Membrane-protein insertion,TOXCAT-Beta-lactamase (TbL) screen,LYAM1_HUMAN_2023-10-12_b04.a2m,1,372,372,0.4,0.2,3974,0.825,307.0,412.3,1.342996743,Medium,41.0,0.1335504886,urn_mavedb_00000051-a-1_scores.csv,score,-1,mutant,LYAM1_HUMAN_theta0.2_2023-10-12_b04.npy,LYAM1_HUMAN.pdb,1.0,332.0,Expression -MAFG_MOUSE_Tsuboyama_2023_1K1V,MAFG_MOUSE_Tsuboyama_2023_1K1V.csv,MAFG_MOUSE,Eukaryote,Mus musculus,LTDEELVTMSVRELNQHLRGLSKEEIIQLKQRRRTLKNRGY,41,True,1429,762,667,-0.5,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-25,1-41,Transcription factor MafG,Stability,cDNA display proteolysis,MAFG_MOUSE_2023-08-07_b07.a2m,1,41,41,0.7,0.2,6178,1.0,41.0,156.7,3.82195122,Medium,4.0,0.09756097561,Tsuboyama2023_Dataset2_Dataset22,ddG_ML_float,1,mut_type,MAFG_MOUSE_theta0.2_2023-08-07_b07.npy,MAFG_MOUSE.pdb,1.0,,Stability -MBD11_ARATH_Tsuboyama_2023_6ACV,MBD11_ARATH_Tsuboyama_2023_6ACV.csv,MBD11_ARATH,Eukaryote,Arabidopsis 
thaliana,VSVELPAPSSWKKLFYPNKVGSVKKTEVVFVAPTGEEISNRKQLEQYLKSHPGNPAIAEFDWTTSG,66,True,2116,1155,961,-1.578921171,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-26,1-66,Methyl-CpG-binding domain-containing protein 11,Stability,cDNA display proteolysis,MBD11_ARATH_2023-08-07_b03.a2m,1,66,66,0.3,0.2,26035,0.909,60.0,1510.5,25.175,Medium,11.0,0.1833333333,Tsuboyama2023_Dataset2_Dataset23,ddG_ML_float,1,mut_type,MBD11_ARATH_theta0.2_2023-08-07_b03.npy,MBD11_ARATH.pdb,1.0,,Stability -MET_HUMAN_Estevam_2023,MET_HUMAN_Estevam_2023.csv,MET_HUMAN,Human,Homo sapiens,NPELVQAVQHVVIGPSSLIVHFNEVIGRGHFGCVYHGTLLDNDGKKIHCAVKSLNRITDIGEVSQFLTEGIIMKDFSHPNVLSLLGICLRSEGSPLVVLPYMKHGDLRNFIRNETHNPTVKDLIGFGLQVAKGMKYLASKKFVHRDLAARNCMLDEKFTVKVADFGLARDMYDKEYYSVHNKTGAKLPVKWMALESLQTQKFTTKSDVWSFGVLLWELMTRGAPPYPDVNTFDITVYLLQGRRLLQPEYCPDPLYEVMLKCWHPKAEMRPSFSELVSRISAIFSTFI,287,False,5393,5393,0,-2.0,manual,Estevam,Conserved regulatory motifs in the juxtamembrane domain and kinase N-lobe revealed through deep mutational scanning of the MET receptor tyrosine kinase domain,2023,10.1101/2023.08.03.551866,1-287,MET RTK,Human cell line with growth linked to kinase activity in the absense of IL-3,Growth,MET_HUMAN_2023-08-07_b09.a2m,1,287,287,0.9,0.2,185885,0.951,273.0,5338.5,19.5549451,Medium,200.0,0.73,ex14_scores.csv,IL3_withdrawal_score,1,mutant,MET_HUMAN_theta0.2_2023-08-07_b09.npy,MET_HUMAN.pdb,1.0,,Activity -MK01_HUMAN_Brenan_2016,MK01_HUMAN_Brenan_2016.csv,MK01_HUMAN,Human,Homo sapiens,MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIKKISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQHLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFLTEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQEDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLEQYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS,360,False,6809,6809,0,-8.040790936,median,Brenan,Phenotypic Characterization of a 
Comprehensive Set of MAPK1 /ERK2 Missense Mutants,2016,10.1016/j.celrep.2016.09.061,2-360,MAPK1,Growth,inhibitor resistance,MK01_HUMAN_full_11-26-2021_b06.a2m,1,360,360,0.6,0.2,124248,0.806,290.0,8815.9,30.39965517,medium,287.0,0.9896551724,MK01_HUMAN_Brenan_2016.csv,DOX_Average,-1,mutant,MK01_HUMAN_theta_0.2.npy,MK01_HUMAN.pdb,0.1,,OrganismalFitness -MLAC_ECOLI_MacRae_2023,MLAC_ECOLI_MacRae_2023.csv,MLAC_ECOLI,Prokaryote,Escherichia coli,MFKRLMMVALLVIAPLSAATAADQTNPYKLMDEAAQKTFDRLKNEQPQIRANPDYLRTIVDQELLPYVQVKYAGALVLGQYYKSATPAQREAYFAAFREYLKQAYGQALAMYHGQTYQIAPEQPLGDKTIVPIRVTIIDPNGRPPVRLDFQWRKNSQTGNWQAYDMIAEGVSMITTKQNEWGTLLRTKGIDGLTAQLKSISQQKITLEEKK,211,False,4007,4007,0,-0.1041157905,median,MacRae,Protein-protein interactions in the Mla lipid transport system probed by computational structure prediction and deep mutational scanning,2023,10.1016/j.jbc.2023.104744,1-211,MlaC lipid transporter,cell growth in ∆mlaC and selective medium,Growth,MLAC_ECOLI_2023-08-07_b02.a2m,1,211,211,0.2,0.2,22874,0.934,197.0,7904.3,40.12335025,Medium,126.0,0.6395939086,MLAC_ECOLI_MacRae_2023.csv,score,1,mutant,MLAC_ECOLI_theta0.2_2023-08-07_b02.npy,MLAC_ECOLI.pdb,1.0,,OrganismalFitness -MSH2_HUMAN_Jia_2020,MSH2_HUMAN_Jia_2020.csv,MSH2_HUMAN,Human,Homo 
sapiens,MAVQPKETLQLESAAEVGFVRFFQGMPEKPTTTVRLFDRGDFYTAHGEDALLAAREVFKTQGVIKYMGPAGAKNLQSVVLSKMNFESFVKDLLLVRQYRVEVYKNRAGNKASKENDWYLAYKASPGNLSQFEDILFGNNDMSASIGVVGVKMSAVDGQRQVGVGYVDSIQRKLGLCEFPDNDQFSNLEALLIQIGPKECVLPGGETAGDMGKLRQIIQRGGILITERKKADFSTKDIYQDLNRLLKGKKGEQMNSAVLPEMENQVAVSSLSAVIKFLELLSDDSNFGQFELTTFDFSQYMKLDIAAVRALNLFQGSVEDTTGSQSLAALLNKCKTPQGQRLVNQWIKQPLMDKNRIEERLNLVEAFVEDAELRQTLQEDLLRRFPDLNRLAKKFQRQAANLQDCYRLYQGINQLPNVIQALEKHEGKHQKLLLAVFVTPLTDLRSDFSKFQEMIETTLDMDQVENHEFLVKPSFDPNLSELREIMNDLEKKMQSTLISAARDLGLDPGKQIKLDSSAQFGYYFRVTCKEEKVLRNNKNFSTVDIQKNGVKFTNSKLTSLNEEYTKNKTEYEEAQDAIVKEIVNISSGYVEPMQTLNDVLAQLDAVVSFAHVSNGAPVPYVRPAILEKGQGRIILKASRHACVEVQDEIAFIPNDVYFEKDKQMFHIITGPNMGGKSTYIRQTGVIVLMAQIGCFVPCESAEVSIVDCILARVGAGDSQLKGVSTFMAEMLETASILRSATKDSLIIIDELGRGTSTYDGFGLAWAISEYIATKIGAFCMFATHFHELTALANQIPTVNNLHVTALTTEETLTMLYQVKKGVCDQSFGIHVAELANFPKHVIECAKQKALELEEFQYIGESQGYDIMEPAAKKCYLEREQGEKIIQEFLSKVKQMPFTEMSEENITIKLKQLKAEVIAKNNSFVNEIISRIKVTT,934,False,16749,16749,0,1.0,manual,Jia,Massively parallel functional testing of MSH2 missense variants conferring Lynch Syndrome risk,2020,10.1016/j.ajhg.2020.12.003,1-934,MSH2,"drug resistance (surrogate for protein activity, 6-thioguanine (6-TG))",,MSH2_HUMAN_full_11-26-2021_b05.a2m,1,934,934,0.5,0.2,61226,0.901,842.0,10716.4,12.72731591,medium,1035.0,1.229216152,MSH2_HUMAN_Jia_2020.csv,LOF score,-1,Variant,MSH2_HUMAN_theta_0.2.npy,MSH2_HUMAN.pdb,0.1,,OrganismalFitness -MTH3_HAEAE_RockahShmuel_2015,MTH3_HAEAE_RockahShmuel_2015.csv,MTH3_HAEAE,Prokaryote,Haemophilus aegyptius,MNLISLFSGAGGLDLGFQKAGFRIIAANEYDKSIWKTYESNHSAKLIKGDISKISSDEFPKCDGIIGGPPCQSWSEGGSLRGIDDPRGKLFYEYIRILKQKKPKFFLAENVKGMLAQRHNKAVQEFIQEFDNAGYDVHIILLNANDYGVAQDRKRVFYIGFRKELNINYLPPIPHLIKPTLKDVIWDLKDNPIPALDKNKTNGNKCIYPNHEYFIGSYSTIFMSRNRVRQWNEPAFTVQASGRQCQLHPQAPVMLKVSKNLNKFVEGKEHLYRRLTVRECARVQGFPDDFIFHYESLNDGYKMIGNAVPVNLAYEIAKTIKSALEIRKGN,330,False,1777,1777,0,0.01,manual,Rockah-Shmuel,Systematic Mapping of Protein Mutational Space by Prolonged Drift Reveals the Deleterious 
Effects of Seemingly Neutral Mutations,2015,10.1371/journal.pcbi.1004421,2-330,DNA methylase HaeIII,Growth,Activity,MTH3_HAEAE_full_11-26-2021_b02.a2m,1,330,330,0.2,0.2,82734,0.891,294.0,26962.4,91.70884354,medium,582.0,1.979591837,MTH3_HAEAE_Rockah-Shmuel_2015.csv,Wrel_G17_filtered,1,mutant,MTH3_HAEAE_theta_0.2.npy,MTH3_HAEAE.pdb,0.1,,OrganismalFitness -MTHR_HUMAN_Weile_2021,MTHR_HUMAN_Weile_2021.csv,MTHR_HUMAN,Human,Homo sapiens,MVNEARGNSSLNPCLEGSASSGSESSKDSSRCSTPGLDPERHERLREKMRRRLESGDKWFSLEFFPPRTAEGAVNLISRFDRMAAGGPLYIDVTWHPAGDPGSDKETSSMMIASTAVNYCGLETILHMTCCRQRLEEITGHLHKAKQLGLKNIMALRGDPIGDQWEEEEGGFNYAVDLVKHIRSEFGDYFDICVAGYPKGHPEAGSFEADLKHLKEKVSAGADFIITQLFFEADTFFRFVKACTDMGITCPIVPGIFPIQGYHSLRQLVKLSKLEVPQEIKDVIEPIKDNDAAIRNYGIELAVSLCQELLASGLVPGLHFYTLNREMATTEVLKRLGMWTEDPRRPLPWALSAHPKRREEDVRPIFWASRPKSYIYRTQEWDEFPNGRWGNSSSPAFGELKDYYLFYLKSKSPKEELLKMWGEELTSEESVFEVFVLYLSGEPNRNGHKVTCLPWNDEPLAAETSLLKEELLRVNRQGILTINSQPNINGKPSSDPIVGWGPSGGYVFQKAYLEFFTSRETAEALLQVLKKYELRVNYHLVNVKGENITNAPELQPNAVTWGIFPGREIIQPTVVDPVSFMFWKDEAFALWIERWGKLYEEESPSRTIIQYIHDNYFLVNLVDNDFPLDNCLWQVVEDTLELLNRPTQNARETEAP,656,False,12464,12464,0,0.746,median,Weile,Shifting landscapes of human MTHFR missense-variant effects,2021,10.1016/j.ajhg.2021.05.009,1-656,MTHFR reductase,Growth,,MTHR_HUMAN_2023-08-07_b02.a2m,1,656,656,0.2,0.2,4783,0.96,630.0,614.5,0.9753968254,Low,65.0,0.1031746032,urn_mavedb_00000049-a-6_scores.csv,score,1,mutant,MTHR_HUMAN_theta0.2_2023-08-07_b02.npy,MTHR_HUMAN.pdb,1.0,,OrganismalFitness -MYO3_YEAST_Tsuboyama_2023_2BTT,MYO3_YEAST_Tsuboyama_2023_2BTT.csv,MYO3_YEAST,Eukaryote,Saccharomyces cerevisiae,KDPKFEAAYDFPGSGSSSELPLKKGDIVFISRDEPSGWSLAKLLDGSKEGWVPTAYMTPYK,61,True,3297,947,2350,-1.0,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-27,1-61,Myosin-3,Stability,cDNA display 
proteolysis,MYO3_YEAST_2023-08-07_b07.a2m,1,61,61,0.7,0.2,442941,0.885,54.0,12893.2,238.762963,High,51.0,0.9444444444,Tsuboyama2023_Dataset2_Dataset24,ddG_ML_float,1,mut_type,MYO3_YEAST_theta0.2_2023-08-07_b07.npy,MYO3_YEAST.pdb,1.0,,Stability -NCAP_I34A1_Doud_2015,NCAP_I34A1_Doud_2015.csv,NCAP_I34A1,Virus,"Influenza A virus (strain A/Puerto Rico/8/1934 H1N1), Influenza A virus (strain A/Aichi/2/1968 H3N2)",MASQGTKRSYEQMETDGERQNATEIRASVGKMIGGIGRFYIQMCTELKLSDYEGRLIQNSLTIERMVLSAFDERRNKYLEEHPSAGKDPKKTGGPIYRRVNGKWMRELILYDKEEIRRIWRQANNGDDATAGLTHMMIWHSNLNDATYQRTRALVRTGMDPRMCSLMQGSTLPRRSGAAGAAVKGVGTMVMELVRMIKRGINDRNFWRGENGRKTRIAYERMCNILKGKFQTAAQKAMMDQVRESRNPGNAEFEDLTFLARSALILRGSVAHKSCLPACVYGPAVASGYDFEREGYSLVGIDPFRLLQNSQVYSLIRPNENPAHKSQLVWMACHSAAFEDLRVLSFIKGTKVLPRGKLSTRGVQIASNENMETMESSTLELRSRYWAIRTRSGGNTNQQRASAGQISIQPTFSVQRNLPFDRTTIMAAFNGNTEGRTSDMRTEIIRMMESARPEDVSFQGRGVFELSDEKAASPIVPSFDMSNEGSYFFGDNAEEYDN,498,False,9462,9462,0,-2.872717233,median,Doud,Site-Specific Amino Acid Preferences Are Mostly Conserved in Two Closely Related Protein Homologs,2015,10.1093/molbev/msv167,1-498,Influenza nucleoprotein,,Growth,NCAP_I34A1_theta0.99_full_11-26-2021_b09.a2m,1,498,498,0.9,0.01,15390,1.0,498.0,1493.2,2.998393574,medium,2116.0,4.248995984,NCAP_I34A1_Doud_2015.csv,log_fitness_by_syn_mut_fitness,1,mutant,NCAP_I34A1_theta_0.01.npy,NCAP_I34A1.pdb,0.1,,OrganismalFitness -NKX31_HUMAN_Tsuboyama_2023_2L9R,NKX31_HUMAN_Tsuboyama_2023_2L9R.csv,NKX31_HUMAN,Human,Homo sapiens,HSHMSHTQVIELERKFSHQKYLSAPERAHLAKNLKLTETQVKIWFQNRRYKTKRKQLSSEL,61,True,2482,1149,1333,-0.3,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-28,1-61,Homeobox protein Nkx-3.1,Stability,cDNA display proteolysis,NKX31_HUMAN_2023-08-07_b04.a2m,1,61,61,0.4,0.2,319273,0.902,55.0,8440.8,153.4690909,High,27.0,0.4909090909,Tsuboyama2023_Dataset2_Dataset25,ddG_ML_float,1,mut_type,NKX31_HUMAN_theta0.2_2023-08-07_b04.npy,NKX31_HUMAN.pdb,1.0,,Stability 
-NPC1_HUMAN_Erwood_2022_HEK293T,NPC1_HUMAN_Erwood_2022_HEK293T.csv,NPC1_HUMAN,Human,Homo sapiens,MTARGLALGLLLLLLCPAQVFSQSCVWYGECGIAYGDKRYNCEYSGPPKPLPKDGYDLVQELCPGFFFGNVSLCCDVRQLQTLKDNLQLPLQFLSRCPSCFYNLLNLFCELTCSPRQSQFLNVTATEDYVDPVTNQTKTNVKELQYYVGQSFANAMYNACRDVEAPSSNDKALGLLCGKDADACNATNWIEYMFNKDNGQAPFTITPVFSDFPVHGMEPMNNATKGCDESVDEVTAPCSCQDCSIVCGPKPQPPPPPAPWTILGLDAMYVIMWITYMAFLLVFFGAFFAVWCYRKRYFVSEYTPIDSNIAFSVNASDKGEASCCDPVSAAFEGCLRRLFTRWGSFCVRNPGCVIFFSLVFITACSSGLVFVRVTTNPVDLWSAPSSQARLEKEYFDQHFGPFFRTEQLIIRAPLTDKHIYQPYPSGADVPFGPPLDIQILHQVLDLQIAIENITASYDNETVTLQDICLAPLSPYNTNCTILSVLNYFQNSHSVLDHKKGDDFFVYADYHTHFLYCVRAPASLNDTSLLHDPCLGTFGGPVFPWLVLGGYDDQNYNNATALVITFPVNNYYNDTEKLQRAQAWEKEFINFVKNYKNPNLTISFTAERSIEDELNRESDSDVFTVVISYAIMFLYISLALGHMKSCRRLLVDSKVSLGIAGILIVLSSVACSLGVFSYIGLPLTLIVIEVIPFLVLAVGVDNIFILVQAYQRDERLQGETLDQQLGRVLGEVAPSMFLSSFSETVAFFLGALSVMPAVHTFSLFAGLAVFIDFLLQITCFVSLLGLDIKRQEKNRLDIFCCVRGAEDGTSVQASESCLFRFFKNSYSPLLLKDWMRPIVIAIFVGVLSFSIAVLNKVDIGLDQSLSMPDDSYMVDYFKSISQYLHAGPPVYFVLEEGHDYTSSKGQNMVCGGMGCNNDSLVQQIFNAAQLDNYTRIGFAPSSWIDDYFDWVKPQSSCCRVDNITDQFCNASVVDPACVRCRPLTPEGKQRPQGGDFMRFLPMFLSDNPNPKCGKGGHAAYSSAVNILLGHGTRVGATYFMTYHTVLQTSADFIDALKKARLIASNVTETMGINGSAYRVFPYSVFYVFYEQYLTIIDDTIFNLGVSLGAIFLVTMVLLGCELWSAVIMCATIAMVLVNMFGVMWLWGISLNAVSLVNLVMSCGISVEFCSHITRAFTVSMKGSRVERAEEALAHMGSSVFSGITLTKFGGIVVLAFAKSQIFQIFYFRMYLAMVLLGATHGLIFLPVLLSYIGPSVNKAKSCATEERYKGTERERLLNF,1278,False,637,637,0,0.8,manual,Erwood,Saturation variant interpretation using CRISPR prime editing,2022,10.1038/s41587-021-01201-1,347-1190,NPC intracellular cholesterol transporter,Fluorescence measurement,Flow Cytometry Assay,NPC1_HUMAN_2023-10-12_b07.a2m,1,1278,1278,0.7,0.2,6333,0.987,1261.0,918.9,0.7287073751,Low,137.0,0.1086439334,41587_2021_1201_MOESM3_ESM.xlsx,Function Score,1,Protein Annotation,NPC1_HUMAN_theta0.2_2023-10-12_b07.npy,NPC1_HUMAN.pdb,1.0,,Activity -NPC1_HUMAN_Erwood_2022_RPE1,NPC1_HUMAN_Erwood_2022_RPE1.csv,NPC1_HUMAN,Human,Homo 
sapiens,MTARGLALGLLLLLLCPAQVFSQSCVWYGECGIAYGDKRYNCEYSGPPKPLPKDGYDLVQELCPGFFFGNVSLCCDVRQLQTLKDNLQLPLQFLSRCPSCFYNLLNLFCELTCSPRQSQFLNVTATEDYVDPVTNQTKTNVKELQYYVGQSFANAMYNACRDVEAPSSNDKALGLLCGKDADACNATNWIEYMFNKDNGQAPFTITPVFSDFPVHGMEPMNNATKGCDESVDEVTAPCSCQDCSIVCGPKPQPPPPPAPWTILGLDAMYVIMWITYMAFLLVFFGAFFAVWCYRKRYFVSEYTPIDSNIAFSVNASDKGEASCCDPVSAAFEGCLRRLFTRWGSFCVRNPGCVIFFSLVFITACSSGLVFVRVTTNPVDLWSAPSSQARLEKEYFDQHFGPFFRTEQLIIRAPLTDKHIYQPYPSGADVPFGPPLDIQILHQVLDLQIAIENITASYDNETVTLQDICLAPLSPYNTNCTILSVLNYFQNSHSVLDHKKGDDFFVYADYHTHFLYCVRAPASLNDTSLLHDPCLGTFGGPVFPWLVLGGYDDQNYNNATALVITFPVNNYYNDTEKLQRAQAWEKEFINFVKNYKNPNLTISFTAERSIEDELNRESDSDVFTVVISYAIMFLYISLALGHMKSCRRLLVDSKVSLGIAGILIVLSSVACSLGVFSYIGLPLTLIVIEVIPFLVLAVGVDNIFILVQAYQRDERLQGETLDQQLGRVLGEVAPSMFLSSFSETVAFFLGALSVMPAVHTFSLFAGLAVFIDFLLQITCFVSLLGLDIKRQEKNRLDIFCCVRGAEDGTSVQASESCLFRFFKNSYSPLLLKDWMRPIVIAIFVGVLSFSIAVLNKVDIGLDQSLSMPDDSYMVDYFKSISQYLHAGPPVYFVLEEGHDYTSSKGQNMVCGGMGCNNDSLVQQIFNAAQLDNYTRIGFAPSSWIDDYFDWVKPQSSCCRVDNITDQFCNASVVDPACVRCRPLTPEGKQRPQGGDFMRFLPMFLSDNPNPKCGKGGHAAYSSAVNILLGHGTRVGATYFMTYHTVLQTSADFIDALKKARLIASNVTETMGINGSAYRVFPYSVFYVFYEQYLTIIDDTIFNLGVSLGAIFLVTMVLLGCELWSAVIMCATIAMVLVNMFGVMWLWGISLNAVSLVNLVMSCGISVEFCSHITRAFTVSMKGSRVERAEEALAHMGSSVFSGITLTKFGGIVVLAFAKSQIFQIFYFRMYLAMVLLGATHGLIFLPVLLSYIGPSVNKAKSCATEERYKGTERERLLNF,1278,False,63,63,0,0.8,manual,Erwood,Saturation variant interpretation using CRISPR prime editing,2022,10.1038/s41587-021-01201-1,420-920,NPC intracellular cholesterol transporter,Fluorescence measurement,Flow Cytometry Assay,NPC1_HUMAN_2023-10-12_b07.a2m,1,1278,1278,0.7,0.2,6333,0.987,1261.0,918.9,0.7287073751,Low,137.0,0.1086439334,41587_2021_1201_MOESM3_ESM.xlsx,Function Score,1,Protein Annotation,NPC1_HUMAN_theta0.2_2023-10-12_b07.npy,NPC1_HUMAN.pdb,1.0,,Activity -NRAM_I33A0_Jiang_2016,NRAM_I33A0_Jiang_2016.csv,NRAM_I33A0,Virus,Influenza A virus 
(A/WSN/1933(H1N1)),MNPNQKIITIGSICMVVGIISLILQIGNIISIWISHSIQTGNQNHTGICNQGIITYNVVAGQDSTSVILTGNSSLCPIRGWAIHSKDNGIRIGSKGDVFVIREPFISCSHLECRTFFLTQGALLNDKHSNGTVKDRSPYRALMSCPVGEAPSPYNSRFESVAWSASACHDGMGWLTIGISGPDNGAVAVLKYNGIITETIKSWRKKILRTQESECTCVNGSCFTIMTDGPSNGLASYKIFKIEKGKVTKSIELNAPNSHYEECSCYPDTGKVMCVCRDNWHGSNRPWVSFDQNLDYQIGYICSGVFGDNPRPKDGPGSCGPVSADGANGVKGFSYRYGNGVWIGRTKSDSSRHGFEMIWDPNGWTETDSRFSVRQDVVAMTDRSGYSGSFVQHPELTGLDCMRPCFWVELIRGRPEEETIWTSGSIISFCGVNSDTVDWSWPDGAELPFTIDK,453,False,298,298,0,-0.7772013612,median,Jiang,A Balance between Inhibitor Binding and Substrate Processing Confers Influenza Drug Resistance,2016,10.1016/j.jmb.2015.11.027,67-285,Influenza neuraminidase,,Growth,NRAM_I33A0_full_11-26-2021_b01.a2m,1,453,453,0.1,0.01,47174,0.976,442.0,33.1,0.07488687783,low,0.0,0.0,NRAM_I33A0_Jiang_2016.csv,Standard Conditions,1,mutant,NRAM_I33A0_theta_0.01.npy,NRAM_I33A0.pdb,0.1,,OrganismalFitness -NUD15_HUMAN_Suiter_2020,NUD15_HUMAN_Suiter_2020.csv,NUD15_HUMAN,Human,Homo sapiens,MTASAQPRGRRPGVGVGVVVTSCKHPRCVLLGKRKGSVGAGSFQLPGGHLEFGETWEECAQRETWEEAALHLKNVHFASVVNSFIEKENYHYVTILMKGEVDVTHDSEPKNVEPEKNESWEWVPWEELPPLDQLFWGLRCLKEQGYDPFKEDLNHLVGYKGNHL,164,False,2844,2844,0,0.25,manual,Suiter,Massively parallel variant characterization identifies NUDT15 alleles associated with thiopurine toxicity,2020,10.1073/pnas.1915680117,2-164,NUDT15,,"VAMP-seq, drug sensitivity",NUD15_HUMAN_full_11-26-2021_b04.a2m,1,164,164,0.4,0.2,153922,0.72,118.0,43847.8,371.5915254,high,151.0,1.279661017,NUD15_HUMAN_Suiter_2020.csv,Final NUDT15 activity Score,1,mutant,NUD15_HUMAN_theta_0.2.npy,NUD15_HUMAN.pdb,0.1,,Expression -NUSA_ECOLI_Tsuboyama_2023_1WCL,NUSA_ECOLI_Tsuboyama_2023_1WCL.csv,NUSA_ECOLI,Prokaryote,Escherichia coli,EAHAAIDTFTKYLDIDEDFATVLVEEGFSTLEELAYVPMKELLEIEGLDEPTVEALRERAKNALATIAQ,69,True,2028,1306,722,-1.318069467,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-29,1-69,Transcription 
termination/antitermination protein NusA,Stability,cDNA display proteolysis,NUSA_ECOLI_2023-08-07_b03.a2m,1,69,69,0.3,0.2,205612,0.812,56.0,39002.5,696.4732143,High,38.0,0.6785714286,Tsuboyama2023_Dataset2_Dataset26,ddG_ML_float,1,mut_type,NUSA_ECOLI_theta0.2_2023-08-07_b03.npy,NUSA_ECOLI.pdb,1.0,,Stability -NUSG_MYCTU_Tsuboyama_2023_2MI6,NUSG_MYCTU_Tsuboyama_2023_2MI6.csv,NUSG_MYCTU,Prokaryote,Mycobacterium tuberculosis,DYEVGESVTVMDGPFATLPATISEVNAEQQKLKVLVSIFGRETPVELTFGQVSKI,55,True,1380,1019,361,-0.5,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-30,1-55,Transcription termination/antitermination protein NusG,Stability,cDNA display proteolysis,NUSG_MYCTU_2023-08-07_b03.a2m,1,55,55,0.3,0.2,102004,0.964,53.0,16625.7,313.6924528,High,41.0,0.7735849057,Tsuboyama2023_Dataset2_Dataset27,ddG_ML_float,1,mut_type,NUSG_MYCTU_theta0.2_2023-08-07_b03.npy,NUSG_MYCTU.pdb,1.0,,Stability -OBSCN_HUMAN_Tsuboyama_2023_1V1C,OBSCN_HUMAN_Tsuboyama_2023_1V1C.csv,OBSCN_HUMAN,Human,Homo sapiens,FDIYVVTADYLPLGAEQDAITLREGQYVEVLDAAHPLRWLVRTKPTKSSPSRQGWVSPAYLDRRL,65,True,3197,1213,1984,-1.0,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-31,1-65,Obscurin,Stability,cDNA display proteolysis,OBSCN_HUMAN_2023-08-07_b02.a2m,1,65,65,0.2,0.2,718751,0.815,53.0,23710.7,447.3716981,High,54.0,1.018867925,Tsuboyama2023_Dataset2_Dataset28,ddG_ML_float,1,mut_type,OBSCN_HUMAN_theta0.2_2023-08-07_b02.npy,OBSCN_HUMAN.pdb,1.0,,Stability -ODP2_GEOSE_Tsuboyama_2023_1W4G,ODP2_GEOSE_Tsuboyama_2023_1W4G.csv,ODP2_GEOSE,Prokaryote,Geobacillus stearothermophilus,NRRVIAMPSVRKWAREKGVDIRLVQGTGKNGRVLKEDIDAFLAG,44,True,1134,669,465,-0.4168227551,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-32,1-44,Dihydrolipoyllysine-residue acetyltransferase component of pyruvate 
dehydrogenase complex,Stability,cDNA display proteolysis,ODP2_GEOSE_2023-08-07_b07.a2m,1,44,44,0.7,0.2,163835,0.909,40.0,14834.6,370.865,High,21.0,0.525,Tsuboyama2023_Dataset2_Dataset29,ddG_ML_float,1,mut_type,ODP2_GEOSE_theta0.2_2023-08-07_b07.npy,ODP2_GEOSE.pdb,1.0,,Stability -OPSD_HUMAN_Wan_2019,OPSD_HUMAN_Wan_2019.csv,OPSD_HUMAN,Human,Homo sapiens,MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA,348,False,165,165,0,0.5795144905,median,Wan,Characterizing variants of unknown significance in rhodopsin: A functional genomics approach,2019,10.1002/humu.23762,4-347,Rhodopsin,Expression,Flow Cytometry Assay,OPSD_HUMAN_2023-10-12_b04.a2m,1,348,348,0.4,0.2,342311,0.876,305.0,36900.5,120.9852459,High,247.0,0.8098360656,urn_mavedb_00000099-a-1_scores.csv,score,1,mutant,OPSD_HUMAN_theta0.2_2023-10-12_b04.npy,OPSD_HUMAN.pdb,1.0,,Expression -OTC_HUMAN_Lo_2023,OTC_HUMAN_Lo_2023.csv,OTC_HUMAN,Human,Homo sapiens,MLFNLRILLNNAAFRNGHNFMVRNFRCGQPLQNKVQLKGRDLLTLKNFTGEEIKYMLWLSADLKFRIKQKGEYLPLLQGKSLGMIFEKRSTRTRLSTETGFALLGGHPCFLTTQDIHLGVNESLTDTARVLSSMADAVLARVYKQSDLDTLAKEASIPIINGLSDLYHPIQILADYLTLQEHYSSLKGLTLSWIGDGNNILHSIMMSAAKFGMHLQAATPKGYEPDASVTKLAEQYAKENGTKLLLTNDPLEAAHGGNVLITDTWISMGQEEEKKKRLQAFQGYQVTMKTAKVAASDWTFLHCLPRKPEEVDDEVFYSPRSLVFPEAENRKWTIMAVMVSLLTDYSPQLQKPKF,354,False,1570,1570,0,0.417,median,Lo,"The functional impact of 1,570 individual amino acid substitutions in human OTC",2023,10.1016/j.ajhg.2023.03.019,33-354,OTC,Enzymatic activity,,OTC_HUMAN_2023-08-07_b02.a2m,1,354,354,0.2,0.2,135607,0.87,308.0,18646.2,60.53961039,Medium,641.0,2.081168831,urn_mavedb_00000112-a-1_scores.csv,DMS_score,1,mutant,OTC_HUMAN_theta0.2_2023-08-07_b02.npy,OTC_HUMAN.pdb,1.0,,Activity 
-OTU7A_HUMAN_Tsuboyama_2023_2L2D,OTU7A_HUMAN_Tsuboyama_2023_2L2D.csv,OTU7A_HUMAN,Human,Homo sapiens,TLDMDAVLSDFVRSTGAEPGLARDLLEGKNWDLTAALSDYEQ,42,False,635,635,0,-1.0,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-33,1-42,OTU domain-containing protein 7A,Stability,cDNA display proteolysis,OTU7A_HUMAN_2023-08-07_b02.a2m,1,42,42,0.2,0.2,1359071,0.881,37.0,514715.2,13911.22162,High,28.0,0.7567567568,Tsuboyama2023_Dataset2_Dataset30,ddG_ML_float,1,mut_type,OTU7A_HUMAN_theta0.2_2023-08-07_b02.npy,OTU7A_HUMAN.pdb,1.0,,Stability -OXDA_RHOTO_Vanella_2023_activity,OXDA_RHOTO_Vanella_2023_activity.csv,OXDA_RHOTO,Eukaryote,Rhodotorula gracilis,HSQKRVVVLGSGVIGLSSALILARKGYSVHILARDLPEDVSSQTFASPWAGANWTPFMTLTDGPRQAKWEESTFKKWVELVPTGHAMWLKGTRRFAQNEDGLLGHWYKDITPNYRPLPSSECPPGAIGVTYDTLSVHAPKYCQYLARELQKLGATFERRTVTSLEQAFDGADLVVNATGLGAKSIAGIDDQAAEPIRGQTVLVKSPCKRCTMDSSDPASPAYIIPRPGGEVICGGTYGVGDWDLSVNPETVQRILKHCLRLDPTISSDGTIEGIEVLRHNVGLRPARRGGPRVEAERIVLPLDRTKSPLSLGRGSARAAKEKEVTLVHAYGFSSAGYQQSWGAAEDVAQLVDEAFQRYHGAARE,364,False,6396,6396,0,-0.2,manual,Vanella,Understanding Activity-Stability Tradeoffs in Biocatalysts by Enzyme Proximity Sequencing,2023,10.1101/2023.02.24.529916,1-364,D-amino acid oxidase (DAOx),fluorescent label of enzyme product,FACS,OXDA_RHOTO_2023-08-07_b02.a2m,1,364,364,0.2,0.2,520184,0.876,319.0,98000.4,307.2112853,High,892.0,2.796238245,Figure_2.xlsx,activity fitness,1,mutant,OXDA_RHOTO_theta0.2_2023-08-07_b02.npy,OXDA_RHOTO.pdb,1.0,,Activity -OXDA_RHOTO_Vanella_2023_expression,OXDA_RHOTO_Vanella_2023_expression.csv,OXDA_RHOTO,Eukaryote,Rhodotorula 
gracilis,HSQKRVVVLGSGVIGLSSALILARKGYSVHILARDLPEDVSSQTFASPWAGANWTPFMTLTDGPRQAKWEESTFKKWVELVPTGHAMWLKGTRRFAQNEDGLLGHWYKDITPNYRPLPSSECPPGAIGVTYDTLSVHAPKYCQYLARELQKLGATFERRTVTSLEQAFDGADLVVNATGLGAKSIAGIDDQAAEPIRGQTVLVKSPCKRCTMDSSDPASPAYIIPRPGGEVICGGTYGVGDWDLSVNPETVQRILKHCLRLDPTISSDGTIEGIEVLRHNVGLRPARRGGPRVEAERIVLPLDRTKSPLSLGRGSARAAKEKEVTLVHAYGFSSAGYQQSWGAAEDVAQLVDEAFQRYHGAARE,364,False,6769,6769,0,-0.2,manual,Vanella,Understanding Activity-Stability Tradeoffs in Biocatalysts by Enzyme Proximity Sequencing,2023,10.1101/2023.02.24.529916,1-364,D-amino acid oxidase (DAOx),cell surface expression,FACS,OXDA_RHOTO_2023-08-07_b02.a2m,1,364,364,0.2,0.2,520184,0.876,319.0,98000.4,307.2112853,High,892.0,2.796238245,Figure_2.xlsx,expression fitness,1,mutant,OXDA_RHOTO_theta0.2_2023-08-07_b02.npy,OXDA_RHOTO.pdb,1.0,,Expression -P53_HUMAN_Giacomelli_2018_Null_Etoposide,P53_HUMAN_Giacomelli_2018_Null_Etoposide.csv,P53_HUMAN,Human,Homo sapiens,MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD,393,False,7467,7467,0,-0.5,manual,Giacomelli,Mutational processes shape the landscape of TP53 mutations in human cancer,2018,10.1038/s41588-018-0204-y,1-393,p53,"drug resistance (nutlin-3, etoposide)",Growth,P53_HUMAN_full_04-29-2022_b09.a2m,1,393,393,0.9,0.2,5069,0.858,337.0,153.2,0.4545994065,low,7.0,0.02077151335,P53_HUMAN_Giacomelli_2018.csv,A549_p53NULL_Etoposide_Z-score,1,Allele,P53_HUMAN_theta_0.2.npy,P53_HUMAN.pdb,0.1,,OrganismalFitness -P53_HUMAN_Giacomelli_2018_Null_Nutlin,P53_HUMAN_Giacomelli_2018_Null_Nutlin.csv,P53_HUMAN,Human,Homo 
sapiens,MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD,393,False,7467,7467,0,0.04438920187,median,Giacomelli,Mutational processes shape the landscape of TP53 mutations in human cancer,2018,10.1038/s41588-018-0204-y,1-393,p53,"drug resistance (nutlin-3, etoposide)",Growth,P53_HUMAN_full_04-29-2022_b09.a2m,1,393,393,0.9,0.2,5069,0.858,337.0,153.2,0.4545994065,low,7.0,0.02077151335,P53_HUMAN_Giacomelli_2018.csv,A549_p53NULL_Nutlin-3_Z-score,-1,Allele,P53_HUMAN_theta_0.2.npy,P53_HUMAN.pdb,0.1,,OrganismalFitness -P53_HUMAN_Giacomelli_2018_WT_Nutlin,P53_HUMAN_Giacomelli_2018_WT_Nutlin.csv,P53_HUMAN,Human,Homo sapiens,MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD,393,False,7467,7467,0,-1.0,manual,Giacomelli,Mutational processes shape the landscape of TP53 mutations in human cancer,2018,10.1038/s41588-018-0204-y,1-393,p53,"drug resistance (nutlin-3, etoposide)",Growth,P53_HUMAN_full_04-29-2022_b09.a2m,1,393,393,0.9,0.2,5069,0.858,337.0,153.2,0.4545994065,low,7.0,0.02077151335,P53_HUMAN_Giacomelli_2018.csv,A549_p53WT_Nutlin-3_Z-score,-1,Allele,P53_HUMAN_theta_0.2.npy,P53_HUMAN.pdb,0.1,,OrganismalFitness -P53_HUMAN_Kotler_2018,P53_HUMAN_Kotler_2018.csv,P53_HUMAN,Human,Homo 
sapiens,MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD,393,False,1048,1048,0,1.0,manual,Kotler,A Systematic p53 Mutation Library Links Differential Functional Impact to Cancer Mutation Pattern and Evolutionary Conservation,2018,10.1016/j.molcel.2018.06.012,102-292,p53,growth,Growth,P53_HUMAN_full_11-26-2021_b09.a2m,1,393,393,0.9,0.2,4129,0.863,339.0,148.0,0.4365781711,low,15.0,0.04424778761,P53_HUMAN_Kotler_2018.csv,RFS_H1299,-1,mutant,P53_HUMAN_Kotler_theta_0.2.npy,P53_HUMAN.pdb,0.1,,OrganismalFitness -P84126_THETH_Chan_2017,P84126_THETH_Chan_2017.csv,P84126_THETH,Prokaryote,Thermus thermophilus,MRPDLSRVPGVLGEIARKRASEVAPYPLPEPPSVPSFKEALLRPGLSVIAEVKRQSPSEGLIREVDPVEAALAYARGGARAVSVLTEPHRFGGSLLDLKRVREAVDLPLLRKDFVVDPFMLEEARAFGASAALLIVALLGELTGAYLEEARRLGLEALVEVHTERELEIALEAGAEVLGINNRDLATLHINLETAPRLGRLARKRGFGGVLVAESGYSRKEELKALEGLFDAVLIGTSLMRAPDLEAALRELVG,254,False,1519,1519,0,-0.5,manual,Chan,Correlation of fitness landscapes from three orthologous TIM barrels originates from sequence and structure constraints,2017,10.1038/ncomms14614,44-238,TIM Barrell (T. 
thermophilus),fitness,Growth,P84126_THETH_full_11-26-2021_b04.a2m,1,254,254,0.4,0.2,53441,0.941,239.0,10704.6,44.78912134,medium,390.0,1.631799163,P84126_THETH_Chan_2017.csv,fitness,1,mutant,P84126_THETH_theta_0.2.npy,P84126_THETH.pdb,0.1,,OrganismalFitness -PA_I34A1_Wu_2015,PA_I34A1_Wu_2015.csv,PA_I34A1,Virus,influenza subtype?,MEDFVRQCFNPMIVELAEKAMKEYGEDLKIETNKFAAICTHLEVCFMYSDFHFIDEQGESIVVELGDPNALLKHRFEIIEGRDRTIAWTVVNSICNTTGAEKPKFLPDLYDYKKNRFIEIGVTRREVHIYYLEKANKIKSEKTHIHIFSFTGEEMATKADYTLDEESRARIKTRLFTIRQEMASRGLWDSFRQSERGEETIEERFEITGTMRKLADQSLPPNFSSLEKFRAYVDGFEPNGYIEGKLSQMSKEVNARIEPFLKSTPRPLRLPDGPPCSQRSKFLLMDALKLSIEDPSHEGEGIPLYDAIKCMRTFFGWKEPNVVKPHEKGINPNYLLSWKQVLAELQDIENEEKIPRTKNMKKTSQLKWALGENMAPEKVDFDDCKDVGDLKQYDSDEPELRSLASWIQNEFNKACELTDSSWIELDEIGEDAAPIEHIASMRRNYFTAEVSHCRATEYIMKGVYINTALLNASCAAMDDFQLIPMISKCRTKEGRRKTNLYGFIIKGRSHLRNDTDVVNFVSMEFSLTDPRLEPHKWEKYCVLEVGDMLLRSAIGHVSRPMFLYVRTNGTSKIKMKWGMEMRRCLLQSLQQIESMIEAESSVKEKDMTKEFFENKSETWPVGESPKGVEEGSIGKVCRTLLAKSVFNSLYASPQLEGFSAESRKLLLIVQALRDNLEPGTFDLGGLYEAIEECLINDPWVLLNASWFNSFLTHALR,716,False,1820,1820,0,0.290683953,median,Wu,Functional Constraint Profiling of a Viral Protein Reveals Discordance of Evolutionary Conservation and Functionality,2015,10.1371/journal.pgen.1005310,8-716,Influenza polymerase acidic protein,Viral replication,Growth,PA_I34A1_full_theta0.99_04-29-2022_b09.a2m,1,716,716,0.9,0.01,26750,1.0,716.0,1608.0,2.245810056,medium,3706.0,5.175977654,PA_I34A1_Wu_2015.csv,RF_index,1,mutant,PA_I34A1_theta_0.01.npy,PA_I34A1.pdb,0.1,,OrganismalFitness -PABP_YEAST_Melamed_2013,PABP_YEAST_Melamed_2013.csv,PABP_YEAST,Eukaryote,Saccharomyces cerevisiae 
S288C,MADITDKTAEQLENLNIQDDQKQAATGSESQSVENSSASLYVGDLEPSVSEAHLYDIFSPIGSVSSIRVCRDAITKTSLGYAYVNFNDHEAGRKAIEQLNYTPIKGRLCRIMWSQRDPSLRKKGSGNIFIKNLHPDIDNKALYDTFSVFGDILSSKIATDENGKSKGFGFVHFEEEGAAKEAIDALNGMLLNGQEIYVAPHLSRKERDSQLEETKAHYTNLYVKNINSETTDEQFQELFAKFGPIVSASLEKDADGKLKGFGFVNYEKHEDAVKAVEALNDSELNGEKLYVGRAQKKNERMHVLKKQYEAYRLEKMAKYQGVNLFVKNLDDSVDDEKLEEEFAPYGTITSAKVMRTENGKSKGFGFVCFSTPEEATKAITEKNQQIVAGKPLYVAIAQRKDVRRSQLAQQIQARNQMRYQQATAAAAAAAAGMPGQFMPPMFYGVMPPRGVPFNGPNPQQMNPMGGMPKNGMPPQFRNGPVYGVPPQGGFPRNANDNNQFYQQKQRQALGEQLYKKVSAKTSNEEAAGKITGMILDLPPQEVFPLLESDELFEQHYKEASAAYESFKKEQEQQTEQA,577,True,37708,1187,36521,0.3,manual,Melamed,Deep mutational scanning of an RRM domain of the Saccharomyces cerevisiae poly(A)-binding protein,2013,10.1261/rna.040709.113,126-200,PAB1,"Growth (essential function), RNA binding",Growth,PABP_YEAST_full_11-26-2021_b07.a2m,1,577,577,0.7,0.2,7866,0.919,530.0,855.1,1.613396226,medium,83.0,0.1566037736,PABP_YEAST_Melamed_2013.csv,linear,1,mutant,PABP_YEAST_theta_0.2.npy,PABP_YEAST.pdb,0.1,,OrganismalFitness -PAI1_HUMAN_Huttinger_2021,PAI1_HUMAN_Huttinger_2021.csv,PAI1_HUMAN,Human,Homo sapiens,MQMSPALTCLVLGLALVFGEGSAVHHPPSYVAHLASDFGVRVFQQVAQASKDRNVVFSPYGVASVLAMLQLTTGGETQQQIQAAMGFKIDDKGMAPALRHLYKELMGPWNKDEISTTDAIFVQRDLKLVQGFMPHFFRLFRSTVKQVDFSEVERARFIINDWVKTHTKGMISNLLGKGAVDQLTRLVLVNALYFNGQWKTPFPDSSTHRRLFHKSDGSTVSVPMMAQTNKFNYTEFTTPDGHYYDILELPYHGDTLSMFIAAPYEKEVPLSALTNILSAQLISHWKGNMTRLPRLLVLPKFSLETEVDLRKPLENLGMTDMFRQFQADFTSLSDQEPLHVAQALQKVKIEVNESGTVASSSTAVIVSARMAPEEIIMDRPFLFVVRHNPTGTVLFMGQVMEP,402,False,5345,5345,0,0.029313547,median,Huttinger,Deep mutational scanning of the plasminogen activator inhibitor-1 functional landscape,2021,10.1038/s41598-021-97871-7,24-402,"PAI-1, SERPINE1",PAI-1 inhibition of uPA,phage fitness,PAI1_HUMAN_2023-10-12_b05.a2m,1,402,402,0.5,0.2,52528,,,,,,,,PAI1_HUMAN_Huttinger_2021,log2FoldChange,1,mutation,PAI1_HUMAN_theta0.2_2023-10-12_b05.npy,PAI1_HUMAN.pdb,1.0,,Activity 
-PHOT_CHLRE_Chen_2023,PHOT_CHLRE_Chen_2023.csv,PHOT_CHLRE,Eukaryote,Chlamydomonas reinhardtii,AGLRHTFVVADATLPDCPLVYASEGFYAMTGYGPDEVLGHNARFLQGEGTDPKEVQKIRDAIKKGEACSVRLLNYRKDGTPFWNLLTVTPIKTPDGRVSKFVGVQVDVTSKTEGKALA,118,True,167529,2122,165407,0.6317018878,median,Chen,Deep Mutational Scanning of an Oxygen-Independent Fluorescent Protein CreiLOV for Comprehensive Profiling of Mutational and Epistatic Effects,2023,10.1021/acssynbio.2c00662,1-118,Phototropin,Fluorescence,FACS,PHOT_CHLRE_2023-08-07_b02.a2m,1,118,118,0.2,0.2,1627150,0.873,103.0,610128.5,5923.57767,High,232.0,2.252427184,sb2c00662_si_001.xlsx,mean,1,mutant,PHOT_CHLRE_theta0.2_2023-08-07_b02.npy,PHOT_CHLRE.pdb,1.0,,Activity -PIN1_HUMAN_Tsuboyama_2023_1I6C,PIN1_HUMAN_Tsuboyama_2023_1I6C.csv,PIN1_HUMAN,Human,Homo sapiens,KLPPGWEKRMSRSSGRVYYFNHITNASQWERPSGNSSSG,39,True,802,686,116,-0.6844420472,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-34,1-39,Peptidyl-prolyl cis-trans isomerase NIMA-interacting 1,Stability,cDNA display proteolysis,PIN1_HUMAN_2023-08-07_b02.a2m,1,39,39,0.2,0.2,248269,0.821,32.0,10833.2,338.5375,High,13.0,0.40625,Tsuboyama2023_Dataset2_Dataset31,ddG_ML_float,1,mut_type,PIN1_HUMAN_theta0.2_2023-08-07_b02.npy,PIN1_HUMAN.pdb,1.0,,Stability -PITX2_HUMAN_Tsuboyama_2023_2L7M,PITX2_HUMAN_Tsuboyama_2023_2L7M.csv,PITX2_HUMAN,Human,Homo sapiens,THFTSQQLQELEATFQRNHYPDMSTREEIAVWTNLTEARVRVWFKNRRAKWR,52,True,1824,938,886,-1.201366007,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-35,1-52,Pituitary homeobox 2,Stability,cDNA display proteolysis,PITX2_HUMAN_2023-08-07_b04.a2m,1,52,52,0.4,0.2,344174,1.0,52.0,9819.6,188.8384615,High,25.0,0.4807692308,Tsuboyama2023_Dataset2_Dataset32,ddG_ML_float,1,mut_type,PITX2_HUMAN_theta0.2_2023-08-07_b04.npy,PITX2_HUMAN.pdb,1.0,,Stability 
-PKN1_HUMAN_Tsuboyama_2023_1URF,PKN1_HUMAN_Tsuboyama_2023_1URF.csv,PKN1_HUMAN,Human,Homo sapiens,GIPATNLSRVAGLEKQLAIELKVKQGAENMIQTYSNGSTKDRKLLLTAQQMLQDSKTKIDIIRMQLRRALQ,71,False,1301,1301,0,-0.5,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-36,1-71,Serine/threonine-protein kinase N1,Stability,cDNA display proteolysis,PKN1_HUMAN_2023-08-07_b01.a2m,1,71,71,0.1,0.2,187829,0.845,60.0,53755.8,895.93,High,13.0,0.2166666667,Tsuboyama2023_Dataset2_Dataset33,ddG_ML_float,1,mut_type,PKN1_HUMAN_theta0.2_2023-08-07_b01.npy,PKN1_HUMAN.pdb,1.0,,Stability -POLG_CXB3N_Mattenberger_2021,POLG_CXB3N_Mattenberger_2021.csv,POLG_CXB3N,Virus,Coxsackievirus B3,MGAQVSTQKTGAHETRLNASGNSIIHYTNINYYKDAASNSANRQDFTQDPGKFTEPVKDIMIKSLPALNSPTVEECGYSDRARSITLGNSTITTQECANVVVGYGVWPDYLKDSEATAEDQPTQPDVATCRFYTLDSVQWQKTSPGWWWKLPDALSNLGLFGQNMQYHYLGRTGYTVHVQCNASKFHQGCLLVVCVPEAEMGCATLDNTPSSAELLGGDSAKEFADKPVASGSNKLVQRVVYNAGMGVGVGNLTIFPHQWINLRTNNSATIVMPYTNSVPMDNMFRHNNVTLMVIPFVPLDYCPGSTTYVPITVTIAPMCAEYNGLRLAGHQGLPTMNTPGSCQFLTSDDFQSPSAMPQYDVTPEMRIPGEVKNLMEIAEVDSVVPVQNVGEKVNSMEAYQIPVRSNEGSGTQVFGFPLQPGYSSVFSRTLLGEILNYYTHWSGSIKLTFMFCGSAMATGKFLLAYSPPGAGAPTKRVDAMLGTHVIWDVGLQSSCVLCIPWISQTHYRFVASDEYTAGGFITCWYQTNIVVPADAQSSCYIMCFVSACNDFSVRLLKDTPFISQQNFFQGPVEDAITAAIGRVADTVGTGPTNSEAIPALTAAETGHTSQVVPGDTMQTRHVKNYHSRSESTIENFLCRSACVYFTEYKNSGAKRYAEWVLTPRQAAQLRRKLEFFTYVRFDLELTFVITSTQQPSTTQNQDAQILTHQIMYVPPGGPVPDKVDSYVWQTSTNPSVFWTEGNAPPRMSIPFLSIGNAYSNFYDGWSEFSRNGVYGINTLNNMGTLYARHVNAGSTGPIKSTIRIYFKPKHVKAWIPRPPRLCQYEKAKNVNFQPSGVTTTRQSITTMTNTGAFGQQSGAVYVGNYRVVNRHLATSADWQNCVWESYNRDLLVSTTTAHGCDIIARCQCTTGVYFCASKNKHYPISFEGPGLVEVQESEYYPRRYQSHVLLAAGFSEPGDCGGILRCEHGVIGIVTMGGEGVVGFADIRDLLWLEDDAMEQGVKDYVEQLGNAFGSGFTNQICEQVNLLKESLVGQDSILEKSLKALVKIISALVIVVRNHDDLITVTATLALIGCTSSPWRWLKQKVSQYYGIPMAERQNNSWLKKFTEMTNACKGMEWIAVKIQKFIEWLKVKILPEVREKHEFLNRLKQLPLLESQIATIEQSAPSQSDQEQLFSNVQYFAHYCRKYAPLYAAEAKRVFSLEKKMSNYIQFKSKCRIEPVCLLLHGSPGAGKSVATNLIGRSLAEKLNSSVYSLPPDPDHFDGYKQQ
AVVIMDDLCQNPDGKDVSLFCQMVSSVDFVPPMAALEEKGILFTSPFVLASTNAGSINAPTVSDSRALARRFHFDMNIEVISMYSQNGKINMPMSVKTCDDECCPVNFKKCCPLVCGKAIQFIDRRTQVRYSLDMLVTEMFREYNHRHSVGTTLEALFQGPPVYREIKISVAPETPPPPAIADLLKSVDSEAVREYCKEKGWLVPEINSTLQIEKHVSRAFICLQALTTFVSVAGIIYIIYKLFAGFQGAYTGVPNQKPRVPTLRQAKVQGPAFEFAVAMMKRNSSTVKTEYGEFTMLGIYDRWAVLPRHAKPGPTILMNDQEVGVLDAKELVDKDGTNLELTLLKLNRNEKFRDIRGFLAKEEVEVNEAVLAINTSKFPNMYIPVGQVTEYGFLNLGGTPTKRMLMYNFPTRAGQCGGVLMSTGKVLGIHVGGNGHQGFSAALLKHYFNDEQGEIEFIESSKDAGFPVINTPSKTKLEPSVFHQVFEGNKEPAVLRSGDPRLKANFEEAIFSKYIGNVNTHVDEYMLEAVDHYAGQLATLDISTEPMKLEDAVYGTEGLEALDLTTSAGYPYVALGIKKRDILSKKTKDLTKLKECMDKYGLNLPMVTYVKDELRSIEKVAKGKSRLIEASSLNDSVAMRQTFGNLYKTFHLNPGVVTGSAVGCDPDLFWSKIPVMLDGHLIAFDYSGYDASLSPVWFACLKMLLEKLGYTHKETNYIDYLCNSHHLYRDKHYFVRGGMPSGCSGTSIFNSMINNIIIRTLMLKVYKGIDLDQFRMIAYGDDVIASYPWPIDASLLAEAGKGYGLIMTPADKGECFNEVTWTNATFLKRYFRADEQYPFLVHPVMPMKDIHESIRWTKDPKNTQDHVRSLCLLAWHNGEHEYEEFIRKIRSVPVGRCLTLPAFSTLRRKWLDSF,2185,False,15711,15711,0,-2.76355725,median,Mattenberger,Globally defining the effects of mutations in a picornavirus capsid,2021,10.7554/eLife.64256,1-851,Picornavirus capsid,Viral replication,Growth,POLG_CXB3N_1-861_theta0.99_04-29-2022_b07.a2m,1,861,861,0.7,0.01,7909,0.959,826.0,1515.2,1.834382567,medium,94.0,0.1138014528,POLG_CXB3N_Mattenberger_2021.csv,log_fitness_by_syn_mut_fitness,1,mutant,POLG_CXB3N_theta_0.01.npy,POLG_CXB3N.pdb,0.1,,OrganismalFitness -POLG_DEN26_Suphatrakul_2023,POLG_DEN26_Suphatrakul_2023.csv,POLG_DEN26,Virus,Dengue 
virus,GTGNIGETLGEKWKSRLNALGKSEFQIYKKSGIQEVDRTLAKEGIKRGETDHHAVSRGSAKLRWFVERNMVTPEGKVVDLGCGRGGWSYYCGGLKNVREVKGLTKGGPGHEEPIPMSTYGWNLVRLQSGVDVFFIPPEKCDTLLCDIGESSPNPTVEAGRTLRVLNLVENWLNNNTQFCIKVLNPYMPSVIEKMEALQRKYGGALVRNPLSRNSTHEMYWVSNASGNIVSSVNMISRMLINRFTMRYKKATYEPDVDLGSGTRNIGIESEIPNLDIIGKRIEKIKQEHETSWHYDQDHPYKTWAYHGSYETKQTGSASSMVNGVVRLLTKPWDVVPMVTQMAMTDTTPFGQQRVFKEKVDTRTQEPKEGTKKLMKITAEWLWKELGKKKTPRMCTREEFTRKVRSNAALGAIFTDENKWKSAREAVEDSRFWELVDKERNLHLEGKCETCVYNMMGKREKKLGEFGKAKGSRAIWYMWLGARFLEFEALGFLNEDHWFSRENSLSGVEGEGLHKLGYILRDVSKKEGGAMYADDTAGWDTRITLEDLKNEEMVTNHMEGEHKKLAEAIFKLTYQNKVVRVQRPTPRGTVMDIISRRDQRGSGQVGTYGLNTFTNMEAQLIRQMEGEGVFKSIQHLTITEEIAVQNWLARVGRERLSRMAISGDDCVVKPLDDRFASALTALNDMGKIRKDIQQWEPSRGWNDWTQVPFCSHHFHELIMKDGRVLVVPCRNQDELIGRARISQGAGWSLRETACLGKSYAQMWSLMYFHRRDLRLAANAICSAVPSHWVPTSRTTWSIHAKHEWMTTEDMLTVWNRVWIQENPWMEDKTPVESWEEIPYLGKREDQWCGSLIGLTSRATWAKNIQAAINQVRSLIGNEEYTDYMPSMKRFRREEEEAGVLW,900,False,16897,16897,0,-5.373371442,median,Suphatrakul,Functional analysis of flavivirus replicase by deep mutational scanning of dengue NS5,2023,10.1101/2023.03.07.531617,1-900,Flavivirus NS5,Viral replication,Growth,POLG_DEN26_2023-08-07_b01.a2m,1,900,900,0.1,0.01,10676,1.0,900.0,114.5,0.1272222222,Low,0.0,0.0,POLG_DEN26_Suphatrakul_2023.csv,score,1,mutant,POLG_DEN26_theta0.01_2023-08-07_b01.npy,POLG_DEN26.pdb,1.0,,OrganismalFitness -POLG_HCVJF_Qi_2014,POLG_HCVJF_Qi_2014.csv,POLG_HCVJF,Virus,Hepatitis C virus genotype 2a (isolate JFH-1) 
(HCV),MSTNPKPQRKTKRNTNRRPEDVKFPGGGQIVGGVYLLPRRGPRLGVRTTRKTSERSQPRGRRQPIPKDRRSTGKAWGKPGRPWPLYGNEGLGWAGWLLSPRGSRPSWGPTDPRHRSRNVGKVIDTLTCGFADLMGYIPVVGAPLSGAARAVAHGVRVLEDGVNYATGNLPGFPFSIFLLALLSCITVPVSAAQVKNTSSSYMVTNDCSNDSITWQLEAAVLHVPGCVPCERVGNTSRCWVPVSPNMAVRQPGALTQGLRTHIDMVVMSATFCSALYVGDLCGGVMLAAQVFIVSPQYHWFVQECNCSIYPGTITGHRMAWDMMMNWSPTATMILAYVMRVPEVIIDIVSGAHWGVMFGLAYFSMQGAWAKVIVILLLAAGVDAGTTTVGGAVARSTNVIAGVFSHGPQQNIQLINTNGSWHINRTALNCNDSLNTGFLAALFYTNRFNSSGCPGRLSACRNIEAFRIGWGTLQYEDNVTNPEDMRPYCWHYPPKPCGVVPARSVCGPVYCFTPSPVVVGTTDRRGVPTYTWGENETDVFLLNSTRPPQGSWFGCTWMNSTGFTKTCGAPPCRTRADFNASTDLLCPTDCFRKHPDATYIKCGSGPWLTPKCLVHYPYRLWHYPCTVNFTIFKIRMYVGGVEHRLTAACNFTRGDRCDLEDRDRSQLSPLLHSTTEWAILPCTYSDLPALSTGLLHLHQNIVDVQYMYGLSPAITKYVVRWEWVVLLFLLLADARVCACLWMLILLGQAEAALEKLVVLHAASAANCHGLLYFAIFFVAAWHIRGRVVPLTTYCLTGLWPFCLLLMALPRQAYAYDAPVHGQIGVGLLILITLFTLTPGYKTLLGQCLWWLCYLLTLGEAMIQEWVPPMQVRGGRDGIAWAVTIFCPGVVFDITKWLLALLGPAYLLRAALTHVPYFVRAHALIRVCALVKQLAGGRYVQVALLALGRWTGTYIYDHLTPMSDWAASGLRDLAVAVEPIIFSPMEKKVIVWGAETAACGDILHGLPVSARLGQEILLGPADGYTSKGWKLLAPITAYAQQTRGLLGAIVVSMTGRDRTEQAGEVQILSTVSQSFLGTTISGVLWTVYHGAGNKTLAGLRGPVTQMYSSAEGDLVGWPSPPGTKSLEPCKCGAVDLYLVTRNADVIPARRRGDKRGALLSPRPISTLKGSSGGPVLCPRGHVVGLFRAAVCSRGVAKSIDFIPVETLDVVTRSPTFSDNSTPPAVPQTYQVGYLHAPTGSGKSTKVPVAYAAQGYKVLVLNPSVAATLGFGAYLSKAHGINPNIRTGVRTVMTGEAITYSTYGKFLADGGCASGAYDIIICDECHAVDATSILGIGTVLDQAETAGVRLTVLATATPPGSVTTPHPDIEEVGLGREGEIPFYGRAIPLSCIKGGRHLIFCHSKKKCDELAAALRGMGLNAVAYYRGLDVSIIPAQGDVVVVATDALMTGYTGDFDSVIDCNVAVTQAVDFSLDPTFTITTQTVPQDAVSRSQRRGRTGRGRQGTYRYVSTGERASGMFDSVVLCECYDAGAAWYDLTPAETTVRLRAYFNTPGLPVCQDHLEFWEAVFTGLTHIDAHFLSQTKQAGENFAYLVAYQATVCARAKAPPPSWDAMWKCLARLKPTLAGPTPLLYRLGPITNEVTLTHPGTKYIATCMQADLEVMTSTWVLAGGVLAAVAAYCLATGCVSIIGRLHVNQRVVVAPDKEVLYEAFDEMEECASRAALIEEGQRIAEMLKSKIQGLLQQASKQAQDIQPAMQASWPKVEQFWARHMWNFISGIQYLAGLSTLPGNPAVASMMAFSAALTSPLSTSTTILLNIMGGWLASQIAPPAGATGFVVSGLVGAAVGSIGLGKVLVDILAGYGAGISGALVAFKIMSGEKPSMEDVINLLPGILSPGALVVGVICAAILRRHVGPGEGAVQWMNRLIAFASRGNHVAPTHYVTESDASQRVTQLLGSLTITSLLRRLHNWITEDCPIPCSGSWLRDVWDWVCTILTD
FKNWLTSKLFPKLPGLPFISCQKGYKGVWAGTGIMTTRCPCGANISGNVRLGSMRITGPKTCMNTWQGTFPINCYTEGQCAPKPPTNYKTAIWRVAASEYAEVTQHGSYSYVTGLTTDNLKIPCQLPSPEFFSWVDGVQIHRFAPTPKPFFRDEVSFCVGLNSYAVGSQLPCEPEPDADVLRSMLTDPPHITAETAARRLARGSPPSEASSSVSQLSAPSLRATCTTHSNTYDVDMVDANLLMEGGVAQTEPESRVPVLDFLEPMAEEESDLEPSIPSECMLPRSGFPRALPAWARPDYNPPLVESWRRPDYQPPTVAGCALPPPKKAPTPPPRRRRTVGLSESTISEALQQLAIKTFGQPPSSGDAGSSTGAGAAESGGPTSPGEPAPSETGSASSMPPLEGEPGDPDLESDQVELQPPPQGGGVAPGSGSGSWSTCSEEDDTTVCCSMSYSWTGALITPCSPEEEKLPINPLSNSLLRYHNKVYCTTSKSASQRAKKVTFDRTQVLDAHYDSVLKDIKLAASKVSARLLTLEEACQLTPPHSARSKYGFGAKEVRSLSGRAVNHIKSVWKDLLEDPQTPIPTTIMAKNEVFCVDPAKGGKKPARLIVYPDLGVRVCEKMALYDITQKLPQAVMGASYGFQYSPAQRVEYLLKAWAEKKDPMGFSYDTRCFDSTVTERDIRTEESIYQACSLPEEARTAIHSLTERLYVGGPMFNSKGQTCGYRRCRASGVLTTSMGNTITCYVKALAACKAAGIVAPTMLVCGDDLVVISESQGTEEDERNLRAFTEAMTRYSAPPGDPPRPEYDLELITSCSSNVSVALGPRGRRRYYLTRDPTTPLARAAWETVRHSPINSWLGNIIQYAPTIWVRMVLMTHFFSILMVQDTLDQNLNFEMYGSVYSVNPLDLPAIIERLHGLDAFSMHTYSHHELTRVASALRKLGAPPLRVWKSRARAVRASLISRGGKAAVCGRYLFNWAVKTKLKLTPLPEARLLDLSSWFTVGAGGGDIFHSVSRARPRSLLFGLLLLFVGVGLFLLPAR,3033,False,1630,1630,0,-0.95,manual,Qi,A Quantitative High-Resolution Genetic Profile Rapidly Identifies Sequence Determinants of Hepatitis C Viral Fitness and Drug Sensitivity,2014,10.1371/journal.ppat.1004064,1994-2079,NS5A,Viral replication,Growth,POLG_HCVJF_theta0.99_1984-2089_11-26-2021_b08.a2m,1984,2089,106,0.8,0.01,16556,1.0,106.0,4421.2,41.70943396,medium,93.0,0.8773584906,POLG_HCVJF_Qi_2014.csv,fitness,1,mutant,POLG_HCVJF_theta_0.01.npy,POLG_HCVJF.pdb,0.1,,OrganismalFitness -POLG_PESV_Tsuboyama_2023_2MXD,POLG_PESV_Tsuboyama_2023_2MXD.csv,POLG_PESV,Virus,Porcine enteric sapovirus,ALRDDEYDEWQDIIRDWRKEMTVQQFLDLKERALSGASDPDSQRYNAWLELRA,53,True,5130,995,4135,-1.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-37,1-53,Genome polyprotein,Stability,cDNA display 
proteolysis,POLG_PESV_2023-08-07_b03.a2m,1,53,53,0.3,0.01,20190,0.887,47.0,3718.4,79.11489362,Medium,12.0,0.2553191489,Tsuboyama2023_Dataset2_Dataset34,ddG_ML_float,1,mut_type,POLG_PESV_theta0.01_2023-08-07_b03.npy,POLG_PESV.pdb,1.0,,Stability -PPARG_HUMAN_Majithia_2016,PPARG_HUMAN_Majithia_2016.csv,PPARG_HUMAN,Human,Homo sapiens,MGETLGDSPIDPESDSFTDTLSANISQEMTMVDTEMPFWPTNFGISSVDLSVMEDHSHSFDIKPFTTVDFSSISTPHYEDIPFTRTDPVVADYKYDLKLQEYQSAIKVEPASPPYYSEKTQLYNKPHEEPSNSLMAIECRVCGDKASGFHYGVHACEGCKGFFRRTIRLKLIYDRCDLNCRIHKKSRNKCQYCRFQKCLAVGMSHNAIRFGRMPQAEKEKLLAEISSDIDQLNPESADLRALAKHLYDSYIKSFPLTKAKARAILTGKTTDKSPFVIYDMNSLMMGEDKIKFKHITPLQEQSKEVAIRIFQGCQFRSVEAVQEITEYAKSIPGFVNLDLNDQVTLLKYGVHEIIYTMLASLMNKDGVLISEGQGFMTREFLKSLRKPFGDFMEPKFEFAVKFNALELDDSDLAIFIAVIILSGDRPGLLNVKPIEDIQDNLLQALELQLKLNHPESSQLFAKLLQKMTDLRQIVTEHVQLLQVIKKTETDMSLHPLLQEIYKDLY,505,False,9576,9576,0,-2.5,manual,Majithia,Prospective functional classification of all possible missense variants in PPARG,2016,10.1038/ng.3700,2-505,PPARG,Expression of CD36,FACS,PPARG_HUMAN_2023-10-12_b04.a2m,1,505,505,0.4,0.2,39993,0.8,404.0,3092.1,7.653712871,Medium,86.0,0.2128712871,https://miter.broadinstitute.org/mitergrade/?query=p.Y505A&prevalence=1.0e-5,Experimental function score,1,mutant,PPARG_HUMAN_theta0.2_2023-10-12_b04.npy,PPARG_HUMAN.pdb,1.0,,Activity -PPM1D_HUMAN_Miller_2022,PPM1D_HUMAN_Miller_2022.csv,PPM1D_HUMAN,Human,Homo 
sapiens,MAGLYSLGVSVFSDQGGRKYMEDVTQIVVEPEPTAEEKPSPRRSLSQPLPPRPSPAALPGGEVSGKGPAVAAREARDPLPDAGASPAPSRCCRRRSSVAFFAVCDGHGGREAAQFAREHLWGFIKKQKGFTSSEPAKVCAAIRKGFLACHLAMWKKLAEWPKTMTGLPSTSGTTASVVIIRGMKMYVAHVGDSGVVLGIQDDPKDDFVRAVEVTQDHKPELPKERERIEGLGGSVMNKSGVNRVVWKRPRLTHNGPVRRSTVIDQIPFLAVARALGDLWSYDFFSGEFVVSPEPDTSVHTLDPQKHKYIILGSDGLWNMIPPQDAISMCQDQEEKKYLMGEHGQSCAKMLVNRALGRWRQRMLRADNTSAIVICISPEVDNQGNFTNEDELYLNLTDSPSYNSQETCVMTPSPCSTPPVKSLEEDPWPRVNSKDHIPALVRSNAFSENFLEVSAEIARENVQGVVIPSKDPEPLEENCAKALTLRIHDSLNNSLPIGLVPTNSTNTVMDQKNLKMSTPGQMKAQEIERTPPTNFKRTLEESNSGPLMKKHRRNGLSRSSGAQPASLPTTSQRKNSVKLTMRRRLRGQKKIGNPLLHQHRKTVCVC,605,False,7889,7889,0,0.01275,median,Miller,Allosteric inhibition of PPM1D serine/threonine phosphatase via an altered conformational state,2022,10.1038/s41467-022-30463-9,2-421,Protein phosphatase 1D,Fitness with GFP reporter,quantification and selection of GFP-positive cells by flow cytometry after DNA damage induced by daunorubicin,PPM1D_HUMAN_2023-10-12_b01.a2m,1,605,605,0.1,0.2,1844,0.993,601.0,346.3,0.5762063228,Low,27.0,0.04492512479,PPM1D_HUMAN_Miller_2022_raw.xlsx,fitness,1,mutant,PPM1D_HUMAN_theta0.2_2023-10-12_b01.npy,PPM1D_HUMAN.pdb,1.0,,OrganismalFitness -PR40A_HUMAN_Tsuboyama_2023_1UZC,PR40A_HUMAN_Tsuboyama_2023_1UZC.csv,PR40A_HUMAN,Human,Homo sapiens,TYTWNTKEEAKQAFKELLKEKRVPSNASWEQAMKMIINDPRYSALAKLSEKKQAFNAYKVQTE,63,True,2033,1163,870,-1.362579422,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-38,1-63,Pre-mRNA-processing factor 40 homolog A,Stability,cDNA display proteolysis,PR40A_HUMAN_2023-08-07_b03.a2m,1,63,63,0.3,0.2,63560,0.857,54.0,3663.8,67.84814815,Medium,16.0,0.2962962963,Tsuboyama2023_Dataset2_Dataset35,ddG_ML_float,1,mut_type,PR40A_HUMAN_theta0.2_2023-08-07_b03.npy,PR40A_HUMAN.pdb,1.0,,Stability -PRKN_HUMAN_Clausen_2023,PRKN_HUMAN_Clausen_2023.csv,PRKN_HUMAN,Human,Homo 
sapiens,MIVFVRFNSSHGFPVEVDSDTSIFQLKEVVAKRQGVPADQLRVIFAGKELRNDWTVQNCDLDQQSIVHIVQRPWRKGQEMNATGGDDPRNAAGGCEREPQSLTRVDLSSSVLPGDSVGLAVILHTDSRKDSPPAGSPAGRSIYNSFYVYCKGPCQRVQPGKLRVQCSTCRQATLTLTQGPSCWDDVLIPNRMSGECQSPHCPGTSAEFFFKCGAHPTSDKETSVALHLIATNSRNITCITCTDVRSPVLVFQCNSRHVICLDCFHLYCVTRLNDRQFVHDPQLGYSLPCVAGCPNSLIKELHHFRILGEEQYNRYQQYGAEECVLQMGGVLCPRPGCGAGLLPEPDQRKVTCEGGNGLGCGFAFCRECKEAYHEGECSAVFEASGTTTQAYRVDERAAEQARWEAASKETIKKTTKPCPRCHVPVEKNGGCMHMKCPQPQCRLEWCWNCGCEWNRVCMGDHWFDV,465,False,8756,8756,0,0.75,manual,Clausen,A mutational atlas for Parkin proteostasis,2023,10.1101/2023.06.08.544160,1-465,Parkin,protein stability,FACS,PRKN_HUMAN_2023-08-07_b05.a2m,1,465,465,0.5,0.2,1457,0.998,464.0,195.2,0.4206896552,Low,21.0,0.04525862069,urn_mavedb_00000114-a-1_scores.csv,score,1,mutant,PRKN_HUMAN_theta0.2_2023-08-07_b05.npy,PRKN_HUMAN.pdb,1.0,,Expression -PSAE_SYNP2_Tsuboyama_2023_1PSE,PSAE_SYNP2_Tsuboyama_2023_1PSE.csv,PSAE_SYNP2,Prokaryote,Synechococcus sp.,AIERGSKVKILRKESYWYGDVGTVASIDKSGIIYPVIVRFNKVNYNGFSGSAGGLNTNNFAEHELEVV,68,True,1579,1219,360,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-39,1-68,Photosystem I reaction center subunit IV,Stability,cDNA display proteolysis,PSAE_SYNP2_2023-08-07_b09.a2m,1,68,68,0.9,0.2,1785,0.868,59.0,130.7,2.215254237,Medium,9.0,0.1525423729,Tsuboyama2023_Dataset2_Dataset36,ddG_ML_float,1,mut_type,PSAE_SYNP2_theta0.2_2023-08-07_b09.npy,PSAE_SYNP2.pdb,1.0,,Stability -PTEN_HUMAN_Matreyek_2021,PTEN_HUMAN_Matreyek_2021.csv,PTEN_HUMAN,Human,Homo 
sapiens,MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV,403,False,5083,5083,0,0.7708605475,median,Matreyek,Integrating thousands of PTEN variant activity and abundance measurements reveals variant subgroups and new dominant negatives in cancers,2021,10.1186/s13073-021-00984-x,1-403,PTEN,Protein abundance (FACS sorting for abundance of GFP-fused target),Protein stability,PTEN_HUMAN_full_11-26-2021_b01.a2m,1,403,403,0.1,0.2,19058,0.752,303.0,1425.3,4.703960396,medium,52.0,0.1716171617,PTEN_HUMAN_Matreyek_2021.csv,score_total,1,variant,PTEN_HUMAN_theta_0.2.npy,PTEN_HUMAN.pdb,0.1,,Expression -PTEN_HUMAN_Mighell_2018,PTEN_HUMAN_Mighell_2018.csv,PTEN_HUMAN,Human,Homo sapiens,MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV,403,False,7260,7260,0,-1.5,manual,Mighell,A Saturation Mutagenesis Approach to Understanding PTEN Lipid Phosphatase Activity and Genotype-Phenotype Relationships,2018,10.1016/j.ajhg.2018.03.018,1-403,PTEN,"growth (surrogate for enzymatic activity/hydrolysis of lipid phosphates to restore PIP2, which affects proliferation rate)",lipid phosphatase 
activity,PTEN_HUMAN_full_11-26-2021_b01.a2m,1,403,403,0.1,0.2,19058,0.752,303.0,1425.3,4.703960396,medium,52.0,0.1716171617,PTEN_HUMAN_Mighell_2018.csv,Fitness_score,1,mutant,PTEN_HUMAN_theta_0.2.npy,PTEN_HUMAN.pdb,0.1,,Activity -Q2N0S5_9HIV1_Haddox_2018,Q2N0S5_9HIV1_Haddox_2018.csv,Q2N0S5_9HIV1,Virus,HIV,MRVMGIQRNCQHLFRWGTMILGMIIICSAAENLWVTVYYGVPVWKDAETTLFCASDAKAYETEKHNVWATHACVPTDPNPQEIHLENVTEEFNMWKNNMVEQMHTDIISLWDQSLKPCVKLTPLCVTLQCTNVTNNITDDMRGELKNCSFNMTTELRDKKQKVYSLFYRLDVVQINENQGNRSNNSNKEYRLINCNTSAITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGTGPCPSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENITNNAKNILVQFNTPVQINCTRPNNNTRKSIRIGPGQAFYATGDIIGDIRQAHCNVSKATWNETLGKVVKQLRKHFGNNTIIRFANSSGGDLEVTTHSFNCGGEFFYCNTSGLFNSTWISNTSVQGSNSTGSNDSITLPCRIKQIINMWQRIGQAMYAPPIQGVIRCVSNITGLILTRDGGSTNSTTETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVGREKRAVGIGAVFLGFLGAAGSTMGAASMTLTVQARNLLSGIVQQQSNLLRAIEAQQHLLKLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTNVPWNSSWSNRNLSEIWDNMTWLQWDKEISNYTQIIYGLLEESQNQQEKNEQDLLALDKWASLWNWFDISNWLWYIKIFIMIVGGLIGLRIVFAVLSVIHRVRQGYSPLSFQTHTPNPRGLDRPERIEEEDGEQDRGRSTRLVSGFLALAWDDLRSLCLFCYHRLRDFILIAARIVELLGHSSLKGLRLGWEGLKYLWNLLAYWGRELKISAINLFDTIAIAVAEWTDRVIEIGQRLCRAFLHIPRRIRQGLERALL,860,False,12729,12729,0,-2.0,manual,Haddox,Mapping mutational effects along the evolutionary landscape of HIV envelope,2018,10.7554/eLife.34420,30-699,HIV env (BG505),Viral replication,Growth,Q2N0S5_9HIV1_full_theta0.99_04-29-2022_b09.a2m,1,860,860,0.9,0.01,75014,0.976,839.0,36369.7,43.3488677,medium,2462.0,2.934445769,Q2N0S5_9HIV1_Haddox_2018.csv,fitness,1,mutant,Q2N0S5_9HIV1_theta_0.01.npy,Q2N0S5_9HIV1.pdb,0.1,,OrganismalFitness -Q53Z42_HUMAN_McShan_2019_binding-TAPBPR,Q53Z42_HUMAN_McShan_2019_binding-TAPBPR.csv,Q53Z42_HUMAN,Human,Homo 
sapiens,MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGQEQRYTCHVQHEGLPKPLTLRWEPSSQPTIPIVGIIAGLVLFGAVITGAVVAAVMWRRKSSDRKGGSYSQAASSDSAQGSDVSLTACKV,365,False,3344,3344,0,0.19,median,McShan,Molecular determinants of chaperone interactions on MHC-I for folding and antigen repertoire selection,2019,10.1073/pnas.1915562116,26-205,HLA-A,binding affinity (TAPBPR),,Q53Z42_HUMAN_2023-08-07_b01.a2m,1,365,365,0.1,0.2,41636,0.986,360.0,4986.2,13.85055556,Medium,210.0,0.5833333333,,score,1,mut_proteingym,Q53Z42_HUMAN_theta0.2_2023-08-07_b01.npy,Q53Z42_HUMAN.pdb,1.0,25.0,Binding -Q53Z42_HUMAN_McShan_2019_expression,Q53Z42_HUMAN_McShan_2019_expression.csv,Q53Z42_HUMAN,Human,Homo sapiens,MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGQEQRYTCHVQHEGLPKPLTLRWEPSSQPTIPIVGIIAGLVLFGAVITGAVVAAVMWRRKSSDRKGGSYSQAASSDSAQGSDVSLTACKV,365,False,3344,3344,0,-0.73,median,McShan,Molecular determinants of chaperone interactions on MHC-I for folding and antigen repertoire selection,2019,10.1073/pnas.1915562116,26-205,HLA-A,surface expression,,Q53Z42_HUMAN_2023-08-07_b01.a2m,1,365,365,0.1,0.2,41636,0.986,360.0,4986.2,13.85055556,Medium,210.0,0.5833333333,,score,1,mut_proteingym,Q53Z42_HUMAN_theta0.2_2023-08-07_b01.npy,Q53Z42_HUMAN.pdb,1.0,25.0,Expression -Q59976_STRSQ_Romero_2015,Q59976_STRSQ_Romero_2015.csv,Q59976_STRSQ,Prokaryote,Streptomyces 
sp.,MVPAAQQTAMAPDAALTFPEGFLWGSATASYQIEGAAAEDGRTPSIWDTYARTPGRVRNGDTGDVATDHYHRWREDVALMAELGLGAYRFSLAWPRIQPTGRGPALQKGLDFYRRLADELLAKGIQPVATLYHWDLPQELENAGGWPERATAERFAEYAAIAADALGDRVKTWTTLNEPWCSAFLGYGSGVHAPGRTDPVAALRAAHHLNLGHGLAVQALRDRLPADAQCSVTLNIHHVRPLTDSDADADAVRRIDALANRVFTGPMLQGAYPEDLVKDTAGLTDWSFVRDGDLRLAHQKLDFLGVNYYSPTLVSEADGSGTHNSDGHGRSAHSPWPGADRVAFHQPPGETTAMGWAVDPSGLYELLRRLSSDFPALPLVITENGAAFHDYADPEGNVNDPERIAYVRDHLAAVHRAIKDGSDVRGYFLWSLLDNFEWAHGYSKRFGAVYVDYPTGTRIPKASARWYAEVARTGVLPTAGDPNSSSVDKLAAALEHHHHHH,501,False,2999,2999,0,-1.0,manual,Romero,Dissecting enzyme function with microfluidic-based deep mutational scanning,2015,10.1073/pnas.1422285112,2-501,β-glucosidase,Enzyme function,Activity,Q59976_STRSQ_full_11-26-2021_b03.a2m,1,501,501,0.3,0.2,105913,0.882,442.0,13981.2,31.63167421,medium,850.0,1.923076923,Q59976_STRSQ_Romero_2015.csv,enrichment,1,mutant,Q59976_STRSQ_theta_0.2.npy,Q59976_STRSQ.pdb,0.1,,Activity -Q6WV13_9MAXI_Somermeyer_2022,Q6WV13_9MAXI_Somermeyer_2022.csv,Q6WV12_9MAXI,Eukaryote,Pontellina plumata,MPAMKIECRITGTLNGVEFELVGGGEGTPEQGRMTNKMKSTKGALTFSPYLLSHVMGYGFYHFGTYPSGYENPFLHAINNGGYTNTRIEKYEDGGVLHVSFSYRYEAGRVIGDFKVVGTGFPEDSVIFTDKIIRSNATVEHLHPMGDNVLVGSFARTFSLRDGGYYSFVVDSHMHFKSAIHPSILQNGGPMFAFRRVEELHSNTELGIVEYQHAFKTPIAFA,222,True,31401,1141,30260,15721.24977,median,Somermeyer,Heterogeneity of the GFP fitness landscape and data-driven protein design,2022,10.7554/eLife.75842,2-222,Green fluorescent protein ppluGFP2,Fluorescence,FACS,Q6WV12_9MAXI_full_b0.6.a2m,1,222,222,0.6,0.2,506,1.0,222.0,95.9,0.431981982,Low,4.0,0.01801801802,Q6WV13_9MAXI_Somermeyer_2022.csv,replicates_mean_brightness,1,mutant,Q6WV12_9MAXI_theta_0.2.npy,Q6WV12_9MAXI.pdb,1.0,,Activity -Q837P4_ENTFA_Meier_2023,Q837P4_ENTFA_Meier_2023.csv,Q837P4_ENTFA,Prokaryote,Enterococcus 
faecalis,MTDLIKASKFFYHYLKRYKVSFLFIFLAIFAATYLQVKAPQFVGEAIQELAKYAVNVMQGKDDKSAFVSVIWKLLIFYVLTSAASFIYSILFTQVVGKSTNRMRIGLFNKLEKLTIRFFDSHQDGEILSRFTSDLDNIQNSLNQALLQVLTNIALLVGVLIMMFRQNVELAWATIASTPIAILIAVFVISKARKYVDLQQDEVGKLNGYMDEKISGQRVIITNGLQEETIDGFLEQNEKVRAATYKGQVYSGLLFPMMQGMSLVNTAIVIFFGGWLAINGSVDRAAALGLVVMFVQYSQQYYQPLMQISSGYSMIQLAVTGARRLNEMFDEPDEIRPENGEKLEEINKAVALNHVVFGYNPETPVLKDVSIHVDKGEMVALVGPTGSGKTTIMNLMNRFYDVNEGAVTFDGVDIREMDLDSLRSHVGIVLQESVLFSGTIRENIAFGKPEATDEEIVQAAKQANIHEFIVNLEQGYDTEITEENNLFSTGQKQLVSIARTIITNPELLILDEATSNVDTVTEAKIQKAMDEAIKGRTSFVIAHRLKTILNADRIIVLRDGEVIEEGNHHELVEQDGFYAELYKNQFVFE,589,False,697,697,0,-0.6270963227,median,Meier,Deep mutational scan of a drug efflux pump reveals its structure–function landscape,2023,10.1038/s41589-022-01205-1,32-543,EfrD ABC transporter,Drug efflux,Growth,Q837P4_ENTFA_2023-08-07_b09.a2m,1,589,589,0.9,0.2,343933,0.975,574.0,54079.8,94.2151568,Medium,1123.0,1.95644599,41589_2022_1205_MOESM4_ESM.xlsx,avg_score,1,mutant,Q837P4_ENTFA_theta0.2_2023-08-07_b09.npy,Q837P4_ENTFA.pdb,1.0,,Activity -Q837P5_ENTFA_Meier_2023,Q837P5_ENTFA_Meier_2023.csv,Q837P5_ENTFA,Prokaryote,Enterococcus faecalis,MDLIIQHAKKYKGSVVIALLAVIVMVVSALWQPKLLQQVLEAIMNDDSDKMKNLGIQLIAIAGLGLVAGVINTIFSAKVAQGVSADIREATFRKIQTFSFGNIEKFSAGNLVVRLTNDVTQIQNVIMIALQTLFRIPFLFIGSFILAMLTLPQLWWVIVALVIAVILISMLSFSQMGKHFMIIQNLIDKINGIAKENLLGIRVVKSFVQEKNQLSRFTKVSEELTTHNLIVGSLFAVMIPAFMLVANLAVVGSIFFVSNLVKDDPTLIGGVASFMNYLMQIMMAIIIGGMMMMMTSRAAVSIKRIKEVMETEPDVTYKKVPEQELIGSVEFDHVSFRYPGDEEDTLKDISFSIQPGEMIGIVGATGAGKSTLAQLIPRLFDPTEGKIEVGGVDLREVNEHSLRKTVSFVLQKAILFSGTIAQNLRHGKRDASEADMERASGIAQAKEFIEKLAEGYDAPVEERSNNFSGGQKQRLSITRGVIGEPKILILDDSTSALDARSERLVREALDKELKETTTIVIAQKISSVVHADRILVLDNGRLVGEGTHEELAATNPVYQEIYETQKGKEEA,571,False,747,747,0,-0.85731232,median,Meier,Deep mutational scan of a drug efflux pump reveals its structure–function landscape,2023,10.1038/s41589-022-01205-1,25-523,EfrC ABC transporter,Drug 
efflux,Growth,Q837P5_ENTFA_2023-08-07_b09.a2m,1,571,571,0.9,0.2,346355,0.993,567.0,54910.5,96.8439153,Medium,1135.0,2.00176367,,avg_score,1,mutant,Q837P5_ENTFA_theta0.2_2023-08-07_b09.npy,Q837P5_ENTFA.pdb,1.0,,Activity -Q8WTC7_9CNID_Somermeyer_2022,Q8WTC7_9CNID_Somermeyer_2022.csv,Q8WTC7_9CNID,Eukaryote,Aequorea macrodactyla,MSKGEELFTGIVPVLIELDGDVHGHKFSVRGEGEGDADYGKLEIKFICTTGKLPVPWPTLVTTLSYGILCFARYPEHMKMNDFFKSAMPEGYIQERTIFFQDDGKYKTRGEVKFEGDTLVNRIELKGMDFKEDGNILGHKLEYNFNSHNVYIMPDKANNGLKVNFKIRHNIEGGGVQLADHYQTNVPLGDGPVLIPINHYLSCQTAISKDRNETRDHMVFLEFFSACGHTHGMDELYK,238,True,33510,1201,32309,5000.0,manual,Somermeyer,Heterogeneity of the GFP fitness landscape and data-driven protein design,2022,10.7554/eLife.75842,2-238,Green fluorescent protein amacGFP,Fluorescence,FACS,Q8WTC7_9CNID_full_b0.5.a2m,1,238,238,0.5,0.2,655,1.0,238.0,118.5,0.4978991597,Low,5.0,0.02100840336,Q8WTC8_9CNID_Somermeyer_2022.csv,replicates_mean_brightness,1,mutant,Q8WTC7_9CNID_theta_0.2.npy,Q8WTC7_9CNID.pdb,1.0,,Activity -R1AB_SARS2_Flynn_2022,R1AB_SARS2_Flynn_2022.csv,R1AB_SARS2,Virus,SARS-COV2,SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ,306,False,5725,5725,0,0.5,manual,Flynn,Comprehensive fitness landscape of SARS-CoV-2 Mpro reveals insights into viral resistance mechanisms,2022,10.7554/eLife.77433,1-306,SARS-CoV-2 Mpro,"FRET, Growth",,R1AB_SARS2_02-19-2022_b07.a2m,1,306,306,0.7,0.01,182169,1.0,306.0,326.3,1.066339869,medium,79.0,0.2581699346,R1AB_SARS2_Flynn_2022.csv,average_growth,1,mutant,R1AB_SARS2_theta_0.01.npy,R1AB_SARS2.pdb,0.1,,OrganismalFitness -RAD_ANTMA_Tsuboyama_2023_2CJJ,RAD_ANTMA_Tsuboyama_2023_2CJJ.csv,RAD_ANTMA,Eukaryote,Antirrhinum 
majus,PWSAKENKAFERALAVYDKDTPDRWANVARAVEGRTPEEVKKHYEILVEDIKYI,54,True,912,774,138,-0.3943851731,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-40,1-54,Transcription factor RADIALIS,Stability,cDNA display proteolysis,RAD_ANTMA_2023-08-07_b01.a2m,1,54,54,0.1,0.2,423275,0.833,45.0,38133.9,847.42,High,27.0,0.6,Tsuboyama2023_Dataset2_Dataset37,ddG_ML_float,1,mut_type,RAD_ANTMA_theta0.2_2023-08-07_b01.npy,RAD_ANTMA.pdb,1.0,,Stability -RAF1_HUMAN_Zinkus-Boltz_2019,RAF1_HUMAN_Zinkus-Boltz_2019.csv,RAF1_HUMAN,Human,Homo sapiens,MEHIQGAWKTISNGFGFKDAVFDGSSCISPTIVQQFGYQRRASDDGKLTDPSKTSNTIRVFLPNKQRTVVNVRNGMSLHDCLMKALKVRGLQPECCAVFRLLHEHKGKKARLDWNTDAASLIGEELQVDFLDHVPLTTHNFARKTFLKLAFCDICQKFLLNGFRCQTCGYKFHEHCSTKVPTMCVDWSNIRQLLLFPNSTIGDSGVPALPSLTMRRMRESVSRMPVSSQHRYSTPHAFTFNTSSPSSEGSLSQRQRSTSTPNVHMVSTTLPVDSRMIEDAIRSHSESASPSALSSSPNNLSPTGWSQPKTPVPAQRERAPVSGTQEKNKIRPRGQRDSSYYWEIEASEVMLSTRIGSGSFGTVYKGKWHGDVAVKILKVVDPTPEQFQAFRNEVAVLRKTRHVNILLFMGYMTKDNLAIVTQWCEGSSLYKHLHVQETKFQMFQLIDIARQTAQGMDYLHAKNIIHRDMKSNNIFLHEGLTVKIGDFGLATVKSRWSGSQQVEQPTGSVLWMAPEVIRMQDNNPFSFQSDVYSYGIVLYELMTGELPYSHINNRDQIIFMVGRGYASPDLSKLYKNCPKAMKRLVADCVKKVKEERPLFPQILSSIELLQHSLPKINRSASEPSLHRAAHTEDINACTLTTSPRLPVF,648,False,297,297,0,-0.0671,median,Zinkus-Boltz,A Phage-Assisted Continuous Selection Approach for Deep Mutational Scanning of Protein–Protein Interactions,2019,10.1021/acschembio.9b00669,52-90,RAF oncogene,Viral Replication,binding assays,RAF1_HUMAN_2023-10-12_b05.a2m,1,648,648,0.5,0.2,9685,0.972,630.0,350.5,0.5563492063,Low,30.0,0.04761904762,urn_mavedb_00000061-a-1_scores.csv,score,1,mutant,RAF1_HUMAN_theta0.2_2023-10-12_b05.npy,RAF1_HUMAN.pdb,1.0,,OrganismalFitness -RASH_HUMAN_Bandaru_2017,RASH_HUMAN_Bandaru_2017.csv,RASH_HUMAN,Human,Homo 
sapiens,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPGCMSCKCVLS,189,False,3134,3134,0,-0.25,manual,Bandaru,Deconstruction of the Ras switching cycle through saturation mutagenesis,2017,10.7554/eLife.27810,2-166,HRAS,C-Raf binding and GEF,activity,RASH_HUMAN_full_11-26-2021_b03.a2m,1,189,189,0.3,0.2,204751,0.862,163.0,23971.6,147.0650307,high,205.0,1.257668712,RASH_HUMAN_Bandaru_2017.csv,unregulated,1,mutant,RASH_HUMAN_theta_0.2.npy,RASH_HUMAN.pdb,0.1,,Activity -RASK_HUMAN_Weng_2022_abundance,RASK_HUMAN_Weng_2022_abundance.csv,RASK_HUMAN,Human,Human,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSKDGKKKKKKSKTKCVIM,188,True,26012,3066,22946,-0.504113408,median,Weng,The energetic and allosteric landscape for KRAS inhibition,2022,10.1101/2022.12.06.519122,2-188,KRAS,Yeast growth,,RASK_HUMAN_2023-08-07_b03.a2m,1,188,188,0.3,0.2,260539,0.888,167.0,27850.5,166.7694611,High,211.0,1.263473054,kras_fitness.xlsx,fitness,1,mutant,RASK_HUMAN_theta0.2_2023-08-07_b03.npy,RASK_HUMAN.pdb,1.0,,Expression -RASK_HUMAN_Weng_2022_binding-DARPin_K55,RASK_HUMAN_Weng_2022_binding-DARPin_K55.csv,RASK_HUMAN,Human,Human,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSKDGKKKKKKSKTKCVIM,188,True,24873,3084,21789,-0.4605455262,median,Weng,The energetic and allosteric landscape for KRAS inhibition,2022,10.1101/2022.12.06.519127,2-188,KRAS,Yeast growth,,RASK_HUMAN_2023-08-07_b03.a2m,1,188,188,0.3,0.2,260539,0.888,167.0,27850.5,166.7694611,High,211.0,1.263473054,kras_fitness.xlsx,fitness,1,mutant,RASK_HUMAN_theta0.2_2023-08-07_b03.npy,RASK_HUMAN.pdb,1.0,,Binding 
-RBP1_HUMAN_Tsuboyama_2023_2KWH,RBP1_HUMAN_Tsuboyama_2023_2KWH.csv,RBP1_HUMAN,Human,Homo sapiens,ETQAGIKEEIRRQEFLLNSLHRDLQGGIKDLSKEERLWEVQRILTALKRKLR,52,True,1332,975,357,-0.2693189895,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-41,1-52,RalA-binding protein 1,Stability,cDNA display proteolysis,RBP1_HUMAN_2023-08-07_b01.a2m,1,52,52,0.1,0.2,135922,0.827,43.0,50510.0,1174.651163,High,6.0,0.1395348837,Tsuboyama2023_Dataset2_Dataset38,ddG_ML_float,1,mut_type,RBP1_HUMAN_theta0.2_2023-08-07_b01.npy,RBP1_HUMAN.pdb,1.0,,Stability -RCD1_ARATH_Tsuboyama_2023_5OAO,RCD1_ARATH_Tsuboyama_2023_5OAO.csv,RCD1_ARATH,Eukaryote,Arabidopsis thaliana,PTLFAAISHKVAENDMLLINADYQQLRDKKMTRAEFVRKLRVIVGDDLLRSTITTLQ,57,True,1261,988,273,-0.3828831078,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-42,1-57,Inactive poly [ADP-ribose] polymerase RCD1,Stability,cDNA display proteolysis,RCD1_ARATH_2023-08-07_b02.a2m,1,57,57,0.2,0.2,6525,0.93,53.0,1578.5,29.78301887,Medium,2.0,0.03773584906,Tsuboyama2023_Dataset2_Dataset39,ddG_ML_float,1,mut_type,RCD1_ARATH_theta0.2_2023-08-07_b02.npy,RCD1_ARATH.pdb,1.0,,Stability -RCRO_LAMBD_Tsuboyama_2023_1ORC,RCRO_LAMBD_Tsuboyama_2023_1ORC.csv,RCRO_LAMBD,Prokaryote,Escherichia phage lambda,QRITLKDYAMRFGQTKTAKDLGVYQSAINKAIHAGRKIFLTINADGSVYAEEVKDGEVKPFPS,63,True,2278,1195,1083,-1.255848942,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-43,1-63,Regulatory protein cro,Stability,cDNA display proteolysis,RCRO_LAMBD_2023-08-07_b03.a2m,1,63,63,0.3,0.2,392895,0.762,48.0,51658.6,1076.220833,High,32.0,0.6666666667,Tsuboyama2023_Dataset2_Dataset40,ddG_ML_float,1,mut_type,RCRO_LAMBD_theta0.2_2023-08-07_b03.npy,RCRO_LAMBD.pdb,1.0,,Stability -RD23A_HUMAN_Tsuboyama_2023_1IFY,RD23A_HUMAN_Tsuboyama_2023_1IFY.csv,RD23A_HUMAN,Human,Homo 
sapiens,SEYETMLTEIMSMGYERERVVAALRASYNNPHRAVEYLLTGIPG,44,True,1019,798,221,-0.7285205281,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-44,1-44,UV excision repair protein RAD23 homolog A,Stability,cDNA display proteolysis,RD23A_HUMAN_2023-08-07_b04.a2m,1,44,44,0.4,0.2,100991,0.864,38.0,7912.9,208.2342105,High,21.0,0.5526315789,Tsuboyama2023_Dataset2_Dataset41,ddG_ML_float,1,mut_type,RD23A_HUMAN_theta0.2_2023-08-07_b04.npy,RD23A_HUMAN.pdb,1.0,,Stability -RDRP_I33A0_Li_2023,RDRP_I33A0_Li_2023.csv,RDRP_I33A0,Virus,Influenza A virus,MDVNPTLLFLKVPAQNAISTTFPYTGDPPYSHGTGTGYTMDTVNRTHQYSERGRWTTNTETGAPQLNPIDGPLPEDNEPSGYAQTDCVLEAMAFLEESHPGIFETSCLETMEVVQQTRVDKLTQGRQTYDWTLNRNQPAATALANTIEVFRSNGLTANESGRLIDFLKDVMESMNKEEMEITTHFQRKRRVRDNMTKKMVTQRTIGKRKQRLNKRSYLIRALTLNTMTKDAERGKLKRRAIATPGMQIRGFVYFVETLARSICEKLEQSGLPVGGNEKKAKLANVVRKMMTNSQDTEISFTITGDNTKWNENQNPRMFLAMITYITRNQPEWFRNVLSIAPIMFSNKMARLGKGYMFESKSMKIRTQIPAEMLASIDLKYFNDSTRKKIEKIRPLLIDGTASLSPGMMMGMFNMLSTVLGVSILNLGQKRHTKTTYWWDGLQSSDDFALIVNAPNHEGIQAGVNRFYRTCKLLGINMSKKKSYINRTGTFEFTSFFYRYGFVANFSMELPSFGVSGINESADMSIGVTVIKNNMINNDLGPATAQMALQLFIKDYRYTYRCHRGDTQIQTRRSFEIKKLWEQTHSKAGLLVSDGGPNLYNIRNLHIPEVCLKWELMDEDYQGRLCNPLNPFVNHKDIESVNNAVIMPAHGPAKNMEYDAVATTHSWIPKRNRSILNTSQRGILEDEQMYQKCCNLFEKFFPSSSYRRPVGISSMVEAMVSRARIDARIDFESGRIKKEEFTEIMKICSTIEELRRQK,757,False,12003,12003,0,-1.0,manual,Li,Deep mutational scanning reveals the functional constraints and evolutionary potential of the influenza A virus PB1 protein,2023,10.1101/2023.08.27.554986,1-757,Influenza RNA polymerase PB1,Viral Replication,Growth,RDRP_I33A0_2023-08-07_b01.a2m,1,757,757,0.1,0.01,26589,1.0,757.0,102.8,0.1357992074,Low,0.0,0.0,554986_file16.csv,fitness,1,mutant,RDRP_I33A0_theta0.01_2023-08-07_b01.npy,RDRP_I33A0.pdb,1.0,,OrganismalFitness -REV_HV1H2_Fernandes_2016,REV_HV1H2_Fernandes_2016.csv,REV_HV1H2,Virus,Human immunodeficiency virus type 1 group M subtype B (isolate BRU/LAI) 
(HIV-1),MAGRSGDSDEELIRTVRLIKLLYQSNPPPNPEGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE,116,False,2147,2147,0,-0.06744744968,median,Fernandes,Functional Segregation of Overlapping Genes in HIV,2016,10.1016/j.cell.2016.11.031,1-116,HIV rev,Viral replication,Growth,REV_HV1H2_full_theta0.99_04-29-2022_b09.a2m,1,116,116,0.9,0.01,15839,0.948,110.0,9951.8,90.47090909,medium,54.0,0.4909090909,REV_HV1H2_Fernandes_2016.csv,sel_coeff_mean,1,mutant,REV_HV1H2_theta_0.01.npy,REV_HV1H2.pdb,0.1,,OrganismalFitness -RFAH_ECOLI_Tsuboyama_2023_2LCL,RFAH_ECOLI_Tsuboyama_2023_2LCL.csv,RFAH_ECOLI,Prokaryote,Escherichia coli,ATPYPGDKVIITEGAFEGFQAIFTEPDGEARSMLLLNLINKEIKHSVKNTEFRKL,55,True,1326,969,357,-0.4014057355,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-45,1-55,Transcription antitermination protein RfaH,Stability,cDNA display proteolysis,RFAH_ECOLI_2023-08-07_b04.a2m,1,55,55,0.4,0.2,86049,0.927,51.0,11748.4,230.3607843,High,35.0,0.6862745098,Tsuboyama2023_Dataset2_Dataset42,ddG_ML_float,1,mut_type,RFAH_ECOLI_theta0.2_2023-08-07_b04.npy,RFAH_ECOLI.pdb,1.0,,Stability -RL20_AQUAE_Tsuboyama_2023_1GYZ,RL20_AQUAE_Tsuboyama_2023_1GYZ.csv,RL20_AQUAE,Prokaryote,Aquifex aeolicus,WIARINAAVRAYGLNYSTFINGLKKAGIELDRKILADMAVRDPQAFEQVVNKVKEALQV,59,True,1461,1121,340,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-46,1-59,Large ribosomal subunit protein bL20,Stability,cDNA display proteolysis,RL20_AQUAE_2023-08-07_b01.a2m,1,59,59,0.1,0.2,397758,0.814,48.0,104766.4,2182.633333,High,34.0,0.7083333333,Tsuboyama2023_Dataset2_Dataset43,ddG_ML_float,1,mut_type,RL20_AQUAE_theta0.2_2023-08-07_b01.npy,RL20_AQUAE.pdb,1.0,,Stability -RL40A_YEAST_Mavor_2016,RL40A_YEAST_Mavor_2016.csv,RL40A_YEAST,Eukaryote,Saccharomyces cerevisiae 
S288C,MQIFVKTLTGKTITLEVESSDTIDNVKSKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGIIEPSLKALASKYNCDKSVCRKCYARLPPRATNCRKRKCGHTNQLRPKKKLK,128,False,1253,1253,0,-0.2,manual,Mavor,Determination of ubiquitin fitness landscapes under different chemical stresses in a classroom setting,2016,10.7554/eLife.15802,2-76,Ubiquitin,Growth,Growth,RL40A_YEAST_full_11-26-2021_b01.a2m,1,128,128,0.1,0.2,16228,0.695,89.0,3974.4,44.65617978,medium,12.0,0.1348314607,RL401_YEAST_Mavor_2016.csv,DMSO,1,mutant,RL40A_YEAST_theta_0.2.npy,RL40A_YEAST.pdb,0.1,,OrganismalFitness -RL40A_YEAST_Roscoe_2013,RL40A_YEAST_Roscoe_2013.csv,RL40A_YEAST,Eukaryote,Saccharomyces cerevisiae S288C,MQIFVKTLTGKTITLEVESSDTIDNVKSKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGIIEPSLKALASKYNCDKSVCRKCYARLPPRATNCRKRKCGHTNQLRPKKKLK,128,False,1195,1195,0,-0.2,manual,Roscoe,Analyses of the Effects of All Ubiquitin Point Mutants on Yeast Growth Rate,2013,10.1016/j.jmb.2013.01.032,2-76,Ubiquitin,Growth (essential function),Growth,RL40A_YEAST_full_11-26-2021_b01.a2m,1,128,128,0.1,0.2,16228,0.695,89.0,3974.4,44.65617978,medium,12.0,0.1348314607,RL401_YEAST_Roscoe_2013.csv,Selection Coefficient,1,mutant,RL40A_YEAST_theta_0.2.npy,RL40A_YEAST.pdb,0.1,,OrganismalFitness -RL40A_YEAST_Roscoe_2014,RL40A_YEAST_Roscoe_2014.csv,RL40A_YEAST,Eukaryote,Saccharomyces cerevisiae S288C,MQIFVKTLTGKTITLEVESSDTIDNVKSKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGIIEPSLKALASKYNCDKSVCRKCYARLPPRATNCRKRKCGHTNQLRPKKKLK,128,False,1380,1380,0,0.5,manual,Roscoe,"Systematic Exploration of Ubiquitin Sequence, E1 Activation Efficiency, and Experimental Fitness in Yeast",2014,10.1016/j.jmb.2014.05.019,2-76,Ubiquitin,E1 reactivity,Binding,RL40A_YEAST_full_11-26-2021_b01.a2m,1,128,128,0.1,0.2,16228,0.695,89.0,3974.4,44.65617978,medium,12.0,0.1348314607,RL401_YEAST_Roscoe_2014.csv,rel_react,1,mutant,RL40A_YEAST_theta_0.2.npy,RL40A_YEAST.pdb,0.1,,Activity -RNC_ECOLI_Weeks_2023,RNC_ECOLI_Weeks_2023.csv,RNC_ECOLI,Prokaryote,Escherichia 
coli,MNPIVINRLQRKLGYTFNHQELLQQALTHRSASSKHNERLEFLGDSILSYVIANALYHRFPRVDEGDMSRMRATLVRGNTLAELAREFELGECLRLGPGELKSGGFRRESILADTVEALIGGVFLDSDIQTVEKLILNWYQTRLDEISPGDKQKDPKTRLQEYLQGRHLPLPTYLVVQVRGEAHDQEFTIHCQVSGLSEPVVGTGSSRRKAEQAAAEQALKKLELE,226,False,4277,4277,0,-0.054826707,median,Weeks,Fitness and Functional Landscapes of the E. coli RNase III Gene rnc,2023,10.1093/molbev/msad047,1-226,RNase III,Fluorescence,FACS,RNC_ECOLI_2023-08-07_b06.a2m,1,226,226,0.6,0.2,66507,0.969,219.0,16221.4,74.07031963,Medium,275.0,1.255707763,RNC_ECOLI_Weeks_2023.csv,Functional Score Weighted Mean,1,mutant,RNC_ECOLI_theta0.2_2023-08-07_b06.npy,RNC_ECOLI.pdb,1.0,,Activity -RPC1_BP434_Tsuboyama_2023_1R69,RPC1_BP434_Tsuboyama_2023_1R69.csv,RPC1_BP434,Virus,Enterobacteria phage 434,SISSRVKSKRIQLGLNQAELAQKVGTTQQSIEQLENGKTKRPRFLPELASALGVSVDWLLN,61,True,1459,1124,335,-1.349855239,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-47,1-61,Repressor protein CI,Stability,cDNA display proteolysis,RPC1_BP434_2023-08-07_b05.a2m,1,61,61,0.5,0.01,820224,0.951,58.0,192520.2,3319.313793,High,73.0,1.25862069,Tsuboyama2023_Dataset2_Dataset44,ddG_ML_float,1,mut_type,RPC1_BP434_theta0.01_2023-08-07_b05.npy,RPC1_BP434.pdb,1.0,,Stability -RPC1_LAMBD_Li_2019_high-expression,RPC1_LAMBD_Li_2019_high-expression.csv,RPC1_LAMBD,Prokaryote,Escherichia phage lambda (Bacteriophage lambda),MSTKKKPLTQEQLEDARRLKAIYEKKKNELGLSQESVADKMGMGQSGVGALFNGINALNAYNAALLAKILKVSVEEFSPSIAREIYEMYEAVSMQPSLRSEYEYPVFSHVQAGMFSPELRTFTKGDAERWVSTTKKASDSAFWLEVEGNSMTAPTGSKPSFPDGMLILVDPEQAVEPGDFCIARLGGDEFTFKKLIRDSGQVFLQPLNPQYPMIPCNESCSVVGKVIASQWPEETFG,237,False,351,351,0,7.0,manual,Li,Changes in gene expression predictably shift and switch genetic interactions,2019,10.1038/s41467-019-11735-3,19-77,CI,Repressor activity (FACS sorting for expression of GFP 
reporter),FACS,RPC1_LAMBD_2023-08-07_b03.a2m,1,237,237,0.3,0.2,100755,0.886,210.0,28172.8,134.1561905,High,219.0,1.042857143,,H_GFP_mean_scaled,-1,mut_proteingym,RPC1_LAMBD_theta0.2_2023-08-07_b03.npy,RPC1_LAMBD.pdb,1.0,18.0,Activity -RPC1_LAMBD_Li_2019_low-expression,RPC1_LAMBD_Li_2019_low-expression.csv,RPC1_LAMBD,Prokaryote,Escherichia phage lambda (Bacteriophage lambda),MSTKKKPLTQEQLEDARRLKAIYEKKKNELGLSQESVADKMGMGQSGVGALFNGINALNAYNAALLAKILKVSVEEFSPSIAREIYEMYEAVSMQPSLRSEYEYPVFSHVQAGMFSPELRTFTKGDAERWVSTTKKASDSAFWLEVEGNSMTAPTGSKPSFPDGMLILVDPEQAVEPGDFCIARLGGDEFTFKKLIRDSGQVFLQPLNPQYPMIPCNESCSVVGKVIASQWPEETFG,237,False,351,351,0,8.481244509,median,Li,Changes in gene expression predictably shift and switch genetic interactions,2019,10.1038/s41467-019-11735-3,19-77,CI,Repressor activity (FACS sorting for expression of GFP reporter),FACS,RPC1_LAMBD_2023-08-07_b03.a2m,1,237,237,0.3,0.2,100755,0.886,210.0,28172.8,134.1561905,High,219.0,1.042857143,,L_GFP_mean_scaled,-1,mut_proteingym,RPC1_LAMBD_theta0.2_2023-08-07_b03.npy,RPC1_LAMBD.pdb,1.0,18.0,Activity -RS15_GEOSE_Tsuboyama_2023_1A32,RS15_GEOSE_Tsuboyama_2023_1A32.csv,RS15_GEOSE,Prokaryote,Geobacillus stearothermophilus,SPEVQIAILTEQINNLNEHLRVHKKDHHSRRGLLKMVGKRRRLLAYLRNKDVARYREIVEKLG,63,False,1195,1195,0,-0.1292928041,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-48,1-63,Small ribosomal subunit protein uS15,Stability,cDNA display proteolysis,RS15_GEOSE_2023-08-07_b06.a2m,1,63,63,0.6,0.2,44428,1.0,63.0,4519.5,71.73809524,Medium,35.0,0.5555555556,Tsuboyama2023_Dataset2_Dataset45,ddG_ML_float,1,mut_type,RS15_GEOSE_theta0.2_2023-08-07_b06.npy,RS15_GEOSE.pdb,1.0,,Stability -S22A1_HUMAN_Yee_2023_abundance,S22A1_HUMAN_Yee_2023_abundance.csv,S22A1_HUMAN,Human,Homo 
sapiens,PTVDDILEQVGESGWFQKQAFLILCLLSAAFAPICVGIVFLGFTPDHHCQSPGVAELSQRCGWSPAEELNYTVPGLGPAGEAFLGQCRRYEVDWNQSALSCVDPLASLATNRSHLPLGPCQDGWVYDTPGSSIVTEFNLVCADSWKLDLFQSCLNAGFLFGSLGVGYFADRFGRKLCLLGTVLVNAVSGVLMAFSPNYMSMLLFRLLQGLVSKGNWMAGYTLITEFVGSGSRRTVAIMYQMAFTVGLVALTGLAYALPHWRWLQLAVSLPTFLFLLYYWCVPESPRWLLSQKRNTEAIKIMDHIAQKNGKLPPADLKMLSLEEDVTEKLSPSFADLFRTPRLRKRTFILMYLWFTDSVLYQGLILHMGATSGNLYLDFLYSALVEIPGAFIALITIDRVGRIYPMAMSNLLAGAACLVMIFISPDLHWLNIIIMCVGRMGITIAIQMICLVNAELYPTFVRNLGVMVCSSLCDIGGIITPFIVFRLREVWQALPLILFAVLGLLAAGVTLLLPETKGVALPETMKDAENLGRKAKPKENTIYLKVQTSEPSGT,553,False,9803,9803,0,-1.0,manual,Yee,The full spectrum of OCT1 (SLC22A1) mutations bridges transporter biophysics to drug pharmacogenomics,2023,10.1101/2023.06.06.543963,1-549,Oct1,abundance,FACS,S22A1_HUMAN_2023-08-07_b02.a2m,1,553,553,0.2,0.2,198790,0.807,446.0,32557.5,72.99887892,Medium,485.0,1.087443946,543963_file04.xlsx,GFP_score,1,mutant,S22A1_HUMAN_theta0.2_2023-08-07_b02.npy,S22A1_HUMAN.pdb,1.0,,Expression -S22A1_HUMAN_Yee_2023_activity,S22A1_HUMAN_Yee_2023_activity.csv,S22A1_HUMAN,Human,Homo sapiens,PTVDDILEQVGESGWFQKQAFLILCLLSAAFAPICVGIVFLGFTPDHHCQSPGVAELSQRCGWSPAEELNYTVPGLGPAGEAFLGQCRRYEVDWNQSALSCVDPLASLATNRSHLPLGPCQDGWVYDTPGSSIVTEFNLVCADSWKLDLFQSCLNAGFLFGSLGVGYFADRFGRKLCLLGTVLVNAVSGVLMAFSPNYMSMLLFRLLQGLVSKGNWMAGYTLITEFVGSGSRRTVAIMYQMAFTVGLVALTGLAYALPHWRWLQLAVSLPTFLFLLYYWCVPESPRWLLSQKRNTEAIKIMDHIAQKNGKLPPADLKMLSLEEDVTEKLSPSFADLFRTPRLRKRTFILMYLWFTDSVLYQGLILHMGATSGNLYLDFLYSALVEIPGAFIALITIDRVGRIYPMAMSNLLAGAACLVMIFISPDLHWLNIIIMCVGRMGITIAIQMICLVNAELYPTFVRNLGVMVCSSLCDIGGIITPFIVFRLREVWQALPLILFAVLGLLAAGVTLLLPETKGVALPETMKDAENLGRKAKPKENTIYLKVQTSEPSGT,553,False,10094,10094,0,1.0,manual,Yee,The full spectrum of OCT1 (SLC22A1) mutations bridges transporter biophysics to drug pharmacogenomics,2023,10.1101/2023.06.06.543963,1-549,Oct1,uptake of cytotoxic 
substrate,Growth,S22A1_HUMAN_2023-08-07_b02.a2m,1,553,553,0.2,0.2,198790,0.807,446.0,32557.5,72.99887892,Medium,485.0,1.087443946,543963_file04.xlsx,SM73_1_score,-1,mutant,S22A1_HUMAN_theta0.2_2023-08-07_b02.npy,S22A1_HUMAN.pdb,1.0,,Activity -SAV1_MOUSE_Tsuboyama_2023_2YSB,SAV1_MOUSE_Tsuboyama_2023_2YSB.csv,SAV1_MOUSE,Eukaryote,Mus musculus,GEDLPLPPGWSVDWTMRGRKYYIDHNTNTTHWSHPLESGPSSG,43,True,965,679,286,-0.6280556038,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-49,1-43,Protein salvador homolog 1,Stability,cDNA display proteolysis,SAV1_MOUSE_2023-08-07_b06.a2m,1,43,43,0.6,0.2,177542,0.791,34.0,4627.6,136.1058824,High,14.0,0.4117647059,Tsuboyama2023_Dataset2_Dataset46,ddG_ML_float,1,mut_type,SAV1_MOUSE_theta0.2_2023-08-07_b06.npy,SAV1_MOUSE.pdb,1.0,,Stability -SBI_STAAM_Tsuboyama_2023_2JVG,SBI_STAAM_Tsuboyama_2023_2JVG.csv,SBI_STAAM,Prokaryote,Staphylococcus aureus,VRHDERVKSANDAISKLNEKDSIENRRLAQREVNKAPMDVKEHLQKQLDALVAQKD,56,False,1025,1025,0,-0.5166138978,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-50,1-56,Immunoglobulin-binding protein Sbi,Stability,cDNA display proteolysis,SBI_STAAM_2023-08-07_b04.a2m,1,56,56,0.4,0.2,14476,0.875,49.0,1539.4,31.41632653,Medium,21.0,0.4285714286,Tsuboyama2023_Dataset2_Dataset47,ddG_ML_float,1,mut_type,SBI_STAAM_theta0.2_2023-08-07_b04.npy,SBI_STAAM.pdb,1.0,,Stability -SC6A4_HUMAN_Young_2021,SC6A4_HUMAN_Young_2021.csv,SC6A4_HUMAN,Human,Homo 
sapiens,METTPLNSQKQLSACEDGEDCQENGVLQKVVPTPGDKVESGQISNGYSAVPSPGAGDDTRHSIPATTTTLVAELHQGERETWGKKVDFLLSVIGYAVDLGNVWRFPYICYQNGGGAFLLPYTIMAIFGGIPLFYMELALGQYHRNGCISIWRKICPIFKGIGYAICIIAFYIASYYNTIMAWALYYLISSFTDQLPWTSCKNSWNTGNCTNYFSEDNITWTLHSTSPAEEFYTRHVLQIHRSKGLQDLGGISWQLALCIMLIFTVIYFSIWKGVKTSGKVVWVTATFPYIILSVLLVRGATLPGAWRGVLFYLKPNWQKLLETGVWIDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQDALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPASTFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHVWAKRRERFVLAVVITCFFGSLVTLTFGGAYVVKLLEEYATGPAVLTVALIEAVAVSWFYGITQFCRDVKEMLGFSPGWFWRICWVAISPLFLLFIICSFLMSPPQLRLFQYNYPYWSIILGYCIGTSSFICIPTYIAYRLIITPGTFKERIIKSITPETPTEIPCGDIRLNAV,630,False,11576,11576,0,-0.1560688323,median,Young,Deep Mutagenesis of a Transporter for Uptake of a Non-Native Substrate Identifies Conformationally Dynamic Regions,2021,10.1101/2021.04.19.440442,2-630,Sodium-dependent serotonin transporter,Fluorescence,Fluorescence,SC6A4_HUMAN_full_11-26-2021_b02.a2m,1,630,630,0.2,0.2,40971,0.805,507.0,5278.9,10.41203156,medium,278.0,0.5483234714,SC6A4_HUMAN_Young_2021.csv,avg_MYC,1,mutant,SC6A4_HUMAN_theta_0.2.npy,SC6A4_HUMAN.pdb,0.1,,Activity -SCIN_STAAR_Tsuboyama_2023_2QFF,SCIN_STAAR_Tsuboyama_2023_2QFF.csv,SCIN_STAAR,Prokaryote,Staphylococcus aureus,QNEKLANELKSLLDELNVNELATGSLNTYYKRTIKISGQKAMYALKSKDFKKMSEAKYQLQKIYNEIDEA,70,False,1212,1212,0,-0.4037152866,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-51,1-70,Staphylococcal complement inhibitor,Stability,cDNA display proteolysis,SCIN_STAAR_2023-08-07_b02.a2m,1,70,70,0.2,0.2,38043,0.9,63.0,11146.3,176.9253968,High,4.0,0.06349206349,Tsuboyama2023_Dataset2_Dataset48,ddG_ML_float,1,mut_type,SCIN_STAAR_theta0.2_2023-08-07_b02.npy,SCIN_STAAR.pdb,1.0,,Stability -SCN5A_HUMAN_Glazer_2019,SCN5A_HUMAN_Glazer_2019.csv,SCN5A_HUMAN,Human,Homo 
sapiens,MANFLLPRGTSSFRRFTRESLAAIEKRMAEKQARGSTTLQESREGLPEEEAPRPQLDLQASKKLPDLYGNPPQELIGEPLEDLDPFYSTQKTFIVLNKGKTIFRFSATNALYVLSPFHPIRRAAVKILVHSLFNMLIMCTILTNCVFMAQHDPPPWTKYVEYTFTAIYTFESLVKILARGFCLHAFTFLRDPWNWLDFSVIIMAYTTEFVDLGNVSALRTFRVLRALKTISVISGLKTIVGALIQSVKKLADVMVLTVFCLSVFALIGLQLFMGNLRHKCVRNFTALNGTNGSVEADGLVWESLDLYLSDPENYLLKNGTSDVLLCGNSSDAGTCPEGYRCLKAGENPDHGYTSFDSFAWAFLALFRLMTQDCWERLYQQTLRSAGKIYMIFFMLVIFLGSFYLVNLILAVVAMAYEEQNQATIAETEEKEKRFQEAMEMLKKEHEALTIRGVDTVSRSSLEMSPLAPVNSHERRSKRRKRMSSGTEECGEDRLPKSDSEDGPRAMNHLSLTRGLSRTSMKPRSSRGSIFTFRRRDLGSEADFADDENSTAGESESHHTSLLVPWPLRRTSAQGQPSPGTSAPGHALHGKKNSTVDCNGVVSLLGAGDPEATSPGSHLLRPVMLEHPPDTTTPSEEPGGPQMLTSQAPCVDGFEEPGARQRALSAVSVLTSALEELEESRHKCPPCWNRLAQRYLIWECCPLWMSIKQGVKLVVMDPFTDLTITMCIVLNTLFMALEHYNMTSEFEEMLQVGNLVFTGIFTAEMTFKIIALDPYYYFQQGWNIFDSIIVILSLMELGLSRMSNLSVLRSFRLLRVFKLAKSWPTLNTLIKIIGNSVGALGNLTLVLAIIVFIFAVVGMQLFGKNYSELRDSDSGLLPRWHMMDFFHAFLIIFRILCGEWIETMWDCMEVSGQSLCLLVFLLVMVIGNLVVLNLFLALLLSSFSADNLTAPDEDREMNNLQLALARIQRGLRFVKRTTWDFCCGLLRQRPQKPAALAAQGQLPSCIATPYSPPPPETEKVPPTRKETRFEEGEQPGQGTPGDPEPVCVPIAVAESDTDDQEEDEENSLGTEEESSKQQESQPVSGGPEAPPDSRTWSQVSATASSEAEASASQADWRQQWKAEPQAPGCGETPEDSCSEGSTADMTNTAELLEQIPDLGQDVKDPEDCFTEGCVRRCPCCAVDTTQAPGKVWWRLRKTCYHIVEHSWFETFIIFMILLSSGALAFEDIYLEERKTIKVLLEYADKMFTYVFVLEMLLKWVAYGFKKYFTNAWCWLDFLIVDVSLVSLVANTLGFAEMGPIKSLRTLRALRPLRALSRFEGMRVVVNALVGAIPSIMNVLLVCLIFWLIFSIMGVNLFAGKFGRCINQTEGDLPLNYTIVNNKSQCESLNLTGELYWTKVKVNFDNVGAGYLALLQVATFKGWMDIMYAAVDSRGYEEQPQWEYNLYMYIYFVIFIIFGSFFTLNLFIGVIIDNFNQQKKKLGGQDIFMTEEQKKYYNAMKKLGSKKPQKPIPRPLNKYQGFIFDIVTKQAFDVTIMFLICLNMVTMMVETDDQSPEKINILAKINLLFVAIFTGECIVKLAALRHYYFTNSWNIFDFVVVILSIVGTVLSDIIQKYFFSPTLFRVIRLARIGRILRLIRGAKGIRTLLFALMMSLPALFNIGLLLFLVMFIYSIFGMANFAYVKWEAGIDDMFNFQTFANSMLCLFQITTSAGWDGLLSPILNTGPPYCDPTLPNSNGSRGDCGSPAVGILFFTTYIIISFLIVVNMYIAIILENFSVATEESTEPLSEDDFDMFYEIWEKFDPEATQFIEYSVLSDFADALSEPLRIAKPNQISLINMDLPMVSGDRIHCMDILFAFTKRVLGESGEMDALKIQMEEKFMAANPSKISYEPITTTLRRKHEEVSAMVIQRAFRRHLLQRSLKHASFLFRQQAGSGLSEEDAPEREGLIAYVMSENFSRPLGPPSSSSISSTSFPPSYDSVTRATSDNLQVRG
SDYSHSEDLADFPPSPDRDRESIV,2016,False,224,224,0,-88.35,median,Glazer,Deep Mutational Scan of an SCN5A Voltage Sensor,2019,10.1161/CIRCGEN.119.002786,1621-1632,SCN5A,"drug resistance (triple-drug assay: veratridine + brevetoxin + ouabain; surrogate for sodium channel dysfunction, select against function)",,SCN5A_HUMAN_1611-1642_11-26-2021_b03.a2m,1611,1642,32,0.3,0.2,49973,0.812,26.0,743.1,28.58076923,medium,2.0,0.07692307692,SCN5A_HUMAN_Glazer_2019.csv,dms,-1,mutation,SCN5A_HUMAN_theta_0.2.npy,SCN5A_HUMAN.pdb,0.1,,OrganismalFitness -SDA_BACSU_Tsuboyama_2023_1PV0,SDA_BACSU_Tsuboyama_2023_1PV0.csv,SDA_BACSU,Prokaryote,Bacillus subtilis,MRKLSDELLIESYFKATEMNLNRDFIELIENEIKRRSLGHIISV,44,True,2770,834,1936,-1.0,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-52,1-44,Sporulation inhibitor sda,Stability,cDNA display proteolysis,SDA_BACSU_2023-08-07_b05.a2m,1,44,44,0.5,0.2,1953,0.886,39.0,876.8,22.48205128,Medium,4.0,0.1025641026,Tsuboyama2023_Dataset2_Dataset49,ddG_ML_float,1,mut_type,SDA_BACSU_theta0.2_2023-08-07_b05.npy,SDA_BACSU.pdb,1.0,,Stability -SERC_HUMAN_Xie_2023,SERC_HUMAN_Xie_2023.csv,SERC_HUMAN,Human,Homo sapiens,MDAPRQVVNFGPGPAKLPHSVLLEIQKELLDYKGVGISVLEMSHRSSDFAKIINNTENLVRELLAVPDNYKVIFLQGGGCGQFSAVPLNLIGLKAGRCADYVVTGAWSAKAAEEAKKFGTINIVHPKLGSYTKIPDPSTWNLNPDASYVYYCANETVHGVEFDFIPDVKGAVLVCDMSSNFLSKPVDVSKFGVIFAGAQKNVGSAGVTVVIVRDDLLGFALRECPSVLEYKVQAGNSSLYNTPPCFSIYVMGLVLEWIKNNGGAAAMEKLSSIKSQTIYEIIDNSQGFYVCPVEPQNRSKMNIPFRIGNAKGDDALEKRFLDKALELNMLSLKGHRSVGGIRASLYNAVTIEDVQKLAAFMKKFLEMHQL,370,False,1914,1914,0,0.9360658319,median,Xie,Predicting the functional effect of compound heterozygous genotypes from large scale variant effect maps,2023,10.1101/2023.01.11.523651,2-370,PSAT1,Yeast 
growth,,SERC_HUMAN_2023-08-07_b02.a2m,1,370,370,0.2,0.2,232438,0.949,351.0,42521.5,121.1438746,High,899.0,2.561253561,urn_mavedb_00000107-b-1_scores-2.csv,score,1,mutant,SERC_HUMAN_theta0.2_2023-08-07_b02.npy,SERC_HUMAN.pdb,1.0,,OrganismalFitness -SHOC2_HUMAN_Kwon_2022,SHOC2_HUMAN_Kwon_2022.csv,SHOC2_HUMAN,Human,Homo sapiens,MSSSLGKEKDSKEKDPKVPSAKEREKEAKASGGFGKESKEKEPKTKGKDAKDGKKDSSAAQPGVAFSVDNTIKRPNPAPGTRKKSSNAEVIKELNKCREENSMRLDLSKRSIHILPSSIKELTQLTELYLYSNKLQSLPAEVGCLVNLMTLALSENSLTSLPDSLDNLKKLRMLDLRHNKLREIPSVVYRLDSLTTLYLRFNRITTVEKDIKNLSKLSMLSIRENKIKQLPAEIGELCNLITLDVAHNQLEHLPKEIGNCTQITNLDLQHNELLDLPDTIGNLSSLSRLGLRYNRLSAIPRSLAKCSALEELNLENNNISTLPESLLSSLVKLNSLTLARNCFQLYPVGGPSQFSTIYSLNMEHNRINKIPFGIFSRAKVLSKLNMKDNQLTSLPLDFGTWTSMVELNLATNQLTKIPEDVSGLVSLEVLILSNNLLKKLPHGLGNLRKLRELDLEENKLESLPNEIAYLKDLQKLVLTNNQLTTLPRGIGHLTNLTHLGLGENLLTHLPEEIGTLENLEELYLNDNPNLHSLPFELALCSKLSIMSIENCPLSHLPPQIVAGGPSFIIQFLKMQGPYRAMV,582,False,10972,10972,0,-0.34,median,Kwon,Structure–function analysis of the SHOC2–MRAS–PP1C holophosphatase complex,2022,10.1038/s41586-022-04928-2,2-582,Leucine-rich repeat protein SHOC-2,Drug resistance,Survival (dosed with trametinib),SHOC2_HUMAN_2023-10-12_b04.a2m,1,582,582,0.4,0.2,22163,0.777,452.0,8806.8,19.4840708,Medium,379.0,0.8384955752,2022.3.16.Extended Data Table 4.csv,LFC_scaled,1,variant.by.aa,SHOC2_HUMAN_theta0.2_2023-10-12_b04.npy,SHOC2_HUMAN.pdb,1.0,,OrganismalFitness -SOX30_HUMAN_Tsuboyama_2023_7JJK,SOX30_HUMAN_Tsuboyama_2023_7JJK.csv,SOX30_HUMAN,Human,Homo sapiens,RPMNAFMVWARIHRPALAKANPAANNAEISVQLGLEWNKLSEEQKKPYYDEAQKIKE,57,False,1010,1010,0,-0.3216404755,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-53,1-57,Transcription factor SOX-30,Stability,cDNA display 
proteolysis,SOX30_HUMAN_2023-08-07_b03.a2m,1,57,57,0.3,0.2,158104,0.982,56.0,14909.6,266.2428571,High,36.0,0.6428571429,Tsuboyama2023_Dataset2_Dataset50,ddG_ML_float,1,mut_type,SOX30_HUMAN_theta0.2_2023-08-07_b03.npy,SOX30_HUMAN.pdb,1.0,,Stability -SPA_STAAU_Tsuboyama_2023_1LP1,SPA_STAAU_Tsuboyama_2023_1LP1.csv,SPA_STAAU,Prokaryote,Staphylococcus aureus,KFNKELSVAGREIVTLPNLNDPQKKAFIFSLWDDPSQSANLLAEAKKLNDAQAPK,55,True,2105,1035,1070,-0.9794586971,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-54,1-55,Immunoglobulin G-binding protein A,Stability,cDNA display proteolysis,SPA_STAAU_2023-08-07_b04.a2m,1,55,55,0.4,0.2,184804,0.927,51.0,2042.1,40.04117647,Medium,25.0,0.4901960784,Tsuboyama2023_Dataset2_Dataset51,ddG_ML_float,1,mut_type,SPA_STAAU_theta0.2_2023-08-07_b04.npy,SPA_STAAU.pdb,1.0,,Stability -SPG1_STRSG_Olson_2014,SPG1_STRSG_Olson_2014.csv,SPG1_STRSG,Prokaryote,Streptococcus sp. group G,MEKEKKVKYFLRKSAFGLASVSAAFLVGSTVFAVDSPIEDTPIIRNGGELTNLLGNSETTLALRNEESATADLTAAAVADTVAAAAAENAGAAAWEAAAAADALAKAKADALKEFNKYGVSDYYKNLINNAKTVEGIKDLQAQVVESAKKARISEATDGLSDFLKSQTPAEDTVKSIELAEAKVLANRELDKYGVSDYHKNLINNAKTVEGVKELIDEILAALPKTDQYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTEKPEVIDASELTPAVTTYKLVINGKTLKGETTTKAVDAETAEKAFKQYANDNGVDGVWTYDDATKTFTVTEMVTEVPGDAPTEPEKPEASIPLVPLTPATPIAKDDAKKDDTKKEDAKKPEAKKDDAKKAETLPTTGEGSNPFFTAAALAVMAGAGALAVASKRKED,448,True,536962,1045,535917,-4.0,manual,Olson,A comprehensive biophysical description of pairwise epistasis throughout an entire protein domain,2014,10.1016/j.cub.2014.09.072,228-282,GB1,Binding (IgG),Binding,SPG1_STRSG_full_11-26-2021_b07.a2m,1,448,448,0.7,0.2,44,0.913,409.0,3.3,0.008068459658,low,0.0,0.0,SPG1_STRSG_Olson_2014.csv,lnW,1,mutant,SPG1_STRSG_theta_0.2.npy,SPG1_STRSG.pdb,0.1,,Binding -SPG1_STRSG_Wu_2016,SPG1_STRSG_Wu_2016.csv,SPG1_STRSG,Prokaryote,Streptococcus sp. 
group G,MEKEKKVKYFLRKSAFGLASVSAAFLVGSTVFAVDSPIEDTPIIRNGGELTNLLGNSETTLALRNEESATADLTAAAVADTVAAAAAENAGAAAWEAAAAADALAKAKADALKEFNKYGVSDYYKNLINNAKTVEGIKDLQAQVVESAKKARISEATDGLSDFLKSQTPAEDTVKSIELAEAKVLANRELDKYGVSDYHKNLINNAKTVEGVKELIDEILAALPKTDQYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTEKPEVIDASELTPAVTTYKLVINGKTLKGETTTKAVDAETAEKAFKQYANDNGVDGVWTYDDATKTFTVTEMVTEVPGDAPTEPEKPEASIPLVPLTPATPIAKDDAKKDDTKKEDAKKPEAKKDDAKKAETLPTTGEGSNPFFTAAALAVMAGAGALAVASKRKED,448,True,149360,76,149284,0.1224388752,median,Wu,Adaptation in protein fitness landscapes is facilitated by indirect paths,2016,10.7554/eLife.16965,265-280,GB1,Binding (IgG),binding,SPG1_STRSG_full_b0.1.a2m,1,448,448,0.1,0.2,3109,1.0,448.0,600.4,1.340178571,Medium,97.0,0.2165178571,SPG1_STRSG_Wu_2016.csv,Fitness,1,Variants,SPG1_STRSG_b01_theta_0.2.npy,SPG1_STRSG.pdb,1.0,,Binding -SPG2_STRSG_Tsuboyama_2023_5UBS,SPG2_STRSG_Tsuboyama_2023_5UBS.csv,SPG2_STRSG,Prokaryote,Streptococcus sp. group G,MTFKLIINGKTLKGETTTEAVDAATAEKVFKQYFNDNGIDGEWTYDDATKTFTITE,56,True,1451,1029,422,-1.000627629,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-55,1-56,Immunoglobulin G-binding protein G,Stability,cDNA display proteolysis,SPG2_STRSG_2023-08-07_b03.a2m,1,56,56,0.3,0.2,39899,0.75,42.0,2567.6,61.13333333,Medium,6.0,0.1428571429,Tsuboyama2023_Dataset2_Dataset52,ddG_ML_float,1,mut_type,SPG2_STRSG_theta0.2_2023-08-07_b03.npy,SPG2_STRSG.pdb,1.0,,Stability 
-SPIKE_SARS2_Starr_2020_binding,SPIKE_SARS2_Starr_2020_binding.csv,SPIKE_SARS2,Virus,SARS-COV2,MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT,1273,False,3802,3802,0,-0.5,manual,Starr,Deep Mutational Scanning of SARS-CoV-2 Receptor Binding Domain Reveals Constraints on Folding and ACE2 Binding,2020,10.1016/j.cell.2020.08.012,331-531,Spike RBD,ACE2 binding,Binding,SPIKE_SARS2_theta0.99_full_11-26-2021_b01.a2m,1,1273,1273,0.1,0.01,36931,0.998,1271.0,1405.2,1.105586153,medium,2059.0,1.619984264,SPIKE_SARS2_Starr_2020.csv,bind_avg,1,mutation,SPIKE_SARS2_theta_0.01.npy,SPIKE_SARS2.pdb,0.1,,Binding 
-SPIKE_SARS2_Starr_2020_expression,SPIKE_SARS2_Starr_2020_expression.csv,SPIKE_SARS2,Virus,SARS-COV2,MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT,1273,False,3798,3798,0,-1.0,manual,Starr,Deep Mutational Scanning of SARS-CoV-2 Receptor Binding Domain Reveals Constraints on Folding and ACE2 Binding,2020,10.1016/j.cell.2020.08.012,331-531,Spike RBD,ACE2 binding,Binding,SPIKE_SARS2_theta0.99_full_11-26-2021_b01.a2m,1,1273,1273,0.1,0.01,36931,0.998,1271.0,1405.2,1.105586153,medium,2059.0,1.619984264,SPIKE_SARS2_Starr_2020.csv,expr_avg,1,mutation,SPIKE_SARS2_theta_0.01.npy,SPIKE_SARS2.pdb,0.1,,Expression -SPTN1_CHICK_Tsuboyama_2023_1TUD,SPTN1_CHICK_Tsuboyama_2023_1TUD.csv,SPTN1_CHICK,Eukaryote,Gallus 
gallus,RQGFVPAAYVKKLDSGTGKELVLALYDYQEKSPREVTMKKGDILTLLNSTNKDWWKVEVN,60,True,3201,1051,2150,-2.360476078,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-56,1-60,"Spectrin alpha chain, non-erythrocytic 1",Stability,cDNA display proteolysis,SPTN1_CHICK_2023-08-07_b03.a2m,1,60,60,0.3,0.2,420793,0.933,56.0,15051.5,268.7767857,High,47.0,0.8392857143,Tsuboyama2023_Dataset2_Dataset53,ddG_ML_float,1,mut_type,SPTN1_CHICK_theta0.2_2023-08-07_b03.npy,SPTN1_CHICK.pdb,1.0,,Stability -SQSTM_MOUSE_Tsuboyama_2023_2RRU,SQSTM_MOUSE_Tsuboyama_2023_2RRU.csv,SQSTM_MOUSE,Eukaryote,Mus musculus,RLIESLSQMLSMGFSDEGGWLTRLLQTKNYDIGAALDTIQ,40,False,707,707,0,-0.8554856463,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-57,1-40,Sequestosome-1,Stability,cDNA display proteolysis,SQSTM_MOUSE_2023-08-07_b05.a2m,1,40,40,0.5,0.2,34660,0.925,37.0,3244.5,87.68918919,Medium,13.0,0.3513513514,Tsuboyama2023_Dataset2_Dataset54,ddG_ML_float,1,mut_type,SQSTM_MOUSE_theta0.2_2023-08-07_b05.npy,SQSTM_MOUSE.pdb,1.0,,Stability -SR43C_ARATH_Tsuboyama_2023_2N88,SR43C_ARATH_Tsuboyama_2023_2N88.csv,SR43C_ARATH,Eukaryote,Arabidopsis thaliana,AVAESVIGKRVGDDGKTIEYLVKWTDMSDATWEPQDNVDSTLVLLYQQ,48,True,1583,889,694,-1.591761235,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-58,1-48,"Signal recognition particle 43 kDa protein, chloroplastic",Stability,cDNA display proteolysis,SR43C_ARATH_2023-08-07_b02.a2m,1,48,48,0.2,0.2,101118,0.917,44.0,12180.6,276.8318182,High,26.0,0.5909090909,Tsuboyama2023_Dataset2_Dataset55,ddG_ML_float,1,mut_type,SR43C_ARATH_theta0.2_2023-08-07_b02.npy,SR43C_ARATH.pdb,1.0,,Stability -SRBS1_HUMAN_Tsuboyama_2023_2O2W,SRBS1_HUMAN_Tsuboyama_2023_2O2W.csv,SRBS1_HUMAN,Human,Homo 
sapiens,GIDPFTGEAIAKFNFNGDTQVEMSFRKGERITLLRQVDENWYEGRIPGTSRQGIFPITYVDVIKRPL,67,True,1556,1211,345,-1.169019411,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-59,1-67,Sorbin and SH3 domain-containing protein 1,Stability,cDNA display proteolysis,SRBS1_HUMAN_2023-08-07_b03.a2m,1,67,67,0.3,0.2,708655,0.836,56.0,22689.0,405.1607143,High,60.0,1.071428571,Tsuboyama2023_Dataset2_Dataset56,ddG_ML_float,1,mut_type,SRBS1_HUMAN_theta0.2_2023-08-07_b03.npy,SRBS1_HUMAN.pdb,1.0,,Stability -SRC_HUMAN_Ahler_2019,SRC_HUMAN_Ahler_2019.csv,SRC_HUMAN,Human,Homo sapiens,MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL,536,False,3372,3372,0,-1.0,manual,Ahler,"A Combined Approach Reveals a Regulatory Mechanism Coupling Src's Kinase Activity, Localization, and Phosphotransferase-Independent Functions",2019,10.1016/j.molcel.2019.02.003,270-519,SRC,growth (surrogate for phosphorylation activity),Growth,SRC_HUMAN_full_11-26-2021_b06.a2m,1,536,536,0.6,0.2,26974,0.808,433.0,1405.1,3.245034642,medium,86.0,0.1986143187,SRC_HUMAN_Ahler_CD_2019.csv,Activity_Score,1,mutant_uniprot_1,SRC_HUMAN_theta_0.2.npy,SRC_HUMAN.pdb,0.1,,Activity -SRC_HUMAN_Chakraborty_2023_binding-DAS_25uM,SRC_HUMAN_Chakraborty_2023_binding-DAS_25uM.csv,SRC_HUMAN,Human,S. 
Cerevisiae,MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL,536,False,3637,3637,0,-0.086077486,median,Chakraborty,Profiling of the drug resistance of thousands of Src tyrosine kinase mutants uncovers a regulatory network that couples autoinhibition to catalytic domain dynamics,2022,10.1101/2021.12.05.471322,270-519,SRC,Fluorescence measurement,,SRC_HUMAN_2023-08-07_b06.a2m,1,536,536,0.6,0.2,37675,0.869,466.0,1789.0,3.839055794,Medium,117.0,0.2510729614,GSE190495_Src_DAS_25_Score.csv,DMS_score,1,mutant,SRC_HUMAN_theta0.2_2023-08-07_b06.npy,SRC_HUMAN.pdb,1.0,,Activity -SRC_HUMAN_Nguyen_2022,SRC_HUMAN_Nguyen_2022.csv,SRC_HUMAN,Human,Human,MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL,536,False,3366,3366,0,0.535786927,median,Nguyen,Molecular Determinants of Hsp90 Dependence of Src Kinase Revealed by Deep Mutational Scanning,2022,10.1002/pro.4656,270-519,SRC,growth 
enrichment,,SRC_HUMAN_2023-08-07_b06.a2m,1,536,536,0.6,0.2,37675,0.869,466.0,1789.0,3.839055794,Medium,117.0,0.2510729614,diffsel_calib.csv,diffsel,-1,mutant,SRC_HUMAN_theta0.2_2023-08-07_b06.npy,SRC_HUMAN.pdb,1.0,,OrganismalFitness -SUMO1_HUMAN_Weile_2017,SUMO1_HUMAN_Weile_2017.csv,SUMO1_HUMAN,Human,Homo sapiens,MSDQEAKPSTEDLGDKKEGEYIKLKVIGQDSSEIHFKVKMTTHLKKLKESYCQRQGVPMNSLRFLFEGQRIADNHTPKELGMEEEDVIEVYQEQTGGHSTV,101,False,1700,1700,0,0.3,manual,Weile,A framework for exhaustively mapping functional missense variants,2017,10.15252/msb.20177908,2-97,Small ubiquitin-related modifier 1,Yeast growth,complementation,SUMO1_HUMAN_full_11-26-2021_b02.a2m,1,101,101,0.2,0.2,85570,0.703,71.0,13120.2,184.7915493,high,67.0,0.9436619718,SUMO1_HUMAN_Weile_2017.csv,screenscore,1,mutant,SUMO1_HUMAN_theta_0.2.npy,SUMO1_HUMAN.pdb,0.1,,OrganismalFitness -SYUA_HUMAN_Newberry_2020,SYUA_HUMAN_Newberry_2020.csv,SYUA_HUMAN,Human,Homo sapiens,MDVYMKGLSKAKEGVVAAAEKTKQGVAEAAGKTKEGVLFVGSKTKEGVVHGVATVAEKTKEQVTNVGGAVVTGVTAVAQKTVEGAGSIAAATGYVKKDQLGKNEEGAPQEGILEDMPVDPDNEAFEMPSEEGFQDFEPEA,140,False,2497,2497,0,-0.1,manual,Newberry,Robust Sequence Determinants of α-Synuclein Toxicity in Yeast Implicate Membrane Binding,2020,10.1021/acschembio.0c00339,1-140,alpha-synuclein,Growth,Growth,SYUA_HUMAN_full_04-29-2022_b01.a2m,1,140,140,0.1,0.2,15711,0.707,99.0,6509.6,65.75353535,medium,62.0,0.6262626263,SYUA_HUMAN_Newberry_2020.csv,Fitness Score,-1,mutant,SYUA_HUMAN_theta_0.2.npy,SYUA_HUMAN.pdb,0.1,,OrganismalFitness -TADBP_HUMAN_Bolognesi_2019,TADBP_HUMAN_Bolognesi_2019.csv,TADBP_HUMAN,Human,Homo 
sapiens,MSEYIRVTEDENDEPIEIPSEDDGTVLLSTVTAQFPGACGLRYRNPVSQCMRGVRLVEGILHAPDAGWGNLVYVVNYPKDNKRKMDETDASSAVKVKRAVQKTSDLIVLGLPWKTTEQDLKEYFSTFGEVLMVQVKKDLKTGHSKGFGFVRFTEYETQVKVMSQRHMIDGRWCDCKLPNSKQSQDEPLRSRKVFVGRCTEDMTEDELREFFSQYGDVMDVFIPKPFRAFAFVTFADDQIAQSLCGEDLIIKGISVHISNAEPKHNSNRQLERSGRFGGNPGGFGNQGGFGNSRGGGAGLGNNQGSNMGGGMNFGAFSINPAMMAAAQAALQSSWGMMGMLASQQNQSGPSGNNQNQGNMQREPNQAFGSGNNSYSGSNSGAAIGWGSASNAGSGSGFNGGFGSSMDSKSSGWGM,414,False,1196,1196,0,0.003661517102,median,Bolognesi,The mutational landscape of a prion-like domain,2019,10.1038/s41467-019-12101-z,290-373,TARDBP,growth (surrogate for toxicity),Growth,TADBP_HUMAN_full_11-26-2021_b09.a2m,1,414,414,0.9,0.2,1211,0.911,377.0,147.3,0.3907161804,low,8.0,0.02122015915,TADBP_HUMAN_Bolognesi_2019.csv,toxicity,1,mutant_uniprot_1,TADBP_HUMAN_theta_0.2.npy,TADBP_HUMAN.pdb,0.1,,OrganismalFitness -TAT_HV1BR_Fernandes_2016,TAT_HV1BR_Fernandes_2016.csv,TAT_HV1BR,Virus,Human immunodeficiency virus type 1 group M subtype B (isolate BRU/LAI) (HIV-1),MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRAHQNSQTHQASLSKQPTSQSRGDPTGPKE,86,False,1577,1577,0,-0.2,manual,Fernandes,Functional Segregation of Overlapping Genes in HIV,2016,10.1016/j.cell.2016.11.031,1-86,HIV tat,Viral replication,Growth,TAT_HV1BR_full_theta0.99_04-29-2022_b09.a2m,1,86,86,0.9,0.01,12155,0.988,85.0,9925.0,116.7647059,high,49.0,0.5764705882,TAT_HV1BR_Fernandes_2016.csv,sel_coeff_mean,1,mutant,TAT_HV1BR_theta_0.01.npy,TAT_HV1BR.pdb,0.1,,OrganismalFitness -TCRG1_MOUSE_Tsuboyama_2023_1E0L,TCRG1_MOUSE_Tsuboyama_2023_1E0L.csv,TCRG1_MOUSE,Eukaryote,Mus musculus,GATAVSEWTEYKTADGKTYYYNNRTLESTWEKPQELK,37,True,1058,621,437,-1.2,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-60,1-37,Transcription elongation regulator 1,Stability,cDNA display 
proteolysis,TCRG1_MOUSE_2023-08-07_b08.a2m,1,37,37,0.8,0.2,43363,0.865,32.0,2819.7,88.115625,Medium,14.0,0.4375,Tsuboyama2023_Dataset2_Dataset57,ddG_ML_float,1,mut_type,TCRG1_MOUSE_theta0.2_2023-08-07_b08.npy,TCRG1_MOUSE.pdb,1.0,,Stability -THO1_YEAST_Tsuboyama_2023_2WQG,THO1_YEAST_Tsuboyama_2023_2WQG.csv,THO1_YEAST,Eukaryote,Saccharomyces cerevisiae,SADYSSLTVVQLKDLLTKRNLSVGGLKNEWVQRLIKDDEES,41,True,1279,656,623,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-61,1-41,Protein THO1,Stability,cDNA display proteolysis,THO1_YEAST_2023-08-07_b05.a2m,1,41,41,0.5,0.2,54877,0.805,33.0,8516.7,258.0818182,High,15.0,0.4545454545,Tsuboyama2023_Dataset2_Dataset58,ddG_ML_float,1,mut_type,THO1_YEAST_theta0.2_2023-08-07_b05.npy,THO1_YEAST.pdb,1.0,,Stability -TNKS2_HUMAN_Tsuboyama_2023_5JRT,TNKS2_HUMAN_Tsuboyama_2023_5JRT.csv,TNKS2_HUMAN,Human,Homo sapiens,FSITQFVRNLGLEHLMDIFEREQITLRVLVEMGHKELKEIGINAYGHREKLIKGVERLI,59,True,1479,1118,361,-0.9451205822,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-62,1-59,Poly [ADP-ribose] polymerase tankyrase-2,Stability,cDNA display proteolysis,TNKS2_HUMAN_2023-08-07_b03.a2m,1,59,59,0.3,0.2,270654,0.949,56.0,11206.0,200.1071429,High,26.0,0.4642857143,Tsuboyama2023_Dataset2_Dataset59,ddG_ML_float,1,mut_type,TNKS2_HUMAN_theta0.2_2023-08-07_b03.npy,TNKS2_HUMAN.pdb,1.0,,Stability -TPK1_HUMAN_Weile_2017,TPK1_HUMAN_Weile_2017.csv,TPK1_HUMAN,Human,Homo sapiens,MEHAFTPLEPLLSTGNLKYCLVILNQPLDNYFRHLWNKALLRACADGGANRLYDITEGERESFLPEFINGDFDSIRPEVREYYATKGCELISTPDQDHTDFTKCLKMLQKKIEEKDLKVDVIVTLGGLAGRFDQIMASVNTLFQATHITPFPIIIIQEESLIYLLQPGKHRLHVDTGMEGDWCGLIPVGQPCMQVTTTGLKWNLTNDVLAFGTLVSTSNTYDGSGVVTVETDHPLLWTMAIKS,243,False,3181,3181,0,0.5,manual,Weile,A framework for exhaustively mapping functional missense variants,2017,10.15252/msb.20177908,2-243,Thiamin pyrophosphokinase 1,Yeast 
growth,complementation,TPK1_HUMAN_full_11-26-2021_b02.a2m,1,243,243,0.2,0.2,21515,0.823,200.0,7122.6,35.613,medium,234.0,1.17,TPK1_HUMAN_Weile_2017.csv,screenscore,1,mutant,TPK1_HUMAN_theta_0.2.npy,TPK1_HUMAN.pdb,0.1,,OrganismalFitness -TPMT_HUMAN_Matreyek_2018,TPMT_HUMAN_Matreyek_2018.csv,TPMT_HUMAN,Human,Homo sapiens,MDGTRTSLDIEEYSDTEVQKNQVLTLEEWQDKWVNGKTAFHQEQGHQLLKKHLDTFLKGKSGLRVFFPLCGKAVEMKWFADRGHSVVGVEISELGIQEFFTEQNLSYSEEPITEIPGTKVFKSSSGNISLYCCSIFDLPRTNIGKFDMIWDRGALVAINPGDRKCYADTMFSLLGKKFQYLLCVLSYDPTKHPGPPFYVPHAEIERLFGKICNIRCLEKVDAFEERHKSWGIDCLFEKLYLLTEK,245,False,3648,3648,0,0.5,manual,Matreyek,Multiplex Assessment of Protein Variant Abundance by Massively Parallel Sequencing,2018,10.1038/s41588-018-0122-z,1-245,Thiopurine S-methyltransferase,Protein abundance (FACS sorting for abundance of GFP-fused target),Protein stability,TPMT_HUMAN_full_11-26-2021_b03.a2m,1,245,245,0.3,0.2,19526,0.731,179.0,6296.8,35.17765363,medium,109.0,0.6089385475,TPMT_HUMAN_Matreyek_2018.csv,score,1,mutant,TPMT_HUMAN_theta_0.2.npy,TPMT_HUMAN.pdb,0.1,,Expression -TPOR_HUMAN_Bridgford_2020,TPOR_HUMAN_Bridgford_2020.csv,TPOR_HUMAN,Human,Homo sapiens,MPSWALFMVTSCLLLAPQNLAQVSSQDVSLLASDSEPLKCFSRTFEDLTCFWDEEEAAPSGTYQLLYAYPREKPRACPLSSQSMPHFGTRYVCQFPDQEEVRLFFPLHLWVKNVFLNQTRTQRVLFVDSVGLPAPPSIIKAMGGSQPGELQISWEEPAPEISDFLRYELRYGPRDPKNSTGPTVIQLIATETCCPALQRPHSASALDQSPCAQPTMPWQDGPKQTSPSREASALTAEGGSCLISGLQPGNSYWLQLRSEPDGISLGGSWGSWSLPVTVDLPGDAVALGLQCFTLDLKNVTCQWQQQDHASSQGFFYHSRARCCPRDRYPIWENCEEEEKTNPGLQTPQFSRCHFKSRNDSIIHILVEVTTAPGTVHSYLGSPFWIHQAVRLPTPNLHWREISSGHLELEWQHPSSWAAQETCYQLRYTGEGHQDWKVLEPPLGARGGTLELRPRSRYRLQLRARLNGPTYQGPWSSWSDPTRVETATETAWISLVTALHLVLGLNAVLGLLLLRWQFPAHYRRLRHALWPSLPDLHRVLGQYLRDTAALSPPKATVSDTCEEVEPSLLEILPKSSERTPLPLCSSQAQMDYRRLQPSCLGTMPLSVCPPMAESGSCCTTHIANHSYLPLSYWQQP,635,False,562,562,0,-0.1,manual,Bridgford,Novel drivers and modifiers of MPL-dependent oncogenic transformation identified by deep mutational scanning,2020,10.1182/blood.2019002561,487-517,MPL,growth/survival 
(surrogate for TpoR/MPL enhanced constitutive activation),Growth,TPOR_HUMAN_full_11-26-2021_b01.a2m,1,635,635,0.1,0.2,937,0.825,524.0,128.4,0.2450381679,low,0.0,0.0,TPOR_HUMAN_Bridgford_S505N_2020.csv,score,1,mutant_uniprot_1,TPOR_HUMAN_theta_0.2.npy,TPOR_HUMAN.pdb,0.1,,OrganismalFitness -TRPC_SACS2_Chan_2017,TRPC_SACS2_Chan_2017.csv,TRPC_SACS2,Prokaryote,Thermus thermophilus,MPRYLKGWLKDVVQLSLRRPSFRASRQRPIISLNERILEFNKRNITAIIAEYKRKSPSGLDVERDPIEYSKFMERYAVGLSILTEEKYFNGSYETLRKIASSVSIPILMKDFIVKESQIDDAYNLGADTVLLIVKILTERELESLLEYARSYGMEPLIEINDENDLDIALRIGARFIGINSRDLETLEINKENQRKLISMIPSNVVKVAESGISERNEIEELRKLGVNAFLIGSSLMRNPEKIKEFIL,248,False,1519,1519,0,-0.5,manual,Chan,Correlation of fitness landscapes from three orthologous TIM barrels originates from sequence and structure constraints,2017,10.1038/ncomms14614,44-235,TIM Barrell (S. solfataricus),fitness,Growth,TRPC_SACS2_full_11-26-2021_b07.a2m,1,248,248,0.7,0.2,52935,0.944,234.0,10651.1,45.51752137,medium,364.0,1.555555556,TRPC_SACS2_Chan_2017.csv,fitness,1,mutant,TRPC_SACS2_theta_0.2.npy,TRPC_SACS2.pdb,0.1,,OrganismalFitness -TRPC_THEMA_Chan_2017,TRPC_THEMA_Chan_2017.csv,TRPC_THEMA,Prokaryote,Thermus thermophilus,MRRLWEIVEAKKKDILEIDGENLIVQRRNHRFLEVLSGKERVKIIAEFKKASPSAGDINADASLEDFIRMYDELADAISILTEKHYFKGDPAFVRAARNLTSRPILAKDFYIDTVQVKLASSVGADAILIIARILTAEQIKEIYEAAEELGMDSLVEVHSREDLEKVFSVIRPKIIGINTRDLDTFEIKKNVLWELLPLVPDDTVVVAESGIKDPRELKDLRGKVNAVLVGTSIMKAENPRRFLEEMRAWSE,252,False,1519,1519,0,-0.5,manual,Chan,Correlation of fitness landscapes from three orthologous TIM barrels originates from sequence and structure constraints,2017,10.1038/ncomms14614,40-233,TIM Barrell (T. 
maritima),fitness,Growth,TRPC_THEMA_full_11-26-2021_b07.a2m,1,252,252,0.7,0.2,52988,0.948,239.0,10582.5,44.27824268,medium,380.0,1.589958159,TRPC_THEMA_Chan_2017.csv,fitness,1,mutant,TRPC_THEMA_theta_0.2.npy,TRPC_THEMA.pdb,0.1,,OrganismalFitness -UBC9_HUMAN_Weile_2017,UBC9_HUMAN_Weile_2017.csv,UBC9_HUMAN,Human,Homo sapiens,MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPSY,159,False,2563,2563,0,0.384407289,median,Weile,A framework for exhaustively mapping functional missense variants,2017,10.15252/msb.20177908,1-158,SUMO-conjugating enzyme UBC9,Yeast growth,complementation,UBC9_HUMAN_full_11-26-2021_b03.a2m,1,159,159,0.3,0.2,69788,0.849,135.0,8394.0,62.17777778,medium,89.0,0.6592592593,UBC9_HUMAN_Weile_2017.csv,screenscore,1,mutant,UBC9_HUMAN_theta_0.2.npy,UBC9_HUMAN.pdb,0.1,,OrganismalFitness -UBE4B_HUMAN_Tsuboyama_2023_3L1X,UBE4B_HUMAN_Tsuboyama_2023_3L1X.csv,UBE4B_HUMAN,Human,Homo sapiens,DAPDEFRDPLMDTLMTDPVRLPSGTIMDRSIILRHLLNSPTDPFNRQTLTESMLEPVPELKEQIQAWMR,69,True,3622,1118,2504,-1.0,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-63,1-69,Ubiquitin conjugation factor E4 B,Stability,cDNA display proteolysis,UBE4B_HUMAN_2023-08-07_b04.a2m,1,69,69,0.4,0.2,310943,0.928,64.0,34185.4,534.146875,High,52.0,0.8125,Tsuboyama2023_Dataset2_Dataset60,ddG_ML_float,1,mut_type,UBE4B_HUMAN_theta0.2_2023-08-07_b04.npy,UBE4B_HUMAN.pdb,1.0,,Stability -UBE4B_MOUSE_Starita_2013,UBE4B_MOUSE_Starita_2013.csv,UBE4B_MOUSE,Eukaryote,Mus 
musculus,MEELSADEIRRRRLARLAGGQTSQPTTPLTSPQRENPPGPPIAASAPGPSQSLGLNVHNMTPATSPIGAAGVAHRSQSSEGVSSLSSSPSNSLETQSQSLSRSQSMDIDGVSCEKSMSQVDVDSGIENMEVDENDRREKRSLSDKEPSSGPEVSEEQALQLVCKIFRVSWKDRDRDVIFLSSLSAQFKQNPKEVFSDFKDLIGQILMEVLMMSTQTRDENPFASLTATSQPIATAARSPDRNLMLNTGSSSGTSPMFCNMGSFSTSSLSSLGASGGASNWDSYSDHFTIETCKETDMLNYLIECFDRVGIEEKKAPKMCSQPAVSQLLSNIRSQCISHTALVLQGSLTQPRSLQQPSFLVPYMLCRNLPYGFIQELVRTTHQDEEVFKQIFIPILQGLALAAKECSLESDYFKYPLMALGELCETKFGKTHPMCNLVASLPLWLPKSLSPGSGRELQRLSYLGAFFSFSVFAEDDAKVVEKYFSGPAITLENTRVVSQSLQHYLELGRQELFKILHSILLNGETREAALSYMAALVNANMKKAQMQADDRLVSTDGFMLNLLWVLQQLSTKIKLETVDPTYIFHPRCRITLPNDETRINATMEDVNERLTELYGDQPPFSEPKFPTECFFLTLHAHHLSILPSCRRYIRRLRAIRELNRTVEDLKNNESQWKDSPLATRHREMLKRCKTQLKKLVRCKACADAGLLDESFLRRCLNFYGLLIQLMLRILDPAYPDVTLPLNSEVPKVFAALPEFYVEDVAEFLFFIVQYSPQVLYEPCTQDIVMFLVVMLCNQNYIRNPYLVAKLVEVMFMTNPSVQPRTQKFFEMIENHPLSTKLLVPSLMKFYTDVEHTGATSEFYDKFTIRYHISTIFKSLWQNIAHHGTFMEEFNSGKQFVRYINMLINDTTFLLDESLESLKRIHEVQEEMKNKEQWDQLPRDQQQARQSQLAQDERVSRSYLALATETVDMFHLLTKQVQKPFLRPELGPRLAAMLNFNLQQLCGPKCRDLKVENPEKYGFEPKKLLDQLTDIYLQLDCARFAKAIADDQRSYSKELFEEVISKMRKAGIKSTIAIEKFKLLAEKVEEIVAKNARAEIDYSDAPDEFRDPLMDTLMTDPVRLPSGTVMDRSIILRHLLNSPTDPFNRQMLTESMLEPVPELKEQIQAWMREKQSSDH,1173,False,899,899,0,-1.8,manual,Starita,Activity-enhancing mutations in an E3 ubiquitin ligase identified by high-throughput mutagenesis,2013,10.1073/pnas.1303309110,1072-1173,Ube4b,Ligase activity (phage display),Auto-ubiquitination,UBE4B_MOUSE_full_11-26-2021_b05.a2m,1,1173,1173,0.5,0.2,4743,0.765,897.0,679.4,0.7574136009,low,49.0,0.05462653289,UBE4B_MOUSE_Starita_2013.csv,log2_ratio,1,mutant,UBE4B_MOUSE_theta_0.2.npy,UBE4B_MOUSE.pdb,0.1,,Activity -UBR5_HUMAN_Tsuboyama_2023_1I2T,UBR5_HUMAN_Tsuboyama_2023_1I2T.csv,UBR5_HUMAN,Human,Homo sapiens,HRQALGERLYPRVQAMQPAFASKITGMLLELSPAQLLLLLASEDSLRARVDEAMELII,58,True,1453,1094,359,-0.4460165437,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-64,1-58,E3 ubiquitin-protein ligase 
UBR5,Stability,cDNA display proteolysis,UBR5_HUMAN_2023-08-07_b05.a2m,1,58,58,0.5,0.2,17888,0.966,56.0,1031.7,18.42321429,Medium,14.0,0.25,Tsuboyama2023_Dataset2_Dataset61,ddG_ML_float,1,mut_type,UBR5_HUMAN_theta0.2_2023-08-07_b05.npy,UBR5_HUMAN.pdb,1.0,,Stability -VG08_BPP22_Tsuboyama_2023_2GP8,VG08_BPP22_Tsuboyama_2023_2GP8.csv,VG08_BPP22,Virus,Salmonella phage P22,ITGDVSAANKDAIRKQMDAAASKGDVETYRKLKAKLKGIR,40,False,723,723,0,-0.2013306011,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-65,1-40,Scaffolding protein,Stability,cDNA display proteolysis,VG08_BPP22_2023-08-07_b05.a2m,1,40,40,0.5,0.01,102464,0.875,35.0,12963.6,370.3885714,High,13.0,0.3714285714,Tsuboyama2023_Dataset2_Dataset62,ddG_ML_float,1,mut_type,VG08_BPP22_theta0.01_2023-08-07_b05.npy,VG08_BPP22.pdb,1.0,,Stability -VILI_CHICK_Tsuboyama_2023_1YU5,VILI_CHICK_Tsuboyama_2023_1YU5.csv,VILI_CHICK,Eukaryote,Gallus gallus,KLETFPLDVLVNTAAEDLPRGVDPSRKENHLSDEDFKAVFGMTRSAFANLPLWKQQNLKKEKGLF,65,True,2568,1202,1366,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-66,1-65,Villin-1,Stability,cDNA display proteolysis,VILI_CHICK_2023-08-07_b01.a2m,1,65,65,0.1,0.2,254210,0.769,50.0,46507.8,930.156,High,19.0,0.38,Tsuboyama2023_Dataset2_Dataset63,ddG_ML_float,1,mut_type,VILI_CHICK_theta0.2_2023-08-07_b01.npy,VILI_CHICK.pdb,1.0,,Stability -VKOR1_HUMAN_Chiasson_2020_abundance,VKOR1_HUMAN_Chiasson_2020_abundance.csv,VKOR1_HUMAN,Human,Homo sapiens,MGSTWGSPGWVRLALCLTGLVLSLYALHVKAARARDRDYRALCDVGTAISCSRVFSSRWGRGFGLVEHVLGQDSILNQSNSIFGCIFYTLQLLLGCLRTRWASVLMLLSSLVSLAGSVYLAWILFFVLYDFCIVCITTYAINVSLMWLSFRKVQEPQGKAKRH,163,False,2695,2695,0,0.7480893367,median,Chiasson,"Multiplexed measurement of variant abundance and activity reveals VKOR topology, active site and human variant impact",2020,10.7554/eLife.58026,2-163,VKORC1,protein abundance (eGFP fusion 
reporter),Fluorescence,VKOR1_HUMAN_full_11-26-2021_b03.a2m,1,163,163,0.3,0.2,14510,0.779,127.0,4655.0,36.65354331,medium,97.0,0.7637795276,VKOR1_HUMAN_Chiasson_2020.csv,abundance_score,1,variant,VKOR1_HUMAN_theta_0.2.npy,VKOR1_HUMAN.pdb,0.1,,Expression -VKOR1_HUMAN_Chiasson_2020_activity,VKOR1_HUMAN_Chiasson_2020_activity.csv,VKOR1_HUMAN,Human,Homo sapiens,MGSTWGSPGWVRLALCLTGLVLSLYALHVKAARARDRDYRALCDVGTAISCSRVFSSRWGRGFGLVEHVLGQDSILNQSNSIFGCIFYTLQLLLGCLRTRWASVLMLLSSLVSLAGSVYLAWILFFVLYDFCIVCITTYAINVSLMWLSFRKVQEPQGKAKRH,163,False,697,697,0,0.7,manual,Chiasson,"Multiplexed measurement of variant abundance and activity reveals VKOR topology, active site and human variant impact",2020,10.7554/eLife.58026,3-163,VKORC1,carboxylation activity (carboxylation reporter on cell surface),enzymatic activity,VKOR1_HUMAN_full_11-26-2021_b03.a2m,1,163,163,0.3,0.2,14510,0.779,127.0,4655.0,36.65354331,medium,97.0,0.7637795276,VKOR1_HUMAN_Chiasson_2020.csv,activity_score,1,variant,VKOR1_HUMAN_theta_0.2.npy,VKOR1_HUMAN.pdb,0.1,,Activity -VRPI_BPT7_Tsuboyama_2023_2WNM,VRPI_BPT7_Tsuboyama_2023_2WNM.csv,VRPI_BPT7,Virus,Escherichia phage,SLSVDNKKFWATVESSEHSFEVPIYAETLDEALELAEWQYVPAGFEVTRVRPCVAP,56,False,1047,1047,0,-1.1,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-67,1-56,Bacterial RNA polymerase inhibitor,Stability,cDNA display proteolysis,VRPI_BPT7_2023-08-07_b02.a2m,1,56,56,0.2,0.01,6266,0.875,49.0,1555.8,31.75102041,Medium,3.0,0.0612244898,Tsuboyama2023_Dataset2_Dataset64,ddG_ML_float,1,mut_type,VRPI_BPT7_theta0.01_2023-08-07_b02.npy,VRPI_BPT7.pdb,1.0,,Stability -YAIA_ECOLI_Tsuboyama_2023_2KVT,YAIA_ECOLI_Tsuboyama_2023_2KVT.csv,YAIA_ECOLI,Prokaryote,Escherichia coli,PREAYIVTIEKGKPGQTVTWYQLRADHPKPDSLISEHPTAQEAMDAKKRYED,52,True,1890,928,962,-1.953132017,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-68,1-52,Uncharacterized 
protein YaiA,Stability,cDNA display proteolysis,YAIA_ECOLI_2023-08-07_b03.a2m,1,52,52,0.3,0.2,5877,0.788,41.0,737.2,17.9804878,Medium,5.0,0.1219512195,Tsuboyama2023_Dataset2_Dataset65,ddG_ML_float,1,mut_type,YAIA_ECOLI_theta0.2_2023-08-07_b03.npy,YAIA_ECOLI.pdb,1.0,,Stability -YAP1_HUMAN_Araya_2012,YAP1_HUMAN_Araya_2012.csv,YAP1_HUMAN,Human,Homo sapiens,MDPGQQPPPQPAPQGQGQPPSQPPQGQGPPSGPGQPAPAATQAAPQAPPAGHQIVHVRGDSETDLEALFNAVMNPKTANVPQTVPMRLRKLPDSFFKPPEPKSHSRQASTDAGTAGALTPQHVRAHSSPASLQLGAVSPGTLTPTGVVSGPAATPTAQHLRQSSFEIPDDVPLPAGWEMAKTSSGQRYFLNHIDQTTTWQDPRKAMLSQMNVTAPTSPPVQQNMMNSASGPLPDGWEQAMTQDGEIYYINHKNKTTSWLDPRLDPRFAMNQRISQSAPVKQPPPLAPQSPQGGVMGGSNSNQQQQMRLQQLQMEKERLRLKQQELLRQAMRNINPSTANSPKCQELALRSQLPTLEQDGGTQNPVSSPGMSQELRTMTTNSSDPFLNSGTYHSRDESTDSGLSMSSYSVPRTPDDFLNSVDEMDTGDTINQSTLPSQQNRFPDYLEAIPGTNVDLGTLEGDGMNIEGEELMPSLQEALSSDILNDMESVLAATKLDKESFLTWL,504,True,10075,362,9713,0.6236402571,median,Araya,"A fundamental protein property, thermodynamic stability, revealed solely from large-scale measurements of protein function",2012,10.1073/pnas.1209751109,170-203,YAP1,peptide binding,Binding,YAP1_HUMAN_full_11-26-2021_b02.a2m,1,504,504,0.2,0.2,1604,0.859,433.0,132.6,0.3062355658,low,1.0,0.002309468822,YAP1_HUMAN_Araya_2012.csv,W,1,mutant,YAP1_HUMAN_theta_0.2.npy,YAP1_HUMAN.pdb,0.1,,Binding -YNZC_BACSU_Tsuboyama_2023_2JVD,YNZC_BACSU_Tsuboyama_2023_2JVD.csv,YNZC_BACSU,Prokaryote,Bacillus subtilis,MISNAKIARINELAAKAKAGVITEEEKAEQQKLRQEYLK,39,True,2300,714,1586,-1.0,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-69,1-39,UPF0291 protein YnzC,Stability,cDNA display proteolysis,YNZC_BACSU_2023-08-07_b07.a2m,1,39,39,0.7,0.2,7116,0.974,38.0,1588.3,41.79736842,Medium,13.0,0.3421052632,Tsuboyama2023_Dataset2_Dataset66,ddG_ML_float,1,mut_type,YNZC_BACSU_theta0.2_2023-08-07_b07.npy,YNZC_BACSU.pdb,1.0,,Stability 
+DMS_id,DMS_filename,UniProt_ID,taxon,source_organism,target_seq,seq_len,includes_multiple_mutants,DMS_total_number_mutants,DMS_number_single_mutants,DMS_number_multiple_mutants,DMS_binarization_cutoff,DMS_binarization_method,first_author,title,year,jo,region_mutated,molecule_name,selection_assay,selection_type,MSA_filename,MSA_start,MSA_end,MSA_len,MSA_bitscore,MSA_theta,MSA_num_seqs,MSA_perc_cov,MSA_num_cov,MSA_N_eff,MSA_Neff_L,MSA_Neff_L_category,MSA_num_significant,MSA_num_significant_L,raw_DMS_filename,raw_DMS_phenotype_name,raw_DMS_directionality,raw_DMS_mutant_column,weight_file_name,pdb_file,pdb_range,ProteinGym_version,raw_mut_offset,coarse_selection_type +A0A140D2T1_ZIKV_Sourisseau_2019,A0A140D2T1_ZIKV_Sourisseau_2019.csv,A0A140D2T1_ZIKV,Virus,Zika virus,MKNPKKKSGGFRIVNMLKRGVARVNPLGGLKRLPAGLLLGHGPIRMVLAILAFLRFTAIKPSLGLINRWGSVGKKEAMEIIKKFKKDLAAMLRIINARKERKRRGADTSIGIIGLLLTTAMAAEITRRGSAYYMYLDRSDAGKAISFATTLGVNKCHVQIMDLGHMCDATMSYECPMLDEGVEPDDVDCWCNTTSTWVVYGTCHHKKGEARRSRRAVTLPSHSTRKLQTRSQTWLESREYTKHLIKVENWIFRNPGFALVAVAIAWLLGSSTSQKVIYLVMILLIAPAYSIRCIGVSNRDFVEGMSGGTWVDVVLEHGGCVTVMAQDKPTVDIELVTTTVSNMAEVRSYCYEASISDMASDSRCPTQGEAYLDKQSDTQYVCKRTLVDRGWGNGCGLFGKGSLVTCAKFTCSKKMTGKSIQPENLEYRIMLSVHGSQHSGMIVNDTGYETDENRAKVEVTPNSPRAEATLGGFGSLGLDCEPRTGLDFSDLYYLTMNNKHWLVHKEWFHDIPLPWHAGADTGTPHWNNKEALVEFKDAHAKRQTVVVLGSQEGAVHTALAGALEAEMDGAKGKLFSGHLKCRLKMDKLRLKGVSYSLCTAAFTFTKVPAETLHGTVTVEVQYAGTDGPCKIPVQMAVDMQTLTPVGRLITANPVITESTENSKMMLELDPPFGDSYIVIGVGDKKITHHWHRSGSTIGKAFEATVRGAKRMAVLGDTAWDFGSVGGVFNSLGKGIHQIFGAAFKSLFGGMSWFSQILIGTLLVWLGLNTKNGSISLTCLALGGVMIFLSTAVSADVGCSVDFSKKETRCGTGVFIYNDVEAWRDRYKYHPDSPRRLAAAVKQAWEEGICGISSVSRMENIMWKSVEGELNAILEENGVQLTVVVGSVKNPMWRGPQRLPVPVNELPHGWKAWGKSYFVRAAKTNNSFVVDGDTLKECPLEHRAWNSFLVEDHGFGVFHTSVWLKVREDYSLECDPAVIGTAVKGREAAHSDLGYWIESEKNDTWRLKRAHLIEMKTCEWPKSHTLWTDGVEESDLIIPKSLAGPLSHHNTREGYRTQVKGPWHSEELEIRFEECPGTKVYVEETCGTRGPSLRSTTASGRVIEEWCCRECTMPPLSFRAKDGCWYGMEIRPRKEPESNLVRSMVTAGSTDHMDHFSLGVLVILLMVQEGLKKRMTTKIIMSTSMAVLVVMILGGFSMSDLAKLVILMGATFAEMNTGGDVAHLAL
VAAFKVRPALLVSFIFRANWTPRESMLLALASCLLQTAISALEGDLMVLINGFALAWLAIRAMAVPRTDNIALPILAALTPLARGTLLVAWRAGLATCGGIMLLSLKGKGSVKKNLPFVMALGLTAVRVVDPINVVGLLLLTRSGKRSWPPSEVLTAVGLICALAGGFAKADIEMAGPMAAVGLLIVSYVVSGKSVDMYIERAGDITWEKDAEVTGNSPRLDVALDESGDFSLVEEDGPPMREIILKVVLMAICGMNPIAIPFAAGAWYVYVKTGKRSGALWDVPAPKEVKKGETTDGVYRVMTRRLLGSTQVGVGVMQEGVFHTMWHVTKGAALRSGEGRLDPYWGDVKQDLVSYCGPWKLDAAWDGLSEVQLLAVPPGERARNIQTLPGIFKTKDGDIGAVALDYPAGTSGSPILDKCGRVIGLYGNGVVIKNGSYVSAITQGKREEETPVECFEPSMLKKKQLTVLDLHPGAGKTRRVLPEIVREAIKKRLRTVILAPTRVVAAEMEEALRGLPVRYMTTAVNVTHSGTEIVDLMCHATFTSRLLQPIRVPNYNLYIMDEAHFTDPSSIAARGYISTRVEMGEAAAIFMTATPPGTRDAFPDSNSPIMDTEVEVPERAWSSGFDWVTDHSGKTVWFVPSVRNGNEIAACLTKAGKRVIQLSRKTFETEFQKTKNQEWDFVITTDISEMGANFKADRVIDSRRCLKPVILDGERVILAGPMPVTHASAAQRRGRIGRNPNKPGDEYMYGGGCAETDEGHAHWLEARMLLDNIYLQDGLIASLYRPEADKVAAIEGEFKLRTEQRKTFVELMKRGDLPVWLAYQVASAGITYTDRRWCFDGTTNNTIMEDSVPAEVWTKYGEKRVLKPRWMDARVCSDHAALKSFKEFAAGKRGAALGVMEALGTLPGHMTERFQEAIDNLAVLMRAETGSRPYKAAAAQLPETLETIMLLGLLGTVSLGIFFVLMRNKGIGKMGFGMVTLGASAWLMWLSEIEPARIACVLIVVFLLLVVLIPEPEKQRSPQDNQMAIIIMVAVGLLGLITANELGWLERTKNDIAHLMGRREEGATMGFSMDIDLRPASAWAIYAALTTLITPAVQHAVTTSYNNYSLMAMATQAGVLFGMGKGMPFYAWDLGVPLLMMGCYSQLTPLTLIVAIILLVAHYMYLIPGLQAAAARAAQKRTAAGIMKNPVVDGIVVTDIDTMTIDPQVEKKMGQVLLIAVAISSAVLLRTAWGWGEAGALITAATSTLWEGSPNKYWNSSTATSLCNIFRGSYLAGASLIYTVTRNAGLVKRRGGGTGETLGEKWKARLNQMSALEFYSYKKSGITEVCREEARRALKDGVATGGHAVSRGSAKLRWLVERGYLQPYGKVVDLGCGRGGWSYYAATIRKVQEVRGYTKGGPGHEEPMLVQSYGWNIVRLKSGVDVFHMAAEPCDTLLCDIGESSSSPEVEETRTLRVLSMVGDWLEKRPGAFCIKVLCPYTSTMMETMERLQRRHGGGLVRVPLSRNSTHEMYWVSGAKSNIIKSVSTTSQLLLGRMDGPRRPVKYEEDVNLGSGTRAVASCAEAPNMKIIGRRIERIRNEHAETWFLDENHPYRTWAYHGSYEAPTQGSASSLVNGVVRLLSKPWDVVTGVTGIAMTDTTPYGQQRVFKEKVDTRVPDPQEGTRQVMNIVSSWLWKELGKRKRPRVCTKEEFINKVRSNAALGAIFEEEKEWKTAVEAVNDPRFWALVDREREHHLRGECHSCVYNMMGKREKKQGEFGKAKGSRAIWYMWLGARFLEFEALGFLNEDHWMGRENSGGGVEGLGLQRLGYILEEMNRAPGGKMYADDTAGWDTRISKFDLENEALITNQMEEGHRTLALAVIKYTYQNKVVKVLRPAEGGKTVMDIISRQDQRGSGQVVTYALNTFTNLVVQLIRNMEAEEVLEMQDLWLLRKPEKVTRWLQSNGWDRLKRMAVSGDDCVVKPIDDRFAHALRFLNDMGKVRKDTQEWKPSTGWSNW
EEVPFCSHHFNKLYLKDGRSIVVPCRHQDELIGRARVSPGAGWSIRETACLAKSYAQMWQLLYFHRRDLRLMANAICSAVPVDWVPTGRTTWSIHGKGEWMTTEDMLMVWNRVWIEENDHMEDKTPVTKWTDIPYLGKREDLWCGSLIGHRPRTTWAENIKDTVNMVRRIIGDEEKYMDYLSTQVRYLGEEGSTPGVL,3423,FALSE,9576,9576,0,0.04324892146,median,Sourisseau,Deep Mutational Scanning Comprehensively Maps How Zika Envelope Protein Mutations Affect Viral Growth and Antibody Escape,2019,10.1128/JVI.01291-19,291-794,Zika virus env,Viral replication,Growth,A0A140D2T1_ZIKV_theta0.99_281-804_11-26-2021_b02.a2m,281,804,524,0.2,0.01,16501,0.948,497,1357.9,2.732193159,medium,329,0.661971831,A0A140D2T1_ZIKV_Sourisseau_growth_2019.csv,effect,1,mutant,A0A140D2T1_ZIKV_theta_0.01.npy,A0A140D2T1_ZIKV.pdb,291-794,0.1,,OrganismalFitness +A0A192B1T2_9HIV1_Haddox_2018,A0A192B1T2_9HIV1_Haddox_2018.csv,A0A192B1T2_9HIV1,Virus,HIV,MRVKGIQMNSQHLLRWGIMILGMIMICSVAGNLWVTVYYGVPVWKDAETTLFCASDAKAYDAEVHNIWATHACVPTDPNPQEINLENVTEEFNMWKNNMVEQMHTDIISLWDQGLKPCVKLTPLCVTLDCHNVTYNITSDMKEEITNCSYNVTTVIRDKKQKVSSLFYKLDVVQIGGNNRTNSQYRLINCNTSAITQACPKVTFEPIPIHYCAPAGFAILKCKDEKFNGTGLCKNVSTVQCTHGIKPVVSTQLLLNGSLAEGEVRIRSENITNNAKNIIVQLASPVTINCIRPNNNTRKSVHLGPGQAFYATDGIIGEIRQAHCNVSKKEWNSTLQKVANQLRPYFKNNTIIKFANSSGGDLEITTHSFNCGGEFFYCNTSGLFNSTWEFNSTWNNSNSTENITLQCRIKQIINMWQRAGQAIYAPPIPGVIRCKSNITGLILTRDGGSNKNTSETFRPGGGDMRDNWRSELYKYKVVKIEPIGVAPTRAKRRVVEREKRAVGIGAVFIGFLGAAGSTMGAASVTLTVQARQLLSGIVQQQSNLLRAIEAQQHLLKLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTNVPWNSSWSNKSQDEIWGNMTWLQWDKEVSNYTQIIYTLIEESQNQQEKNEQDLLALDKWASLWNWFNISQWLWYIKIFIIIVGGLIGLRIVFAVLSVINRVRQGYSPLSFQTRTPNPGELDRPGRIEEEGGEQDRGRSIRLVSGFLALAWDDLRSLCLFSYHRLRDFILIATRTVELLGHSSLKGLRLGWESLKYLGNLLVYWGRELKISAINLCDTIAIAVAGWTDRVIELGQRLCRAILHIPRRIRQGFERALL,852,FALSE,12577,12577,0,-2.2,manual,Haddox,Mapping mutational effects along the evolutionary landscape of HIV envelope,2018,10.7554/eLife.34420,30-691,HIV env (BF520),Viral 
replication,Growth,A0A192B1T2_9HIV1_theta0.99_full_11-26-2021_b09.a2m,1,852,852,0.9,0.01,74854,0.986,840,36319.9,43.23797619,medium,2382,2.835714286,A0A192B1T2_9HIV1_Haddox_2018.csv,fitness,1,mutant,A0A192B1T2_9HIV1_theta_0.01.npy,A0A192B1T2_9HIV1.pdb,1-852,0.1,,OrganismalFitness +A0A1I9GEU1_NEIME_Kennouche_2019,A0A1I9GEU1_NEIME_Kennouche_2019.csv,A0A1I9GEU1_NEIME,Prokaryote,Neisseria meningitidis,FTLIELMIVIAIVGILAAVALPAYQDYTARAQVSEAILLAEGQKSAVTEYYLNHGEWPGDNSSAGVATSADIKGKYVQSVTVANGVITAQMASSNVNNEIKSKKLSLWAKRQNGSVKWFCGQPVTRTTATATDVAAANGKTDDKINTKHLPSTCRDDSSAS,161,FALSE,922,922,0,0.141,median,Kennouche,Deep mutational scanning of the Neisseria meningitidis major pilin reveals the importance of pilus tip-mediated adhesion,2019,10.15252/embj.2019102145,1-161,pilin (PilE),"piliation (20D9 anti-pilus monoclonal Ab), aggregation, adhesion (human umbilical vein endothelial cells (HUVECs))",,A0A1I9GEU1_NEIME_full_11-26-2021_b08.a2m,1,161,161,0.8,0.2,5553,0.857,138,2183.6,15.82318841,medium,72,0.5217391304,A0A1I9GEU1_NEIME_Kennouche_2019.csv,piliation_log2_ratio,1,mutants,A0A1I9GEU1_NEIME_theta_0.2.npy,A0A1I9GEU1_NEIME.pdb,1-161,0.1,,Activity +A0A247D711_LISMN_Stadelmann_2021,A0A247D711_LISMN_Stadelmann_2021.csv,A0A247D711_LISMN,Eukaryote,Listeria monocytogenes,MNINDLIREIKNKDYTVKLSGTDSNSITQLIIRVNNDGNEYVISESENESIVEKFISAFKNGWNQEYEDEEEFYNDMQTITLKSELN,87,FALSE,1653,1653,0,-0.0155627327,median,Stadelmann,A deep mutational scanning platform to characterize the fitness landscape of anti-CRISPR proteins,2021,10.1101/2021.08.21.457204,1-87,Anti-CRISPR protein AcrIIA4,activity against SpyCas9 inducing an RFP reporter,Flow cytometry,A0A247D711_LISMN_full_b0.3.a2m,1,87,87,0.2,0.2,1316890,1,87,188739.9,2169.424138,High,209,2.402298851,A0A247D711_LISMN_Stadelmann_2021.csv,mean_prediction,1,mutant,A0A247D711_LISMN_b03_theta_0.2.npy,A0A247D711_LISMN.pdb,1-87,1,,Activity +A0A2Z5U3Z0_9INFA_Doud_2016,A0A2Z5U3Z0_9INFA_Doud_2016.csv,A0A2Z5U3Z0_9INFA,Virus,influenza 
H1N1,MKAKLLVLLYAFVATDADTICIGYHANNSTDTVDTILEKNVAVTHSVNLLEDSHNGKLCKLKGIAPLQLGKCNITGWLLGNPECDSLLPARSWSYIVETPNSENGACYPGDLIDYEELREQLSSVSSLERFEIFPKESSWPNHTFNGVTVSCSHRGKSSFYRNLLWLTKKGDSYPKLTNSYVNNKGKEVLVLWGVHHPSSSDEQQSLYSNGNAYVSVASSNYNRRFTPEIAARPKVRDQHGRMNYYWTLLEPGDTIIFEATGNLIAPWYAFALSRGFESGIITSNASMHECNTKCQTPQGAINSNLPFQNIHPVTIGECPKYVRSTKLRMVTGLRNIPSIQYRGLFGAIAGFIEGGWTGMIDGWYGYHHQNEQGSGYAADQKSTQNAINGITNKVNSVIEKMNTQFTAVGKEFNNLEKRMENLNKKVDDGFLDIWTYNAELLVLLENERTLDFHDLNVKNLYEKVKSQLKNNAKEIGNGCFEFYHKCDNECMESVRNGTYDYPKYSEESKLNREKIDGVKLESMGVYQILAIYSTVASSLVLLVSLGAISFWMCSNGSLQCRICI,565,FALSE,10715,10715,0,-2.239942981,median,Doud,Accurate Measurement of the Effects of All Amino-Acid Mutations on Influenza Hemagglutinin,2016,10.3390/v8060155,2-565,Influenza hemagglutinin,viral replication,Growth,A0A2Z5U3Z0_9INFA_theta0.99_full_11-26-2021_b09.a2m,1,565,565,0.9,0.01,57581,0.968,547,9809.4,17.93308958,medium,925,1.691042048,A0A2Z5U3Z0_9INFA_Doud_2016.csv,transformed_pref,1,mutant,A0A2Z5U3Z0_9INFA_theta_0.01.npy,A0A2Z5U3Z0_9INFA.pdb,1-565,0.1,,OrganismalFitness +A0A2Z5U3Z0_9INFA_Wu_2014,A0A2Z5U3Z0_9INFA_Wu_2014.csv,A0A2Z5U3Z0_9INFA,Virus,Influenza A virus (A/WSN/1933(H1N1)),MKAKLLVLLYAFVATDADTICIGYHANNSTDTVDTILEKNVAVTHSVNLLEDSHNGKLCKLKGIAPLQLGKCNITGWLLGNPECDSLLPARSWSYIVETPNSENGACYPGDLIDYEELREQLSSVSSLERFEIFPKESSWPNHTFNGVTVSCSHRGKSSFYRNLLWLTKKGDSYPKLTNSYVNNKGKEVLVLWGVHHPSSSDEQQSLYSNGNAYVSVASSNYNRRFTPEIAARPKVRDQHGRMNYYWTLLEPGDTIIFEATGNLIAPWYAFALSRGFESGIITSNASMHECNTKCQTPQGAINSNLPFQNIHPVTIGECPKYVRSTKLRMVTGLRNIPSIQYRGLFGAIAGFIEGGWTGMIDGWYGYHHQNEQGSGYAADQKSTQNAINGITNKVNSVIEKMNTQFTAVGKEFNNLEKRMENLNKKVDDGFLDIWTYNAELLVLLENERTLDFHDLNVKNLYEKVKSQLKNNAKEIGNGCFEFYHKCDNECMESVRNGTYDYPKYSEESKLNREKIDGVKLESMGVYQILAIYSTVASSLVLLVSLGAISFWMCSNGSLQCRICI,565,FALSE,2350,2350,0,0.0947955855,median,Wu,High-throughput profiling of influenza A virus hemagglutinin gene at single-nucleotide resolution,2014,10.1038/srep04942,6-560,Influenza hemagglutinin,Viral 
replication,Growth,A0A2Z5U3Z0_9INFA_theta0.99_full_11-26-2021_b09.a2m,1,565,565,0.9,0.01,57581,0.968,547,9809.4,17.93308958,medium,925,1.691042048,A0A2Z5U3Z0_9INFA_Wu_2014.csv,RF Index,1,mutant,A0A2Z5U3Z0_9INFA_theta_0.01.npy,A0A2Z5U3Z0_9INFA.pdb,1-565,0.1,,OrganismalFitness +A4_HUMAN_Seuma_2022,A4_HUMAN_Seuma_2022.csv,A4_HUMAN,Human,Homo sapiens,MLPGLALLLLAAWTARALEVPTDGNAGLLAEPQIAMFCGRLNMHMNVQNGKWDSDPSGTKTCIDTKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPHFVIPYRCLVGEFVSDALLVPDKCKFLHQERMDVCETHLHWHTVAKETCSEKSTNLHDYGMLLPCGIDKFRGVEFVCCPLAEESDNVDSADAEEDDSDVWWGGADTDYADGSEDKVVEVAEEEEVAEVEEEEADDDEDDEDGDEVEEEAEEPYEEATERTTSIATTTTTTTESVEEVVREVCSEQAETGPCRAMISRWYFDVTEGKCAPFFYGGCGGNRNNFDTEEYCMAVCGSAMSQSLLKTTQEPLARDPVKLPTTAASTPDAVDKYLETPGDENEHAHFQKAKERLEAKHRERMSQVMREWEEAERQAKNLPKADKKAVIQHFQEKVESLEQEAANERQQLVETHMARVEAMLNDRRRLALENYITALQAVPPRPRHVFNMLKKYVRAEQKDRQHTLKHFEHVRMVDPKKAAQIRSQVMTHLRVIYERMNQSLSLLYNVPAVAEEIQDEVDELLQKEQNYSDDVLANMISEPRISYGNDALMPSLTETKTTVELLPVNGEFSLDDLQPWHSFGADSVPANTENEVEPVDARPAADRGLTTRPGSGLTNIKTEEISEVKMDAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIATVIVITLVMLKKKQYTSIHHGVVEVDAAVTPEERHLSKMQQNGYENPTYKFFEQMQN,770,TRUE,14811,796,14015,-2,manual,Seuma,"An atlas of amyloid aggregation: the impact of substitutions, insertions, deletions and truncations on amyloid beta fibril nucleation",2022,10.1038/s41467-022-34742-3,672-713,APP,aggregation,survival assessment assay,A4_HUMAN_2023-08-07_b01.a2m,1,770,770,0.1,0.2,5272,0.987,760,99.3,0.1306578947,Low,0,0,MS_BL_BB_indels_processed_data.tsv,nscore,1,mutant,A4_HUMAN_theta0.2_2023-08-07_b01.npy,A4_HUMAN.pdb,1-770,1,,Stability +A4D664_9INFA_Soh_2019,A4D664_9INFA_Soh_2019.csv,A4D664_9INFA,Virus,Influenza A 
virus,MERIKELRDLMSQSRTREILTKTTVDHMAIIKKYTSGRQEKNPALRMKWMMAMKYPITADKRIMEMIPERNEQGQTLWSKTNDAGSDRVMVSPLAVTWWNRNGPTTSTVHYPKVYKTYFEKVERLKHGTFGPVHFRNQVKIRRRVDINPGHADLSAKEAQDVIMEVVFPNEVGARILTSESQLTITREKKEELQDCKIAPLMVAYMLERELVRKTRFLPVAGGTSSVYIEVLHLTQGTCWEQMYTPGGEVRNDDVDQSLIIAARNIVRRATVSADPLASLLEMCHSTQIGGIRMVDILRQNPTEEQAVDICKAAMGLRISSSFSFGGFTFKRTSGSSVKREEEVLTGNLQTLKIRVHEGYEEFTMVGRRATAILRKATRRLIQLIVSGRDEQSIAEAIIVALVFSQEDCMIKAVRGDLNFVNRANQRLNPMHQLLRHFQKDAKVLFQNWGIEPIDNVMGMIGILPDMTPSTEMSLRGIRVSKMGVDEYSSTERVVVSIDRFLRVRDQRGNVLLSPEEVSETQGTEKLTITYSSSMMWEINGPESVLVNTYQWIIRNWETVKIQWSQDPTMLYNKMEFEPFQSLVPKAARGQYSGFVRTLFQQMRDVLGTFDTVQIIKLLPFAAAPPEQSRMQFSSLTVNVRGSGMRILVRGNSPVFNYNKATKRLTVLGKDAGALTEDPDEGTAGVESAVLRGFLILGKEDKRYGPALSINELSNLAKGEKANVLIGQGDVVLVMKRKRDSSILTDSQTATKRIRMAIN,759,FALSE,14421,14421,0,0.2170105627,median,Soh,Comprehensive mapping of adaptation of the avian influenza polymerase protein PB2 to humans,2019,10.7554/eLife.45079,1-759,Influenza polymerase basic protein 2,Viral replication (avian cells: CCL141 (duck)),Growth,A4D664_9INFA_theta0.99_full_11-26-2021_b09.a2m,1,759,759,0.9,0.01,26683,1,759,1730.2,2.279578393,medium,3736,4.92226614,A4D664_9INFA_Soh_2019.csv,effectCCL141,1,mutant,A4D664_9INFA_theta_0.01.npy,A4D664_9INFA.pdb,1-759,0.1,,OrganismalFitness +A4GRB6_PSEAI_Chen_2020,A4GRB6_PSEAI_Chen_2020.csv,A4GRB6_PSEAI,Prokaryote,Pseudomonas aeruginosa,MFKLLSKLLVYLTASIMAIASPLAFSVDSSGEYPTVSEIPVGEVRLYQIADGVWSHIATQSFDGAVYPSNGLIVRDGDELLLIDTAWGAKNTAALLAEIEKQIGLPVTRAVSTHFHDDRVGGVDVLRAAGVATYASPSTRRLAEVEGNEIPTHSLEGLSSSGDAVRFGPVELFYPGAAHSTDNLIVYVPSASVLYGGCAIYELSRTSAGNVADADLAEWPTSIERIQQHYPEAQFVIPGHGLPGGLDLLKHTTNVVKAHTNRSVVE,266,FALSE,5004,5004,0,-2.1,manual,Chen,"Comprehensive exploration of the translocation, stability and substrate recognition requirements in VIM-2 lactamase",2020,10.7554/eLife.56707,1-266,Beta-lactamase VIM-2,"drug resistance (128/16/2.0 ug/mL ampicillin, 4.0/0.5 ug/mL cefotaxime, 0.031 ug/mL meropenem @ 25C, 37C)",Antibiotics 
resistance,A4GRB6_PSEAI_full_11-26-2021_b03.a2m,1,266,266,0.3,0.2,108496,0.726,193,31234.2,161.8352332,high,317,1.642487047,A4GRB6_PSEAI_Chen_2020.csv,0.031ug_mL_MEM_37C,1,mutant,A4GRB6_PSEAI_theta_0.2.npy,A4GRB6_PSEAI.pdb,1-266,0.1,,OrganismalFitness +AACC1_PSEAI_Dandage_2018,AACC1_PSEAI_Dandage_2018.csv,AACC1_PSEAI,Prokaryote,Pseudomonas aeruginosa,MLRSSNDVTQQGSRPKTKLGGSSMGIIRTCRLGPDQVKSMRAALDLFGREFGDVATYSQHQPDSDYLGNLLRSKTFIALAAFDQEAVVGALAAYVLPKFEQPRSEIYIYDLAVSGEHRRQGIATALINLLKHEANALGAYVIYVQADYGDDPAVALYTKLGIREEVMHFDIDPSTAT,177,FALSE,1801,1801,0,0.7172234411,median,Dandage,Differential strengths of molecular determinants guide environment specific mutational fates,2018,10.1371/journal.pgen.1007419,12-172,GMR (aacC1),"Antibiotic resistance under: heat/cold resistance (32C, 37C (ref), 42C), chemical stability (chemical chaperones TMAO, glycerol), antibiotic resistance (gentamicin), or combo",Antibiotics resistance,AACC1_PSEAI_full_04-29-2022_b03.a2m,1,177,177,0.3,0.2,539868,0.746,132,170256.3,1289.820455,high,235,1.78030303,AACC1_PSEAI_Dandage_2018.csv,30C,1,Mutation,AACC1_PSEAI_theta_0.2.npy,AACC1_PSEAI.pdb,1-177,0.1,,OrganismalFitness +ACE2_HUMAN_Chan_2020,ACE2_HUMAN_Chan_2020.csv,ACE2_HUMAN,Human,Homo 
sapiens,MSSSSWLLLSLVAVTAAQSTIEEQAKTFLDKFNHEAEDLFYQSSLASWNYNTNITEENVQNMNNAGDKWSAFLKEQSTLAQMYPLQEIQNLTVKLQLQALQQNGSSVLSEDKSKRLNTILNTMSTIYSTGKVCNPDNPQECLLLEPGLNEIMANSLDYNERLWAWESWRSEVGKQLRPLYEEYVVLKNEMARANHYEDYGDYWRGDYEVNGVDGYDYSRGQLIEDVEHTFEEIKPLYEHLHAYVRAKLMNAYPSYISPIGCLPAHLLGDMWGRFWTNLYSLTVPFGQKPNIDVTDAMVDQAWDAQRIFKEAEKFFVSVGLPNMTQGFWENSMLTDPGNVQKAVCHPTAWDLGKGDFRILMCTKVTMDDFLTAHHEMGHIQYDMAYAAQPFLLRNGANEGFHEAVGEIMSLSAATPKHLKSIGLLSPDFQEDNETEINFLLKQALTIVGTLPFTYMLEKWRWMVFKGEIPKDQWMKKWWEMKREIVGVVEPVPHDETYCDPASLFHVSNDYSFIRYYTRTLYQFQFQEALCQAAKHEGPLHKCDISNSTEAGQKLFNMLRLGKSEPWTLALENVVGAKNMNVRPLLNYFEPLFTWLKDQNKNSFVGWSTDWSPYADQSIKVRISLKSALGDKAYEWNDNEMYLFRSSVAYAMRQYFLKVKNQMILFGEEDVRVANLKPRISFNFFVTAPKNVSDIIPRTEVEKAIRMSRSRINDAFRLNDNSLEFLGIQPTLGPPNQPPVSIWLIVFGVVMGVIVVGIVILIFTGIRDRKKKNKARSGENPYASIDISKGENNPGFQNTDDVQTSF,805,FALSE,2223,2223,0,-0.266564268,median,Chan,Engineering human ACE2 to optimize binding to the spike protein of SARS coronavirus 2,2020,10.1126/science.abc0870,19-518,ACE2,Binding affinity,Flow Cytometry Assay,ACE2_HUMAN_2023-10-12_b05.a2m,1,805,805,0.5,0.2,11106,0.743,598,1506.7,2.519565217,Medium,349,0.5836120401,,score,1,mutant,ACE2_HUMAN_theta0.2_2023-10-12_b05.npy,ACE2_HUMAN.pdb,1-805,1,,Binding +ADRB2_HUMAN_Jones_2020,ADRB2_HUMAN_Jones_2020.csv,ADRB2_HUMAN,Human,Homo sapiens,MGQPGNGSAFLLAPNGSHAPDHDVTQERDEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTNYFITSLACADLVMGLAVVPFGAAHILMKMWTFGNFWCEFWTSIDVLCVTASIETLCVIAVDRYFAITSPFKYQSLLTKNKARVIILMVWIVSGLTSFLPIQMHWYRATHQEAINCYANETCCDFFTNQAYAIASSIVSFYVPLVIMVFVYSRVFQEAKRQLQKIDKSEGRFHVQNLSQVEQDGRTGHGLRRSSKFCLKEHKALKTLGIIMGTFTLCWLPFFIVNIVHVIQDNLIRKEVYILLNWIGYVNSGFNPLIYCRSPDFRIAFQELLCLRRSSLKAYGNGYSSNGNTGEQSGYHVEQEKENKLLCEDLPGTEDFVGHQGTVPSDNIDSQGRNCSTNDSLL,413,FALSE,7800,7800,0,1.859961867,median,Jones,Structural and Functional Characterization of G Protein-Coupled Receptors with Deep Mutational Scanning,2020,10.7554/eLife.54895,2-413,ADRB2,"transcription (luciferase reporter, isoproterenol (beta2AR agonist)-induced)",Receptor 
activity,ADRB2_HUMAN_full_11-26-2021_b03.a2m,1,413,413,0.3,0.2,204722,0.712,294,25459.6,86.59727891,medium,234,0.7959183673,ADRB2_HUMAN_Jones_2020.csv,0.625,1,mutant_id,ADRB2_HUMAN_theta_0.2.npy,ADRB2_HUMAN.pdb,1-413,0.1,,Activity +AICDA_HUMAN_Gajula_2014_3cycles,AICDA_HUMAN_Gajula_2014_3cycles.csv,AICDA_HUMAN,Human,Homo sapiens,MDSLLMNRRKFLYQFKNVRWAKGRRETYLCYVVKRRDSATSFSLDFGYLRNKNGCHVELLFLRYISDWDLDPGRCYRVTWFTSWSPCYDCARHVADFLRGNPNLSLRIFTARLYFCEDRKAEPEGLRRLHRAGVQIAIMTFKDYFYCWNTFVENHERTFKAWEGLHENSVRLSRQLRRILLPLYEVDDLRDAFRTLGL,198,FALSE,209,209,0,1,manual,Gajula,High-throughput mutagenesis reveals functional determinants for DNA targeting by activation-induced deaminase,2014,10.1093/nar/gku689,113-123,AID,Enzymatic activity,bulk RNA-sequencing,AICDA_HUMAN_2023-08-07_b01.a2m,1,198,198,0.1,0.2,18148,0.879,174,3340,19.1954023,Medium,101,0.5804597701,urn_mavedb_00000106-c-1_scores.csv,DMS_score,1,mutant,AICDA_HUMAN_theta0.2_2023-08-07_b01.npy,AICDA_HUMAN.pdb,1-198,1,,Activity +AMFR_HUMAN_Tsuboyama_2023_4G3O,AMFR_HUMAN_Tsuboyama_2023_4G3O.csv,AMFR_HUMAN,Human,Homo sapiens,YFQGQLNAMAHQIQEMFPQVPYHLVLQDLQLTRSVEITTDNILEGRI,47,TRUE,2972,820,2152,-1.504736022,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-6,1-47,E3 ubiquitin-protein ligase AMFR,Stability,cDNA display proteolysis,AMFR_HUMAN_2023-08-07_b04.a2m,1,47,47,0.4,0.2,17787,0.872,41,1166.9,28.46097561,Medium,12,0.2926829268,Tsuboyama2023_Dataset2_Dataset3,ddG_ML_float,1,mut_type,AMFR_HUMAN_theta0.2_2023-08-07_b04.npy,AMFR_HUMAN.pdb,1-47,1,,Stability +AMIE_PSEAE_Wrenbeck_2017,AMIE_PSEAE_Wrenbeck_2017.csv,AMIE_PSEAE,Prokaryote,Pseudomonas 
aeruginosa,MRHGDISSSNDTVGVAVVNYKMPRLHTAAEVLDNARKIAEMIVGMKQGLPGMDLVVFPEYSLQGIMYDPAEMMETAVAIPGEETEIFSRACRKANVWGVFSLTGERHEEHPRKAPYNTLVLIDNNGEIVQKYRKIIPWCPIEGWYPGGQTYVSEGPKGMKISLIICDDGNYPEIWRDCAMKGAELIVRCQGYMYPAKDQQVMMAKAMAWANNCYVAVANAAGFDGVYSYFGHSAIIGFDGRTLGECGEEEMGIQYAQLSLSQIRDARANDQSQNHLFKILHRGYSGLQASGDGDRGLAECPFEFYRTWVTDAEKARENVERLTRSTTGVAQCPVGRLPYEGLEKEA,346,FALSE,6227,6227,0,-0.2222,median,Wrenbeck,Single-mutation fitness landscapes for an enzyme on multiple substrates reveal specificity is globally encoded,2017,10.1038/ncomms15695,1-341,Aliphatic amidase,Enzyme function,Growth,AMIE_PSEAE_full_11-26-2021_b02.a2m,1,346,346,0.2,0.2,140703,0.725,251,29959.3,119.359761,high,557,2.219123506,AMIE_PSEAE_Wrenbeck_2017.csv,isobutyramide_normalized_fitness,1,mutant,AMIE_PSEAE_theta_0.2.npy,AMIE_PSEAE.pdb,1-346,0.1,,Activity +ANCSZ_Hobbs_2022,ANCSZ_Hobbs_2022.csv,ANCSZ,Eukaryote,reconstructed ancestor,MADSANHLPYFYGSITREEAEDYLKQGGMSDGLFLLRQSLNSLGGYVLSVVYDRQCHHYTIERQLNGTYAIAGGKPHSGPAELCEYHSQDSDGLVCLLKKPCNRPPGVQPKVGPFEDLKDQLIREYVRQTWNLEGEALEQAIISQRPQLEKLIATTAHEKMPWFHGKISREESERRLLSGAQPNGKFLIRERDENGSYALSLLYEKKVYHYRIDRDKSGKLSIPDGKKFDTLWQLVEHYSHKPDGLLCVLTEPCPNPDSPAGALGAPAPPLPGSHPKLETAGGIISRIKSYSFPKPGFKKKPPSERPKSALNVNGYVPRPKPLGAEGGSRRAMPMDTNVYESPYSDPEELKDKKLYLKREQLMLEEGELGSGNFGTVKKGVYKMRKKEIPVAVKVLKSENDPAVKDELMKEAEFMHQLDNPYIVRMIGICEAESLMLVMELAPLGPLNKFLQKHKDQITVENIVELMHQVSMGMKYLEEKNFVHRDLAARNVLLVNQHYAKISDFGLSKALGADDNYYKAKTAGKWPLKWYAPECINFHKFSSKSDVWSFGVTMWEAFSYGQKPYKGMKGQEVLPFIENGERMECPAECPEEMYELMKDCWTYKADDRPGFVAVELRLRDYYYDISK,627,FALSE,4670,4670,0,-0.0574121626,median,Hobbs,Saturation mutagenesis of a predicted ancestral Syk-family kinase,2022,10.1002/pro.4411,352-627,ancestral spleen tyrosine kinase,successful phosphorylation of bait peptide,enzymatic activity,ANCSZ_b0.4.a2m,1,627,627,0.4,0.2,7424,1,627,1036.7,1.653429027,Medium,109,0.1738437002,ANCSZ_Hobbs_2022.csv,DMS_value,1,mutant,ANCSZ_theta_0.2.npy,ANCSZ.pdb,1-627,1,,Activity 
+ARGR_ECOLI_Tsuboyama_2023_1AOY,ARGR_ECOLI_Tsuboyama_2023_1AOY.csv,ARGR_ECOLI,Prokaryote,Escherichia coli (strain K12),QEELVKAFKALLKEEKFSSQGEIVAALQEQGFDNINQSKVSRMLTKFGAVRTRNAKMEMVYCLPAELGV,69,FALSE,1287,1287,0,-0.4541373765,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-7,1-69,Arginine repressor,Stability,cDNA display proteolysis,ARGR_ECOLI_2023-08-07_b04.a2m,1,69,69,0.4,0.2,21443,0.913,63,3719.2,59.03492063,Medium,29,0.4603174603,Tsuboyama2023_Dataset2_Dataset4,ddG_ML_float,1,mut_type,ARGR_ECOLI_theta0.2_2023-08-07_b04.npy,ARGR_ECOLI.pdb,1-69,1,,Stability +B2L11_HUMAN_Dutta_2010_binding-Mcl-1,B2L11_HUMAN_Dutta_2010_binding-Mcl-1.csv,B2L11_HUMAN,Human,Homo sapiens,MAKQPSDVSSECDREGRQLQPAERPPQLRPGAPTSLQTEPQGNPEGNHGGEGDSCPHGSPQGPLAPPASPGPFATRSPLFIFMRRSSLLSRSSSGYFSFDTDRSPAPMSCDKSTQTPSPPCQAFNHYLSAMASMRQAEPADMRPEIWIAQELRRIGDEFNAYYARRVFLNNYQAAEDHPRMVILRLLRYIVRLVWRMH,198,FALSE,170,170,0,16002529.37,median,Dutta,Determinants of BH3 Binding Specificity for Mcl-1 versus Bcl-xL,2010,10.1016/j.jmb.2010.03.058,148-159,BCL2L11,Binding to Mcl-1 (FACS; yeast-displayed and antibody stained for binding partner),FACS,B2L11_HUMAN_2023-08-07_b04.a2m,1,198,198,0.4,0.2,660,0.995,197,88.5,0.4492385787,Low,2,0.01015228426,,score,1,mut_proteingym,B2L11_HUMAN_theta0.2_2023-08-07_b04.npy,B2L11_HUMAN.pdb,1-198,1,147,Binding +BBC1_YEAST_Tsuboyama_2023_1TG0,BBC1_YEAST_Tsuboyama_2023_1TG0.csv,BBC1_YEAST,Eukaryote,Saccharomyces cerevisiae (strain ATCC 204508 / S288c) (Baker's yeast),EVPFKVVAQFPYKSDYEDDLNFEKDQEIIVTSVEDAEWYFGEYQDSNGDVIEGIFPKSFVAVQG,64,TRUE,2069,1084,985,-1.271998543,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-8,1-64,Myosin tail region-interacting protein MTI1,Stability,cDNA display 
proteolysis,BBC1_YEAST_2023-08-07_b05.a2m,1,64,64,0.5,0.2,604824,0.844,54,17529.2,324.6148148,High,55,1.018518519,Tsuboyama2023_Dataset2_Dataset5,ddG_ML_float,1,mut_type,BBC1_YEAST_theta0.2_2023-08-07_b05.npy,BBC1_YEAST.pdb,1-64,1,,Stability +BCHB_CHLTE_Tsuboyama_2023_2KRU,BCHB_CHLTE_Tsuboyama_2023_2KRU.csv,BCHB_CHLTE,Prokaryote,Chlorobaculum tepidum,ELSWTAEAEKMLGKVPFFVRKKVRKNTDNYAREIGEPVVTADVFRKAKEHLG,52,TRUE,1572,890,682,-0.9540616602,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-9,1-52,Light-independent protochlorophyllide reductase subunit B,Stability,cDNA display proteolysis,BCHB_CHLTE_2023-08-07_b04.a2m,1,52,52,0.4,0.2,12079,0.923,48,2630.8,54.80833333,Medium,18,0.375,Tsuboyama2023_Dataset2_Dataset6,ddG_ML_float,1,mut_type,BCHB_CHLTE_theta0.2_2023-08-07_b04.npy,BCHB_CHLTE.pdb,1-52,1,,Stability +BLAT_ECOLX_Deng_2012,BLAT_ECOLX_Deng_2012.csv,BLAT_ECOLX,Prokaryote,Escherichia coli,MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW,286,FALSE,4996,4996,0,-2.913548,median,Deng,Deep Sequencing of Systematic Combinatorial Libraries Reveals Œ≤-Lactamase Sequence Constraints at High Resolution,2012,10.1016/j.jmb.2012.09.014,24-286,Beta-lactamase TEM,"antibiotic resistance, MIC",Amp resistance,BLAT_ECOLX_full_11-26-2021_b02.a2m,1,286,286,0.2,0.2,209644,0.752,215,47605,221.4186047,high,446,2.074418605,BLAT_ECOLX_Deng_2012.csv,ddG_stat,-1,mutant,BLAT_ECOLX_theta_0.2.npy,BLAT_ECOLX.pdb,1-286,0.1,,OrganismalFitness +BLAT_ECOLX_Firnberg_2014,BLAT_ECOLX_Firnberg_2014.csv,BLAT_ECOLX,Prokaryote,Escherichia 
coli,MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW,286,FALSE,4783,4783,0,0.4257,median,Firnberg,"A Comprehensive, High-Resolution Map of a Gene's Fitness Landscape",2014,10.1093/molbev/msu081,24-286,Beta-lactamase TEM,Growth (0.25-1024 ug/mL ampicillin) doubling,Growth,BLAT_ECOLX_full_11-26-2021_b02.a2m,1,286,286,0.2,0.2,209644,0.752,215,47605,221.4186047,high,446,2.074418605,BLAT_ECOLX_Firnberg_2014.csv,linear,1,mutant,BLAT_ECOLX_theta_0.2.npy,BLAT_ECOLX.pdb,1-286,0.1,,OrganismalFitness +BLAT_ECOLX_Jacquier_2013,BLAT_ECOLX_Jacquier_2013.csv,BLAT_ECOLX,Prokaryote,Escherichia coli,MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW,286,FALSE,989,989,0,-0.666666667,median,Jacquier,Capturing the mutational landscape of the beta-lactamase TEM-1,2013,10.1073/pnas.1215206110,24-286,Beta-lactamase TEM,MIC,Amoxicillin resistance,BLAT_ECOLX_full_11-26-2021_b02.a2m,1,286,286,0.2,0.2,209644,0.752,215,47605,221.4186047,high,446,2.074418605,BLAT_ECOLX_Jacquier_2013.csv,MIC_score,1,mutant,BLAT_ECOLX_theta_0.2.npy,BLAT_ECOLX.pdb,1-286,0.1,,OrganismalFitness +BLAT_ECOLX_Stiffler_2015,BLAT_ECOLX_Stiffler_2015.csv,BLAT_ECOLX,Prokaryote,Escherichia coli,MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW,286,FALSE,4996,4996,0,-1.159498916,median,Stiffler,Evolvability as a Function of 
Purifying Selection in TEM-1 β-lactamase,2015,10.1016/j.cell.2015.01.035,24-286,Beta-lactamase TEM,Growth (10-2500 ug/mL ampicillin),Growth,BLAT_ECOLX_full_11-26-2021_b02.a2m,1,286,286,0.2,0.2,209644,0.752,215,47605,221.4186047,high,446,2.074418605,BLAT_ECOLX_Stiffler_2015.csv,2500,1,mutant,BLAT_ECOLX_theta_0.2.npy,BLAT_ECOLX.pdb,1-286,0.1,,OrganismalFitness +BRCA1_HUMAN_Findlay_2018,BRCA1_HUMAN_Findlay_2018.csv,BRCA1_HUMAN,Human,Homo sapiens,MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRSLQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPENPSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEISLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENSSLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPCSENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLLASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKRKRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEKESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSEEIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKEFVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLGKAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVSKRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKDKPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSMSPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKLNAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDDLLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELPCFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLFSSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEAASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALEDLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHSCSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYL
PRQDLEGTPYLESGISLFSDDPESDPSEDRAPESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSGLTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKMLNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTLGTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY,1863,FALSE,1837,1837,0,-1,manual,Findlay,Accurate classification of BRCA1 variants with saturation genome editing,2018,10.1038/s41586-018-0461-z,1-1855,BRCA1,Growth,Growth,BRCA1_HUMAN_full_11-26-2021_b02.a2m,1,1863,1863,0.2,0.2,1008,0.769,1432,108.4,0.07569832402,low,0,0,BRCA1_HUMAN_Findlay_2018.csv,function_score,1,mutant,BRCA1_HUMAN_theta_0.2.npy,BRCA1_HUMAN.pdb,1-1863,0.1,,OrganismalFitness +BRCA2_HUMAN_Erwood_2022_HEK293T,BRCA2_HUMAN_Erwood_2022_HEK293T.csv,BRCA2_HUMAN,Human,Homo sapiens,MPIGSKERPTFFEIFKTRCNKADLGPISLNWFEELSSEAPPYNSEPAEESEHKNNNYEPNLFKTPQRKPSYNQLASTPIIFKEQGLTLPLYQSPVKELDKFKLDLGRNVPNSRHKSLRTVKTKMDQADDVSCPLLNSCLSESPVVLQCTHVTPQRDKSVVCGSLFHTPKFVKGRQTPKHISESLGAEVDPDMSWSSSLATPPTLSSTVLIVRNEEASETVFPHDTTANVKSYFSNHDESLKKNDRFIASVTDSENTNQREAASHGFGKTSGNSFKVNSCKDHIGKSMPNVLEDEVYETVVDTSEEDSFSLCFSKCRTKNLQKVRTSKTRKKIFHEANADECEKSKNQVKEKYSFVSEVEPNDTDPLDSNVANQKPFESGSDKISKEVVPSLACEWSQLTLSGLNGAQMEKIPLLHISSCDQNISEKDLLDTENKRKKDFLTSENSLPRISSLPKSEKPLNEETVVNKRDEEQHLESHTDCILAVKQAISGTSPVASSFQGIKKSIFRIRESPKETFNASFSGHMTDPNFKKETEASESGLEIHTVCSQKEDSLCPNLIDNGSWPATTTQNSVALKNAGLISTLKKKTNKFIYAIHDETSYKGKKIPKDQKSELINCSAQFEANAFEAPLTFANADSGLLHSSVKRSCSQNDSEEPTLSLTSSFGTILRKCSRNETCSNNTVISQDLDYKEAKCNKEKLQLFITPEADSLSCLQEGQCENDPKSKKVSDIKEEVLAAACHPVQHSKVEYSDTDFQSQKSLLYDHENASTLILTPTSKDVLSNLVMISRGKESYKMSDKLKGNNYESDVELTKNIPMEKNQDVCALNENYKNVELLPPEKYMRVASPSRKVQFNQNTNLRVIQKNQEETTSISKITVNPDSEELFSDNENNFVFQVANERNNLALGNTKELHETDLTCVNEPIFKNSTMVLYGDTGDKQATQVSIKKDLVYVLAEENKNSVKQHIKMTLGQDLKSDISLNIDKIPEKNNDYMNKWAGLLGPISNHSFGGSFRTASNKEIKLSEHNIKKSKMFFKDIEEQYPTSLACVEIVNTLALDNQKKLSKPQSINTVSAHLQSSVVVSDCKNSHITPQMLFSKQDFNSNHNLTPSQKAEITELSTILEESGSQFEFTQFRKPSYILQKSTFEVPENQMTILKTTSEECRDADLHVIMNAPSIGQVDSSKQFEGTVEIKRKF
AGLLKNDCNKSASGYLTDENEVGFRGFYSAHGTKLNVSTEALQKAVKLFSDIENISEETSAEVHPISLSSSKCHDSVVSMFKIENHNDKTVSEKNNKCQLILQNNIEMTTGTFVEEITENYKRNTENEDNKYTAASRNSHNLEFDGSDSSKNDTVCIHKDETDLLFTDQHNICLKLSGQFMKEGNTQIKEDLSDLTFLEVAKAQEACHGNTSNKEQLTATKTEQNIKDFETSDTFFQTASGKNISVAKESFNKIVNFFDQKPEELHNFSLNSELHSDIRKNKMDILSYEETDIVKHKILKESVPVGTGNQLVTFQGQPERDEKIKEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQGTSEITSFSHQWAKTLKYREACKDLELACETIEITAAPKCKEMQNSLNNDKNLVSIETVVPPKLLSDNLCRQTENLKTSKSIFLKVKVHENVEKETAKSPATCYTNQSPYSVIENSALAFYTSCSRKTSVSQTSLLEAKKWLREGIFDGQPERINTADYVGNYLYENNSNSTIAENDKNHLSEKQDTYLSNSSMSNSYSYHSDEVYNDSGYLSKNKLDSGIEPVLKNVEDQKNTSFSKVISNVKDANAYPQTVNEDICVEELVTSSSPCKNKNAAIKLSISNSNNFEVGPPAFRIASGKIVCVSHETIKKVKDIFTDSFSKVIKENNENKSKICQTKIMAGCYEALDDSEDILHNSLDNDECSTHSHKVFADIQSEEILQHNQNMSGLEKVSKISPCDVSLETSDICKCSIGKLHKSVSSANTCGIFSTASGKSVQVSDASLQNARQVFSEIEDSTKQVFSKVLFKSNEHSDQLTREENTAIRTPEHLISQKGFSYNVVNSSAFSGFSTASGKQVSILESSLHKVKGVLEEFDLIRTEHSLHYSPTSRQNVSKILPRVDKRNPEHCVNSEMEKTCSKEFKLSNNLNVEGGSSENNHSIKVSPYLSQFQQDKQQLVLGTKVSLVENIHVLGKEQASPKNVKMEIGKTETFSDVPVKTNIEVCSTYSKDSENYFETEAVEIAKAFMEDDELTDSKLPSHATHSLFTCPENEEMVLSNSRIGKRRGEPLILVGEPSIKRNLLNEFDRIIENQEKSLKASKSTPDGTIKDRRLFMHHVSLEPITCVPFRTTKERQEIQNPNFTAPGQEFLSKSHLYEHLTLEKSSSNLAVSGHPFYQVSATRNEKMRHLITTGRPTKVFVPPFKTKSHFHRVEQCVRNINLEENRQKQNIDGHGSDDSKNKINDNEIHQFNKNNSNQAVAVTFTKCEEEPLDLITSLQNARDIQDMRIKKKQRQRVFPQPGSLYLAKTSTLPRISLKAAVGGQVPSACSHKQLYTYGVSKHCIKINSKNAESFQFHTEDYFGKESLWTGKGIQLADGGWLIPSNDGKAGKEEFYRALCDTPGVDPKLISRIWVYNHYRWIIWKLAAMECAFPKEFANRCLSPERVLLQLKYRYDTEIDRSRRSAIKKIMERDDTAAKTLVLCVSDIISLSANISETSSNKTSSADTQKVAIIELTDGWYAVKAQLDPPLLAVLKNGRLTVGQKIILHGAELVGSPDACTPLEAPESLMLKISANSTRPARWYTKLGFFPDPRPFPLPLSSLFSDGGNVGCVDVIIQRAYPIQWMEKTSSGLYIFRNEREEEKEAAKYVEAQQKRLEALFTKIQEEFEEHEENTTKPYLPSRALTRQQVRALQDGAELYEAVKNAADPAYLEGYFSEEQLRALNNHRQMLNDKKQAQIQLEIRKAMESAEQKEQGLSRDVTTVWKLRIVSYSKKEKDSVILSIWRPSSDLYSLLTEGKRYRIYHLATSKSKSKSERANIQLAATKKTQYQQLPVSDEILFQIYQPREPLHFSKFLDPDFQPSCSEVDLIGFVVSVVKKTGLAPFVYLSDECYNLLAIKFWIDLNEDIIKPHMLIAASNLQWRPESKSGLLTLFAGDFSVFSASPKEGHFQETFNKMKNTVENIDILCNEAENKLMHILHANDPKWS
TPTKDCTSGPYTAQIIPGTGNKLLMSSPNCEIYYQSPLSLCMAKRKSVSTPVSAQMTSKSCKGEKEIDDQKNCKKRRALDFLSRLPLPPPVSPICTFVSPAAQKAFQPPRSCGTKYETPIKKKELNSPQMTPFKKFNEISLLESNSIADEELALINTQALLSGSTGEKQFISVSESTRTAPTSSEDYLRLKRRCTTSLIKEQESSQASTEECEKNKQDTITTKKYI,3418,FALSE,265,265,0,0.8,manual,Erwood,Saturation variant interpretation using CRISPR prime editing,2022,10.1038/s41587-021-01201-1,388-2654,BRCA2,Fitness,Growth,BRCA2_HUMAN_2023-10-12_b01.a2m,1,3418,3418,0.1,0.2,933,,,,,,,,41587_2021_1201_MOESM3_ESM.xlsx,Function Score,1,Protein Annotation,BRCA2_HUMAN_theta0.2_2023-10-12_b01.npy,BRCA2_HUMAN_1-1000.pdb|BRCA2_HUMAN_1001-2085.pdb|BRCA2_HUMAN_2086-2832.pdb,1-1000|1001-2085|2086-2832,1,,OrganismalFitness +C6KNH7_9INFA_Lee_2018,C6KNH7_9INFA_Lee_2018.csv,C6KNH7_9INFA,Virus,Influenza A virus (A/Perth/16/2009(H3N2)),MKTIIALSYILCLVFAQKLPGNDNSTATLCLGHHAVPNGTIVKTITNDQIEVTNATELVQSSSTGEICDSPHQILDGKNCTLIDALLGDPQCDDFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSKNSFFSRLNWLTHLNFKYPALNVTMPNNEQFDKLYIWGVLHPGTDKDQIFLYAQASGRITVSTKRSQQIVSPNIGSRPRVRNIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCNSECITPNGSIPNDKPFQNVNRITYGACPRYVKQNTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHDVYRDEALNNRFQIKGVELKSGYKDWILWISFAISCFLLCVALLGFIMWACQKGNIRCNICI,566,FALSE,10754,10754,0,-1.720276237,median,Lee,Deep mutational scanning of hemagglutinin helps predict evolutionary fates of human H3N2 influenza variants,2018,10.1073/pnas.1806133115,1-566,Influenza hemagglutinin,Viral replication,Growth,C6KNH7_9INFA_theta0.99_full_11-26-2021_b09.a2m,1,566,566,0.9,0.01,57453,0.977,553,10569.8,19.11356239,medium,964,1.743218807,C6KNH7_9INFA_Lee_2018.csv,log_fitness_by_syn_mut_fitness,1,mutant,C6KNH7_9INFA_theta_0.01.npy,C6KNH7_9INFA.pdb,1-566,0.1,,OrganismalFitness +CALM1_HUMAN_Weile_2017,CALM1_HUMAN_Weile_2017.csv,CALM1_HUMAN,Human,Homo 
sapiens,MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTIDFPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREADIDGDGQVNYEEFVQMMTAK,149,FALSE,1813,1813,0,0.872790117,median,Weile,A framework for exhaustively mapping functional missense variants,2017,10.15252/msb.20177908,2-149,CALM1,Yeast growth,complementation,CALM1_HUMAN_full_11-26-2021_b03.a2m,1,149,149,0.3,0.2,177633,0.893,133,28985.1,217.9330827,high,96,0.7218045113,CALM1_HUMAN_Weile_2017.csv,screenscore,1,mutant,CALM1_HUMAN_theta_0.2.npy,CALM1_HUMAN.pdb,1-149,0.1,,OrganismalFitness +CAPSD_AAV2S_Sinai_2021,CAPSD_AAV2S_Sinai_2021.csv,CAPSD_AAV2S,Virus,Adeno-associated virus 2,MAADGYLPDWLEDTLSEGIRQWWKLKPGPPPPKPAERHKDDSRGLVLPGYKYLGPFNGLDKGEPVNEADAAALEHDKAYDRQLDSGDNPYLKYNHADAEFQERLKEDTSFGGNLGRAVFQAKKRVLEPLGLVEEPVKTAPGKKRPVEHSPVEPDSSSGTGKAGQQPARKRLNFGQTGDADSVPDPQPLGQPPAAPSGLGTNTMATGSGAPMADNNEGADGVGNSSGNWHCDSTWMGDRVITTSTRTWALPTYNNHLYKQISSQSGASNDNHYFGYSTPWGYFDFNRFHCHFSPRDWQRLINNNWGFRPKRLNFKLFNIQVKEVTQNDGTTTIANNLTSTVQVFTDSEYQLPYVLGSAHQGCLPPFPADVFMVPQYGYLTLNNGSQAVGRSSFYCLEYFPSQMLRTGNNFTFSYTFEDVPFHSSYAHSQSLDRLMNPLIDQYLYYLSRTNTPSGTTTQSRLQFSQAGASDIRDQSRNWLPGPCYRQQRVSKTSADNNNSEYSWTGATKYHLNGRDSLVNPGPAMASHKDDEEKFFPQSGVLIFGKQGSEKTNVDIEKVMITDEEEIRTTNPVATEQYGSVSTNLQRGNRQAATADVNTQGVLPGMVWQDRDVYLQGPIWAKIPHTDGHFHPSPLMGGFGLKHPPPQILIKNTPVPANPSTTFSAAKFASFITQYSTGQVSVEIEWELQKENSKRWNPEIQYTSNYNKSVNVDFTVDTNGVYSEPRPIGTRYLTRNL,735,TRUE,42328,532,41796,-1.2,manual,Sinai,Generative AAV capsid diversification by latent interpolation,2021,10.1101/2021.04.16.440236,561-588,AAV,viability for AAV capsid production,,CAPSD_AAV2S_uniprot_t099_msc70_mcc70_b0.8.a2m,1,735,735,0.8,0.01,604,0.782,575,213.8,0.371826087,low,1943,3.379130435,CAPSD_AAV2S_Sinai_substitutions_2021.csv,viral_selection,1,mutant,CAPSD_AAV2S_theta_0.01.npy,CAPSD_AAV2S.pdb,1-735,0.1,,OrganismalFitness +CAR11_HUMAN_Meitlis_2020_gof,CAR11_HUMAN_Meitlis_2020_gof.csv,CAR11_HUMAN,Human,Homo 
sapiens,MPGGGPEMDDYMETLKDEEDALWENVECNRHMLSRYINPAKLTPYLRQCKVIDEQDEDEVLNAPMLPSKINRAGRLLDILHTKGQRGYVVFLESLEFYYPELYKLVTGKEPTRRFSTIVVEEGHEGLTHFLMNEVIKLQQQMKAKDLQRCELLARLRQLEDEKKQMTLTRVELLTFQERYYKMKEERDSYNDELVKVKDDNYNLAMRYAQLSEEKNMAVMRSRDLQLEIDQLKHRLNKMEEECKLERNQSLKLKNDIENRPKKEQVLELERENEMLKTKNQELQSIIQAGKRSLPDSDKAILDILEHDRKEALEDRQELVNRIYNLQEEARQAEELRDKYLEEKEDLELKCSTLGKDCEMYKHRMNTVMLQLEEVERERDQAFHSRDEAQTQYSQCLIEKDKYRKQIRELEEKNDEMRIEMVRREACIVNLESKLRRLSKDSNNLDQSLPRNLPVTIISQDFGDASPRTNGQEADDSSTSEESPEDSKYFLPYHPPQRRMNLKGIQLQRAKSPISLKRTSDFQAKGHEEEGTDASPSSCGSLPITNSFTKMQPPRSRSSIMSITAEPPGNDSIVRRYKEDAPHRSTVEEDNDSGGFDALDLDDDSHERYSFGPSSIHSSSSSHQSEGLDAYDLEQVNLMFRKFSLERPFRPSVTSVGHVRGPGPSVQHTTLNGDSLTSQLTLLGGNARGSFVHSVKPGSLAEKAGLREGHQLLLLEGCIRGERQSVPLDTCTKEEAHWTIQRCSGPVTLHYKVNHEGYRKLVKDMEDGLITSGDSFYIRLNLNISSQLDACTMSLKCDDVVHVRDTMYQDRHEWLCARVDPFTDHDLDMGTIPSYSRAQQLLLVKLQRLMHRGSREEVDGTHHTLRALRNTLQPEEALSTSDPRVSPRLSRASFLFGQLLQFVSRSENKYKRMNSNERVRIISGSPLGSLARSSLDATKLLTEKQEELDPESELGKNLSLIPYSLVRAFYCERRRPVLFTPTVLAKTLVQRLLNSGGAMEFTICKSDIVTRDEFLRRQKTETIIYSREKNPNAFECIAPANIEAVAAKNKHCLLEAGIGCTRDLIKSNIYPIVLFIRVCEKNIKRFRKLLPRPETEEEFLRVCRLKEKELEALPCLYATVEPDMWGSVEELLRVVKDKIGEEQRKTIWVDEDQL,1154,FALSE,2374,2374,0,0.14475,manual,Meitlis,Multiplexed Functional Assessment of Genetic Variants in CARD11,2020,10.1016/j.ajhg.2020.10.015.,4-146,CARD11,Signaling (in presence of ibrutinib),survival assessment assay,CAR11_HUMAN_2023-10-12_b02.a2m,1,1154,1154,0.2,0.2,1352,0.998,1152,53.7,0.04661458333,Low,0,0,mmc2.xlsx,log2_score,1,mutant,CAR11_HUMAN_theta0.2_2023-10-12_b02.npy,CAR11_HUMAN.pdb,1-1154,1,,OrganismalFitness +CAR11_HUMAN_Meitlis_2020_lof,CAR11_HUMAN_Meitlis_2020_lof.csv,CAR11_HUMAN,Human,Homo 
sapiens,MPGGGPEMDDYMETLKDEEDALWENVECNRHMLSRYINPAKLTPYLRQCKVIDEQDEDEVLNAPMLPSKINRAGRLLDILHTKGQRGYVVFLESLEFYYPELYKLVTGKEPTRRFSTIVVEEGHEGLTHFLMNEVIKLQQQMKAKDLQRCELLARLRQLEDEKKQMTLTRVELLTFQERYYKMKEERDSYNDELVKVKDDNYNLAMRYAQLSEEKNMAVMRSRDLQLEIDQLKHRLNKMEEECKLERNQSLKLKNDIENRPKKEQVLELERENEMLKTKNQELQSIIQAGKRSLPDSDKAILDILEHDRKEALEDRQELVNRIYNLQEEARQAEELRDKYLEEKEDLELKCSTLGKDCEMYKHRMNTVMLQLEEVERERDQAFHSRDEAQTQYSQCLIEKDKYRKQIRELEEKNDEMRIEMVRREACIVNLESKLRRLSKDSNNLDQSLPRNLPVTIISQDFGDASPRTNGQEADDSSTSEESPEDSKYFLPYHPPQRRMNLKGIQLQRAKSPISLKRTSDFQAKGHEEEGTDASPSSCGSLPITNSFTKMQPPRSRSSIMSITAEPPGNDSIVRRYKEDAPHRSTVEEDNDSGGFDALDLDDDSHERYSFGPSSIHSSSSSHQSEGLDAYDLEQVNLMFRKFSLERPFRPSVTSVGHVRGPGPSVQHTTLNGDSLTSQLTLLGGNARGSFVHSVKPGSLAEKAGLREGHQLLLLEGCIRGERQSVPLDTCTKEEAHWTIQRCSGPVTLHYKVNHEGYRKLVKDMEDGLITSGDSFYIRLNLNISSQLDACTMSLKCDDVVHVRDTMYQDRHEWLCARVDPFTDHDLDMGTIPSYSRAQQLLLVKLQRLMHRGSREEVDGTHHTLRALRNTLQPEEALSTSDPRVSPRLSRASFLFGQLLQFVSRSENKYKRMNSNERVRIISGSPLGSLARSSLDATKLLTEKQEELDPESELGKNLSLIPYSLVRAFYCERRRPVLFTPTVLAKTLVQRLLNSGGAMEFTICKSDIVTRDEFLRRQKTETIIYSREKNPNAFECIAPANIEAVAAKNKHCLLEAGIGCTRDLIKSNIYPIVLFIRVCEKNIKRFRKLLPRPETEEEFLRVCRLKEKELEALPCLYATVEPDMWGSVEELLRVVKDKIGEEQRKTIWVDEDQL,1154,FALSE,2395,2395,0,-0.4635,manual,Meitlis,Multiplexed Functional Assessment of Genetic Variants in CARD11,2020,10.1016/j.ajhg.2020.10.015.,4-146,CARD11,Signaling,survival assessment assay,CAR11_HUMAN_2023-10-12_b02.a2m,1,1154,1154,0.2,0.2,1352,0.998,1152,53.7,0.04661458333,Low,0,0,mmc3.xlsx,log2_score,1,mutant,CAR11_HUMAN_theta0.2_2023-10-12_b02.npy,CAR11_HUMAN.pdb,1-1154,1,,OrganismalFitness +CAS9_STRP1_Spencer_2017_positive,CAS9_STRP1_Spencer_2017_positive.csv,CAS9_STRP1,Eukaryote,Streptococcus pyogenes serotype 
M1,MDKKYSIGLDIGTNSVGWAVITDEYKVPSKKFKVLGNTDRHSIKKNLIGALLFDSGETAEATRLKRTARRRYTRRKNRICYLQEIFSNEMAKVDDSFFHRLEESFLVEEDKKHERHPIFGNIVDEVAYHEKYPTIYHLRKKLVDSTDKADLRLIYLALAHMIKFRGHFLIEGDLNPDNSDVDKLFIQLVQTYNQLFEENPINASGVDAKAILSARLSKSRRLENLIAQLPGEKKNGLFGNLIALSLGLTPNFKSNFDLAEDAKLQLSKDTYDDDLDNLLAQIGDQYADLFLAAKNLSDAILLSDILRVNTEITKAPLSASMIKRYDEHHQDLTLLKALVRQQLPEKYKEIFFDQSKNGYAGYIDGGASQEEFYKFIKPILEKMDGTEELLVKLNREDLLRKQRTFDNGSIPHQIHLGELHAILRRQEDFYPFLKDNREKIEKILTFRIPYYVGPLARGNSRFAWMTRKSEETITPWNFEEVVDKGASAQSFIERMTNFDKNLPNEKVLPKHSLLYEYFTVYNELTKVKYVTEGMRKPAFLSGEQKKAIVDLLFKTNRKVTVKQLKEDYFKKIECFDSVEISGVEDRFNASLGTYHDLLKIIKDKDFLDNEENEDILEDIVLTLTLFEDREMIEERLKTYAHLFDDKVMKQLKRRRYTGWGRLSRKLINGIRDKQSGKTILDFLKSDGFANRNFMQLIHDDSLTFKEDIQKAQVSGQGDSLHEHIANLAGSPAIKKGILQTVKVVDELVKVMGRHKPENIVIEMARENQTTQKGQKNSRERMKRIEEGIKELGSQILKEHPVENTQLQNEKLYLYYLQNGRDMYVDQELDINRLSDYDVDHIVPQSFLKDDSIDNKVLTRSDKNRGKSDNVPSEEVVKKMKNYWRQLLNAKLITQRKFDNLTKAERGGLSELDKAGFIKRQLVETRQITKHVAQILDSRMNTKYDENDKLIREVKVITLKSKLVSDFRKDFQFYKVREINNYHHAHDAYLNAVVGTALIKKYPKLESEFVYGDYKVYDVRKMIAKSEQEIGKATAKYFFYSNIMNFFKTEITLANGEIRKRPLIETNGETGEIVWDKGRDFATVRKVLSMPQVNIVKKTEVQTGGFSKESILPKRNSDKLIARKKDWDPKKYGGFDSPTVAYSVLVVAKVEKGKSKKLKSVKELLGITIMERSSFEKNPIDFLEAKGYKEVKKDLIIKLPKYSLFELENGRKRMLASAGELQKGNELALPSKYVNFLYLASHYEKLKGSPEDNEQKQLFVEQHKHYLDEIIEQISEFSKRVILADANLDKVLSAYNKHRDKPIREQAENIIHLFTLTNLGAPAAFKYFDTTIDRKRYTSTKEVLDATLIHQSITGLYETRIDLSQLGGD,1368,FALSE,8117,8117,0,-0.2654328586,median,Spencer,Deep mutational scanning of S. 
pyogenes Cas9 reveals important functional domains,2017,10.1038/s41598-017-17081-y,1-1368,Streptococcus pyogenes Cas9,count of mutation where survival depends on expression of Cas9 and correct cleavage,Flow cytometry,CAS9_STRP1_2023-08-07_b01.a2m,1,1368,1368,0.1,0.2,5349,0.992,1357,1532.3,1.12918,Medium,241,0.17759764,SPCAS9_Spencer_positive_2022.csv,Log2 Fold Change after Positive Selection,1,mutant,CAS9_STRP1_theta0.2_2023-08-07_b01.npy,CAS9_STRP1.pdb,1-1368,1,,Activity +CASP3_HUMAN_Roychowdhury_2020,CASP3_HUMAN_Roychowdhury_2020.csv,CASP3_HUMAN,Human,Homo sapiens,MSGISLDNSYKMDYPEMGLCIIINNKNFHKSTGMTSRSGTDVDAANLRETFRNLKYEVRNKNDLTREEIVELMRDVSKEDHSKRSSFVCVLLSHGEEGIIFGTNGPVDLKKITNFFRGDRCRSLTGKPKLFIIQACRGTELDCGIETDSGVDDDMACHKIPVEADFLYAYSTAPGYYSWRNSKDGSWFIQSLCAMLKQYADKLEFMHILTRVNRKVATEFESFSFDATFHAKKQIPCIVSMLTKELYFYHLEHHHHHH,258,FALSE,1567,1567,0,0.03725973017,median,Roychowdhury,Microfluidic deep mutational scanning of the human executioner caspases reveals differences in structure and regulation,2022,10.1038/s41420-021-00799-0,2-258,CASP3,Fluorescence measurement,,CASP3_HUMAN_2023-08-07_b01.a2m,1,258,258,0.1,0.2,86012,0.884,228,28096.2,123.2289474,High,307,1.346491228,CASP3_HUMAN_Roychowdhury_2020.csv,coef,1,mutant,CASP3_HUMAN_theta0.2_2023-08-07_b01.npy,CASP3_HUMAN.pdb,1-258,1,,Activity +CASP7_HUMAN_Roychowdhury_2020,CASP7_HUMAN_Roychowdhury_2020.csv,CASP7_HUMAN,Human,Homo sapiens,MAKPDRSSFVPSLFSKKKKNVTMRSIKTTRDRVPTYQYNMNFEKLGKCIIINNKNFDKVTGMGVRNGTDKDAEALFKCFRSLGFDVIVYNDCSCAKMQDLLKKASEEDHTNAACFACILLSHGEENVIYGKDGVTPIKDLTAHFRGDRCKTLLEKPKLFFIQACRGTELDDGIQADSGPINDTDANPRYKIPVEADFLFAYSTVPGYYSWRSPGRGSWFVQALCSILEEHGKDLEIMQILTRVNDRVARHFESQSDDPHFHEKKQIPCVVSMLTKELYFSQ,281,FALSE,1680,1680,0,-0.3340768074,median,Roychowdhury,Microfluidic deep mutational scanning of the human executioner caspases reveals differences in structure and regulation,2022,10.1038/s41420-021-00799-0,2-281,CASP7,Fluorescence 
measurement,,CASP7_HUMAN_2023-08-07_b01.a2m,1,281,281,0.1,0.2,71075,0.854,240,21588.4,89.95166667,Medium,298,1.241666667,CASP7_HUMAN_Roychowdhury_2020.csv,coef,1,mutant,CASP7_HUMAN_theta0.2_2023-08-07_b01.npy,CASP7_HUMAN.pdb,1-281,1,,Activity +CATR_CHLRE_Tsuboyama_2023_2AMI,CATR_CHLRE_Tsuboyama_2023_2AMI.csv,CATR_CHLRE,Eukaryote,Chlamydomonas reinhardtii,GLTEEQKQEIREAFDLFDTDGSGTIDAKELKVAMRALGFEPKKEEIKKMISEIDKDGSGTIDFEEFLTMMTA,72,TRUE,1903,1340,563,-0.5681612987,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-10,1-72,Caltractin,Stability,cDNA display proteolysis,CATR_CHLRE_2023-08-07_b03.a2m,1,72,72,0.3,0.2,551057,0.903,65,75596.9,1163.029231,High,57,0.8769230769,Tsuboyama2023_Dataset2_Dataset7,ddG_ML_float,1,mut_type,CATR_CHLRE_theta0.2_2023-08-07_b03.npy,CATR_CHLRE.pdb,1-72,1,,Stability +CBPA2_HUMAN_Tsuboyama_2023_1O6X,CBPA2_HUMAN_Tsuboyama_2023_1O6X.csv,CBPA2_HUMAN,Human,Homo sapiens,VGDQVLEIVPSNEEQIKNLLQLEAQEHLQLDFWKSPTTPGETAHVRVPFVNVQAVKVFLESQGIAYSIMIED,72,TRUE,2068,1357,711,-1.221174658,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-11,1-72,Carboxypeptidase A2,Stability,cDNA display proteolysis,CBPA2_HUMAN_2023-08-07_b03.a2m,1,72,72,0.3,0.2,12711,0.986,71,3086.5,43.47183099,Medium,34,0.4788732394,Tsuboyama2023_Dataset2_Dataset8,ddG_ML_float,1,mut_type,CBPA2_HUMAN_theta0.2_2023-08-07_b03.npy,CBPA2_HUMAN.pdb,1-72,1,,Stability +CBS_HUMAN_Sun_2020,CBS_HUMAN_Sun_2020.csv,CBS_HUMAN,Human,Homo 
sapiens,MPSETPQAEVGPTGCPHRSGPHSAKGSLEKGSPEDKEAKEPLWIRPDAPSRCTWQLGRPASESPHHHTAPAKSPKILPDILKKIGDTPMVRINKIGKKFGLKCELLAKCEFFNAGGSVKDRISLRMIEDAERDGTLKPGDTIIEPTSGNTGIGLALAAAVRGYRCIIVMPEKMSSEKVDVLRALGAEIVRTPTNARFDSPESHVGVAWRLKNEIPNSHILDQYRNASNPLAHYDTTADEILQQCDGKLDMLVASVGTGGTITGIARKLKEKCPGCRIIGVDPEGSILAEPEELNQTEQTTYEVEGIGYDFIPTVLDRTVVDKWFKSNDEEAFTFARMLIAQEGLLCGGSAGSTVAVAVKAAQELQEGQRCVVILPDSVRNYMTKFLSDRWMLQKGFLKEEDLTEKKPWWWHLRVQELGLSAPLTVLPTITCGHTIEILREKGFDQAPVVDEAGVILGMVTLGNMLSSLLAGKVQPSDQVGKVIYKQFKQIRLTDTLGRLSHILEMDHFALVVHEQIQYHSTGKSSQRQMVFGVVTAIDLLNFVAAQERDQK,551,FALSE,7217,7217,0,0.3753910128,median,Sun,A proactive genotype-to-patient-phenotype map for cystathionine beta-synthase,2020,10.1186/s13073-020-0711-1,2-551,cystathionine beta-synthase,Yeast Growth,Growth,CBS_HUMAN_2023-10-12_b08.a2m,1,551,551,0.8,0.2,19563,0.833,459,1886,4.108932462,Medium,289,0.6296296296,,score,1,mutant,CBS_HUMAN_theta0.2_2023-10-12_b08.npy,CBS_HUMAN.pdb,1-551,1,,OrganismalFitness +CBX4_HUMAN_Tsuboyama_2023_2K28,CBX4_HUMAN_Tsuboyama_2023_2K28.csv,CBX4_HUMAN,Human,Homo sapiens,AVESIEKKRIRKGRVEYLVKWRGWSPKYNTWEPEENILDPRLLIAFQNRE,50,TRUE,2282,917,1365,-1.635037732,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-12,1-50,E3 SUMO-protein ligase CBX4,Stability,cDNA display proteolysis,CBX4_HUMAN_2023-08-07_b03.a2m,1,50,50,0.3,0.2,108263,0.96,48,13404.4,279.2583333,High,23,0.4791666667,Tsuboyama2023_Dataset2_Dataset9,ddG_ML_float,1,mut_type,CBX4_HUMAN_theta0.2_2023-08-07_b03.npy,CBX4_HUMAN.pdb,1-50,1,,Stability +CCDB_ECOLI_Adkar_2012,CCDB_ECOLI_Adkar_2012.csv,CCDB_ECOLI,Prokaryote,Escherichia coli,MQFKVYTYKRESRYRLFVDVQSDIIDTPGRRMVIPLASARLLSDKVSRELYPVVHIGDESWRMMTTDMASVPVSVIGEEVADLSHRENDIKNAINLMFWGI,101,FALSE,1176,1176,0,-19,median,Adkar,Protein model discrimination using mutational sensitivity derived from deep sequencing,2012,10.1016/j.str.2011.11.021,2-101,Toxin CcdB,Protein toxicity (negative effect on cell 
growth),toxin activity,CCDB_ECOLI_full_11-26-2021_b02.a2m,1,101,101,0.2,0.2,43564,0.851,86,16821.5,195.5988372,high,61,0.7093023256,CCDB_ECOLI_Adkar_2012.csv,score,-1,mutant,CCDB_ECOLI_theta_0.2.npy,CCDB_ECOLI.pdb,1-101,0.1,,Activity +CCDB_ECOLI_Tripathi_2016,CCDB_ECOLI_Tripathi_2016.csv,CCDB_ECOLI,Prokaryote,Escherichia coli,MQFKVYTYKRESRYRLFVDVQSDIIDTPGRRMVIPLASARLLSDKVSRELYPVVHIGDESWRMMTTDMASVPVSVIGEEVADLSHRENDIKNAINLMFWGI,101,FALSE,1663,1663,0,-3.5,manual,Tripathi,"Molecular Determinants of Mutant Phenotypes, Inferred from Saturation Mutagenesis Data",2016,10.1093/molbev/msw182,2-101,Toxin CcdB,growth (surrogate for toxicity/activity of CCDB),Growth,CCDB_ECOLI_full_11-26-2021_b02.a2m,1,101,101,0.2,0.2,43564,0.851,86,16821.5,195.5988372,high,61,0.7093023256,CCDB_ECOLI_Tripathi_2016.csv,score,-1,mutant,CCDB_ECOLI_theta_0.2.npy,CCDB_ECOLI.pdb,1-101,0.1,,OrganismalFitness +CCR5_HUMAN_Gill_2023,CCR5_HUMAN_Gill_2023.csv,CCR5_HUMAN,Human,Homo sapiens,MDYQVSSPIYDINYYTSEPCQKINVKQIAARLLPPLYSLVFIFGFVGNMLVILILINCKRLKSMTDIYLLNLAISDLFFLLTVPFWAHYAAAQWDFGNTMCQLLTGLYFIGFFSGIFFIILLTIDRYLAVVHAVFALKARTVTFGVVTSVITWVVAVFASLPGIIFTRSQKEGLHYTCSSHFPYSQYQFWKNFQTLKIVILGLVLPLLVMVICYSGILKTLLRCRNEKKRHRAVRLIFTIMIVYFLFWAPYNIVLLLNTFQEFFGLNNCSSSNRLDQAMQVTETLGMTHCCINPIIYAFVGEKFRNYLLVFFQKHIAKRFCKCCSIFQQEAPERASSVYTRSTGEQEISVGL,352,FALSE,6137,6137,0,-0.06,median,Gill,Multiple mechanisms of self-association of chemokine receptors CXCR4 and CCR5 demonstrated by deep mutagenesis,2023,10.1101/2023.03.25.534231,2-344,CCR5,"binding affinity, surface expression",FACS,CCR5_HUMAN_2023-08-07_b03.a2m,1,352,352,0.3,0.2,632074,0.83,292,63056,215.9452055,High,309,1.058219178,,avg_score,1,mutant,CCR5_HUMAN_theta0.2_2023-08-07_b03.npy,CCR5_HUMAN.pdb,1-352,1,,Binding +CD19_HUMAN_Klesmith_2019_FMC_singles,CD19_HUMAN_Klesmith_2019_FMC_singles.csv,CD19_HUMAN,Human,Homo 
sapiens,MPPPRLLFFLLFLTPMEVRPEEPLVVKVEEGDNAVLQCLKGTSDGPTQQLTWSRESPLKPFLKLSLGLPGLGIHMRPLAIWLFIFNVSQQMGGFYLCQPGPPSEKAWQPGWTVNVEGSGELFRWNVSDLGGLGCGLKNRSSEGPSSPSGKLMSPKLYVWAKDRPEIWEGEPPCLPPRDSLNQSLSQDLTMAPGSTLWLSCGVPPDSVSRGPLSWTHVHPKGPKSLLSLELKDDRPARDMWVMETGLLLPRATAQDAGKYYCHRGNLTMSFHLEITARPVLWHWLLRTGGWKVSAVTLAYLIFCLCSLVGILHLQRALVLRRKRKRMTDPTRRFFKVTPPPGSGPQNQYGNVLSLPTPTSGLGRAQRWAAGLGGTAPSYGNPSSDVQADGALGSRSPPGVGPEEEEGEGYEEPDSEEDSEFYENDSNLGQDQLSQDGSGYENPEDEPLGPEDEDSFSNAESYENEDEELTQPVARTMDFLSPHGSAWDPSREATSLGSQSYEDMRGILYAAPQLRSIRGQPGPNHEEDADSYENMDNPDGPDPAWGGGGRMGTWSTR,556,FALSE,3761,3761,0,0,manual,Klesmith,Retargeting CD19 Chimeric Antigen Receptor T Cells via Engineered CD19-Fusion Proteins,2019,10.1021/acs.molpharmaceut.9b00418,20-291,CD19,Binding affinity,FACS,CD19_HUMAN_2023-10-12_b01.a2m,1,556,556,0.1,0.2,1183,1,556,275.2,0.4949640288,Low,11,0.01978417266,single-site/Clinical_FMC_T1_Fitness.tsv,Fitness,1,mutant_offset,CD19_HUMAN_theta0.2_2023-10-12_b01.npy,CD19_HUMAN.pdb,1-556,1,,Binding +CP2C9_HUMAN_Amorosi_2021_abundance,CP2C9_HUMAN_Amorosi_2021_abundance.csv,CP2C9_HUMAN,Human,Homo sapiens,MDSLVVLVLCLSCLLLLSLWRQSSGRGKLPPGPTPLPVIGNILQIGIKDISKSLTNLSKVYGPVFTLYFGLKPIVVLHGYEAVKEALIDLGEEFSGRGIFPLAERANRGFGIVFSNGKKWKEIRRFSLMTLRNFGMGKRSIEDRVQEEARCLVEELRKTKASPCDPTFILGCAPCNVICSIIFHKRFDYKDQQFLNLMEKLNENIKILSSPWIQICNNFSPIIDYFPGTHNKLLKNVAFMKSYILEKVKEHQESMDMNNPQDFIDCFLMKMEKEKHNQPSEFTIESLENTAVDLFGAGTETTSTTLRYALLLLLKHPEVTAKVQEEIERVIGRNRSPCMQDRSHMPYTDAVVHEVQRYIDLLPTSLPHAVTCDIKFRNYLIPKGTTILISLTSVLHDNKEFPNPEMFDPHHFLDEGGNFKKSKYFMPFSAGKRICVGEALAGMELFLFLTSILQNFNLKSLVDPKNLDTTPVVNGFASVPPFYQLCFIPV,490,FALSE,6370,6370,0,0.7723244345,median,Amorosi,Massively parallel characterization of CYP2C9 variant enzyme activity and abundance,2021,10.1016/j.ajhg.2021.07.001,2-490,Cytochrome P450 2C9,protein abundance,protein 
abundance,CP2C9_HUMAN_full_11-26-2021_b04.a2m,1,490,490,0.4,0.2,264279,0.886,434,81212.1,187.1246544,high,1092,2.516129032,CP2C9_HUMAN_Amorosi_2021.csv,abundance_score,1,variant,CP2C9_HUMAN_theta_0.2.npy,CP2C9_HUMAN.pdb,1-490,0.1,,Expression +CP2C9_HUMAN_Amorosi_2021_activity,CP2C9_HUMAN_Amorosi_2021_activity.csv,CP2C9_HUMAN,Human,Homo sapiens,MDSLVVLVLCLSCLLLLSLWRQSSGRGKLPPGPTPLPVIGNILQIGIKDISKSLTNLSKVYGPVFTLYFGLKPIVVLHGYEAVKEALIDLGEEFSGRGIFPLAERANRGFGIVFSNGKKWKEIRRFSLMTLRNFGMGKRSIEDRVQEEARCLVEELRKTKASPCDPTFILGCAPCNVICSIIFHKRFDYKDQQFLNLMEKLNENIKILSSPWIQICNNFSPIIDYFPGTHNKLLKNVAFMKSYILEKVKEHQESMDMNNPQDFIDCFLMKMEKEKHNQPSEFTIESLENTAVDLFGAGTETTSTTLRYALLLLLKHPEVTAKVQEEIERVIGRNRSPCMQDRSHMPYTDAVVHEVQRYIDLLPTSLPHAVTCDIKFRNYLIPKGTTILISLTSVLHDNKEFPNPEMFDPHHFLDEGGNFKKSKYFMPFSAGKRICVGEALAGMELFLFLTSILQNFNLKSLVDPKNLDTTPVVNGFASVPPFYQLCFIPV,490,FALSE,6142,6142,0,0.5476104185,median,Amorosi,Massively parallel characterization of CYP2C9 variant enzyme activity and abundance,2021,10.1016/j.ajhg.2021.07.001,1-490,Cytochrome P450 2C9,"activity, binding (to fluorescent CYP probe)","Activity, binding",CP2C9_HUMAN_full_11-26-2021_b04.a2m,1,490,490,0.4,0.2,264279,0.886,434,81212.1,187.1246544,high,1092,2.516129032,CP2C9_HUMAN_Amorosi_2021.csv,activity_score,1,variant,CP2C9_HUMAN_theta_0.2.npy,CP2C9_HUMAN.pdb,1-490,0.1,,Binding +CSN4_MOUSE_Tsuboyama_2023_1UFM,CSN4_MOUSE_Tsuboyama_2023_1UFM.csv,CSN4_MOUSE,Eukaryote,Mus musculus,SSGGSSILDRAVIEHNLLSASKLYNNITFEELGALLEIPAAKAEKIASQMITEGRMNGFIDQIDGIVHFETR,72,TRUE,3295,1353,1942,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-13,1-72,COP9 signalosome complex subunit 4,Stability,cDNA display proteolysis,CSN4_MOUSE_2023-08-07_b03.a2m,1,72,72,0.3,0.2,39217,0.889,64,3492.9,54.5765625,Medium,9,0.140625,Tsuboyama2023_Dataset2_Dataset10,ddG_ML_float,1,mut_type,CSN4_MOUSE_theta0.2_2023-08-07_b03.npy,CSN4_MOUSE.pdb,1-72,1,,Stability 
+CUE1_YEAST_Tsuboyama_2023_2MYX,CUE1_YEAST_Tsuboyama_2023_2MYX.csv,CUE1_YEAST,Eukaryote,Saccharomyces cerevisiae,GGHPVTTQMVETVQNLAPNLHPEQIRYSLENTGSVEETVERYLRGDEFSFPP,52,TRUE,1580,955,625,-1.319713733,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-14,1-52,Coupling of ubiquitin conjugation to ER degradation protein 1,Stability,cDNA display proteolysis,CUE1_YEAST_2023-08-07_b08.a2m,1,52,52,0.8,0.2,3213,0.923,48,387.1,8.064583333,Medium,10,0.2083333333,Tsuboyama2023_Dataset2_Dataset11,ddG_ML_float,1,mut_type,CUE1_YEAST_theta0.2_2023-08-07_b08.npy,CUE1_YEAST.pdb,1-52,1,,Stability +D7PM05_CLYGR_Somermeyer_2022,D7PM05_CLYGR_Somermeyer_2022.csv,D7PM05_CLYGR,Eukaryote,Clytia gregaria,MTALTEGAKLFEKEIPYITELEGDVEGMKFIIKGEGTGDATTGTIKAKYICTTGDLPVPWATILSSLSYGVFCFAKYPRHIADFFKSTQPDGYSQDRIISFDNDGQYDVKAKVTYENGTLYNRVTVKGTGFKSNGNILGMRVLYHSPPHAVYILPDRKNGGMKIEYNKAFDVMGGGHQMARHAQFNKPLGAWEEDYPLYHHLTVWTSFGKDPDDDETDHLTIVEVIKAVDLETYR,235,TRUE,24515,1169,23346,12500,manual,Somermeyer,Heterogeneity of the GFP fitness landscape and data-driven protein design,2022,10.7554/eLife.75842,2-235,Green fluorescent protein cgreGFP,Fluorescence,FACS,D7PM05_CLYGR_full_b0.2.a2m,1,235,235,0.2,0.2,694,1,235,137.6,0.5855319149,Low,4,0.0170212766,D7PM05_CLYGR_Somermeyer_2022.csv,replicates_mean_brightness,1,mutant,D7PM05_CLYGR_theta_0.2.npy,D7PM05_CLYGR.pdb,1-235,1,,Activity +DLG4_HUMAN_Faure_2021,DLG4_HUMAN_Faure_2021.csv,DLG4_HUMAN,Human,Homo 
sapiens,MDCLCIVTTKKYRYQDEDTPPLEHSPAHLPNQANSPPVIVNTDTLEAPGYELQVNGTEGEMEYEEITLERGNSGLGFSIAGGTDNPHIGDDPSIFITKIIPGGAAAQDGRLRVNDSILFVNEVDVREVTHSAAVEALKEAGSIVRLYVMRRKPPAEKVMEIKLIKGPKGLGFSIAGGVGNQHIPGDNSIYVTKIIEGGAAHKDGRLQIGDKILAVNSVGLEDVMHEDAVAALKNTYDVVYLKVAKPSNAYLSDSYAPPDITTSYSQHLDNEISHSSYLGTDYPTAMTPTSPRRYSPVAKDLLGEEDIPREPRRIVIHRGSTGLGFNIVGGEDGEGIFISFILAGGPADLSGELRKGDQILSVNGVDLRNASHEQAAIALKNAGQTVTIIAQYKPEEYSRFEAKIHDLREQLMNSSLGSGTASLRSNPKRGFYIRALFDYDKTKDCGFLSQALSFRFGDVLHVIDASDEEWWQARRVHSDSETDDIGFIPSKRRVERREWSRLKAKDWGSSSGSQGREDSVLSYETVTQMEVHYARPIIILGPTKDRANDDLLSEFPDKFGSCVPHTTRPKREYEIDGRDYHFVSSREKMEKDIQAHKFIEAGQYNSHLYGTSVQSVREVAEQGKHCILDVSANAVRRLQAAHLHPIAIFIRPRSLENVLEINKRITEEQARKAFDRATKLEQEFTECFSAIVEGDSFEEIYHKVKRVIEDLSGPYIWVPARERL,724,TRUE,6976,1280,5696,-0.5602585328,median,Faure,Mapping the energetic and allosteric landscapes of protein binding domains,2022,10.1038/s41586-022-04586-4,311-394,PSD95-PDZ3,Yeast growth,Growth,DLG4_HUMAN_full_11-26-2021_b02.a2m,1,724,724,0.2,0.2,25338,0.825,597,354.3,0.5934673367,low,7,0.01172529313,DLG4_HUMAN_Faure_2021.csv,fitness,1,mutant,DLG4_HUMAN_theta_0.2.npy,DLG4_HUMAN.pdb,1-724,0.1,,OrganismalFitness +DLG4_RAT_McLaughlin_2012,DLG4_RAT_McLaughlin_2012.csv,DLG4_RAT,Eukaryote,Rattus 
norvegicus,MDCLCIVTTKKYRYQDEDTPPLEHSPAHLPNQANSPPVIVNTDTLEAPGYELQVNGTEGEMEYEEITLERGNSGLGFSIAGGTDNPHIGDDPSIFITKIIPGGAAAQDGRLRVNDSILFVNEVDVREVTHSAAVEALKEAGSIVRLYVMRRKPPAEKVMEIKLIKGPKGLGFSIAGGVGNQHIPGDNSIYVTKIIEGGAAHKDGRLQIGDKILAVNSVGLEDVMHEDAVAALKNTYDVVYLKVAKPSNAYLSDSYAPPDITTSYSQHLDNEISHSSYLGTDYPTAMTPTSPRRYSPVAKDLLGEEDIPREPRRIVIHRGSTGLGFNIVGGEDGEGIFISFILAGGPADLSGELRKGDQILSVNGVDLRNASHEQAAIALKNAGQTVTIIAQYKPEEYSRFEAKIHDLREQLMNSSLGSGTASLRSNPKRGFYIRALFDYDKTKDCGFLSQALSFRFGDVLHVIDAGDEEWWQARRVHSDSETDDIGFIPSKRRVERREWSRLKAKDWGSSSGSQGREDSVLSYETVTQMEVHYARPIIILGPTKDRANDDLLSEFPDKFGSCVPHTTRPKREYEIDGRDYHFVSSREKMEKDIQAHKFIEAGQYNSHLYGTSVQSVREVAEQGKHCILDVSANAVRRLQAAHLHPIAIFIRPRSLENVLEINKRITEEQARKAFDRATKLEQEFTECFSAIVEGDSFEEIYHKVKRVIEDLSGPYIWVPARERL,724,FALSE,1576,1576,0,-0.25,manual,McLaughlin,The spatial architecture of protein function and adaptation,2012,10.1038/nature11500,311-393,"Dlg4, (PSD95_PDZ3)",peptide binding - natural ligand,Binding,DLG4_RAT_full_11-26-2021_b03.a2m,1,724,724,0.3,0.2,24705,0.841,609,283.9,0.4661740558,low,6,0.009852216749,DLG4_RAT_McLaughlin_2012.csv,CRIPT,1,mutant,DLG4_RAT_theta_0.2.npy,DLG4_RAT.pdb,1-724,0.1,,Binding +DN7A_SACS2_Tsuboyama_2023_1JIC,DN7A_SACS2_Tsuboyama_2023_1JIC.csv,DN7A_SACS2,Prokaryote,Saccharolobus solfataricus,TVKFKYKGEEKQVDISKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQ,55,FALSE,1008,1008,0,-0.472754253,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-15,1-55,DNA-binding protein 7a,Stability,cDNA display proteolysis,DN7A_SACS2_2023-08-07_b02.a2m,1,55,55,0.2,0.2,42895,0.764,42,1248.1,29.71666667,Medium,13,0.3095238095,Tsuboyama2023_Dataset2_Dataset12,ddG_ML_float,1,mut_type,DN7A_SACS2_theta0.2_2023-08-07_b02.npy,DN7A_SACS2.pdb,1-55,1,,Stability +DNJA1_HUMAN_Tsuboyama_2023_2LO1,DNJA1_HUMAN_Tsuboyama_2023_2LO1.csv,DNJA1_HUMAN,Human,Homo 
sapiens,TTYYDVLGVKPNATQEELKKAYRKLALKYHPDKNPNEGEKFKQISQAYEVLSDAKKRELYDKGGE,65,TRUE,2264,1216,1048,-2.239788161,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-16,1-65,DnaJ homolog subfamily A member 1,Stability,cDNA display proteolysis,DNJA1_HUMAN_2023-08-07_b07.a2m,1,65,65,0.7,0.2,280284,0.969,63,35361.9,561.3,High,52,0.8253968254,Tsuboyama2023_Dataset2_Dataset13,ddG_ML_float,1,mut_type,DNJA1_HUMAN_theta0.2_2023-08-07_b07.npy,DNJA1_HUMAN.pdb,1-65,1,,Stability +DOCK1_MOUSE_Tsuboyama_2023_2M0Y,DOCK1_MOUSE_Tsuboyama_2023_2M0Y.csv,DOCK1_MOUSE,Eukaryote,Mus musculus,WVPTKREEKYGVAFYNYDARGADELSLQIGDTVHILETYEGWYRGYTLRKKSKKGIFPASYIHLKE,66,TRUE,2915,1213,1702,-1.104437518,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-17,1-66,Dedicator of cytokinesis protein 1,Stability,cDNA display proteolysis,DOCK1_MOUSE_2023-08-07_b03.a2m,1,66,66,0.3,0.2,705447,0.848,56,22172.3,395.9339286,High,55,0.9821428571,Tsuboyama2023_Dataset2_Dataset14,ddG_ML_float,1,mut_type,DOCK1_MOUSE_theta0.2_2023-08-07_b03.npy,DOCK1_MOUSE.pdb,1-66,1,,Stability +DYR_ECOLI_Nguyen_2023,DYR_ECOLI_Nguyen_2023.csv,DYR_ECOLI,Prokaryote,Escherichia coli,MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGRHTWESIGRPLPGRKNIILSSQPGTDDRVTWVKSVDEAIAACGDVPEIMVIGGGRVYEQFLPKAQKLYLTHIDAEVEGDTHFPDYEPDDWESVFSEFHDADAQNSHSYCFEILERR,159,FALSE,2916,2916,0,0.8,manual,Nguyen,The Genetic Landscape of a Metabolic Interaction,2023,10.1101/2023.05.28.542639,2-159,DHFR,cell growth in ∆DHFR bacteria,Growth,DYR_ECOLI_2023-08-07_b01.a2m,1,159,159,0.1,0.2,188828,0.969,154,47685.7,309.6474026,High,337,2.188311688,542639_file03.xlsx,Avg Growth - WT TYMS,1,Mutation,DYR_ECOLI_theta0.2_2023-08-07_b01.npy,DYR_ECOLI.pdb,1-159,1,,OrganismalFitness +DYR_ECOLI_Thompson_2019,DYR_ECOLI_Thompson_2019.csv,DYR_ECOLI,Prokaryote,Escherichia 
coli,MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGRHTWESIGRPLPGRKNIILSSQPGTDDRVTWVKSVDEAIAACGDVPEIMVIGGGRVYEQFLPKAQKLYLTHIDAEVEGDTHFPDYEPDDWESVFSEFHDADAQNSHSYCFEILERR,159,FALSE,2363,2363,0,-0.5,manual,Thompson,Altered expression of a quality control protease in E. coli reshapes the in vivo mutational landscape of a model enzyme,2019,10.7554/eLife.53476,2-159,DHFR reductase,"growth (turbidostat; -Lon for natural absence of Lon protease in E. coli, +Lon for exogenous protease)",Growth,DYR_ECOLI_full_11-26-2021_b08.a2m,1,159,159,0.8,0.2,41921,0.981,156,12203.2,78.22564103,medium,265,1.698717949,DYR_ECOLI_Thompson_plusLon_2019.csv,PlusLon_selection_coefficient,1,mutant,DYR_ECOLI_theta_0.2.npy,DYR_ECOLI.pdb,1-159,0.1,,OrganismalFitness +ENV_HV1B9_DuenasDecamp_2016,ENV_HV1B9_DuenasDecamp_2016.csv,ENV_HV1B9,Virus,Human immunodeficiency virus type 1 group M subtype B (strain 89.6) (HIV-1),MRVKEIRKNWQHLRGGILLLGMLMICSAAKEKTWVTIYYGVPVWREATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLGNVTENFNMWKNNMVDQMHEDIISLWDESLKPCVKLTPLCVTLNCTNLNITKNTTNPTSSSWGMMEKGEIKNCSFYITTSIRNKVKKEYALFNRLDVVPIENTNNTKYRLISCNTSVITQACPKVSFQPIPIHYCVPAGFAMLKCNNKTFNGSGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEDIVIRSENFTDNAKTIIVQLNESVVINCTRPNNNTRRRLSIGPGRAFYARRNIIGDIRQAHCNISRAKWNNTLQQIVIKLREKFRNKTIAFNQPSGGDPEIVRHSFNCGGEFFYCNTAQLFNSTWNVTGGTNGTEGNDIITLQCRIKQIINMWQKVGKAMYAPPITGQIRCSSNITGLLLTRDGGNSTETETEIFRPGGGDMRDNWRSELYKYKVVRIEPIGVAPTRAKRRTVQREKRAVGIGAVFLGFLGAAGSTMGAASVTLTVQARLLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLALERYLRDQQLMGIWGCSGKLICTTSVPWNVSWSNKSVDDIWNNMTWMEWEREIDNYTDYIYDLLEKSQTQQEKNEKELLELDKWASLWNWFDITNWLWYIRLFIMIVGGLIGLRIVFAVLSIVNRVRQGYSPLSFQTLLPASRGPDRPEGTEEEGGERDRDRSGPLVNGFLALFWVDLRNLCLFLYHLLRNLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIKIVQRACRAIRNIPTRIRQGLERALL,853,FALSE,375,375,0,-0.8,manual,Duenas-Decamp,Saturation Mutagenesis of the HIV-1 Envelope CD4 Binding Loop Reveals Residues Controlling Distinct Trimer Conformations,2016,10.1371/journal.ppat.1005988,361-380,HIV env,Viral 
replication,Growth,ENV_HV1B9_S364P-M373R_b0.3.a2m,1,853,853,0.3,0.01,87271,0.989,844,11807.8,13.99028436,medium,947,1.122037915,ENV_HV1B9_DuenasDecamp_2016.csv,Fitness_Effect,1,mutant,ENV_HV1B9_theta_0.01.npy,ENV_HV1B9.pdb,1-853,0.1,,OrganismalFitness +ENV_HV1BR_Haddox_2016,ENV_HV1BR_Haddox_2016.csv,ENV_HV1BR,Virus,Human immunodeficiency virus type 1 group M subtype B (isolate BRU/LAI) (HIV-1),MRVKEKYQHLWRWGWKWGTMLLGILMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLGNATNTNSSNTNSSSGEMMMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYTLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSANFTDNAKTIIVQLNQSVEINCTRPNNNTRKSIRIQRGPGRAFVTIGKIGNMRQAHCNISRAKWNATLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQFINMWQEVGKAMYAPPISGQIRCSSNITGLLLTRDGGNNNNGSEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNNMTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKIFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL,861,FALSE,12863,12863,0,0.0191127558,median,Haddox,Experimental Estimation of the Effects of All Amino-Acid Mutations to HIV's Envelope Protein on Viral Replication in Cell Culture,2016,10.1371/journal.ppat.1006114,31-707,HIV env,Viral replication,Growth,ENV_HV1BR_theta0.99_full_11-26-2021_b09.a2m,1,861,861,0.9,0.01,74844,0.98,844,36809.8,43.61350711,medium,2359,2.795023697,ENV_HV1BR_Haddox_2016.csv,score,1,mutant,ENV_HV1BR_theta_0.01.npy,ENV_HV1BR.pdb,1-861,0.1,,OrganismalFitness +ENVZ_ECOLI_Ghose_2023,ENVZ_ECOLI_Ghose_2023.csv,ENVZ_ECOLI,Prokaryote,Escherichia coli,LADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEECNAIIEQFIDYLR,60,FALSE,1121,1121,0,3.5,manual,Ghose,Marginal specificity in protein interactions constrains evolution
of a paralogous family,2023,10.1073/pnas.2221163120,1-60,EnvZ kinase,fluorescent reporter,FACS,ENVZ_ECOLI_2023-08-07_b02.a2m,1,60,60,0.2,0.2,1879223,0.933,56,254652.1,4547.358929,High,55,0.9821428571,ENVZ_ECOLI_Ghose_2023.csv,mean_on,1,mutant,ENVZ_ECOLI_theta0.2_2023-08-07_b02.npy,ENVZ_ECOLI.pdb,1-60,1,,Activity +EPHB2_HUMAN_Tsuboyama_2023_1F0M,EPHB2_HUMAN_Tsuboyama_2023_1F0M.csv,EPHB2_HUMAN,Human,Homo sapiens,SFNTVDEWLEAIKMGQYKESFANAGFTSFDVVSQMMMEDILRVGVTLAGHQKKILNSIQVMRAQMN,66,TRUE,1960,1239,721,-1.932053964,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-18,1-66,Ephrin type-B receptor 2,Stability,cDNA display proteolysis,EPHB2_HUMAN_2023-08-07_b04.a2m,1,66,66,0.4,0.2,212234,0.894,59,8426.3,142.8186441,High,29,0.4915254237,Tsuboyama2023_Dataset2_Dataset15,ddG_ML_float,1,mut_type,EPHB2_HUMAN_theta0.2_2023-08-07_b04.npy,EPHB2_HUMAN.pdb,1-66,1,,Stability +ERBB2_HUMAN_Elazar_2016,ERBB2_HUMAN_Elazar_2016.csv,ERBB2_HUMAN,Human,Homo 
sapiens,MELAALCRWGLLLALLPPGAASTQVCTGTDMKLRLPASPETHLDMLRHLYQGCQVVQGNLELTYLPTNASLSFLQDIQEVQGYVLIAHNQVRQVPLQRLRIVRGTQLFEDNYALAVLDNGDPLNNTTPVTGASPGGLRELQLRSLTEILKGGVLIQRNPQLCYQDTILWKDIFHKNNQLALTLIDTNRSRACHPCSPMCKGSRCWGESSEDCQSLTRTVCAGGCARCKGPLPTDCCHEQCAAGCTGPKHSDCLACLHFNHSGICELHCPALVTYNTDTFESMPNPEGRYTFGASCVTACPYNYLSTDVGSCTLVCPLHNQEVTAEDGTQRCEKCSKPCARVCYGLGMEHLREVRAVTSANIQEFAGCKKIFGSLAFLPESFDGDPASNTAPLQPEQLQVFETLEEITGYLYISAWPDSLPDLSVFQNLQVIRGRILHNGAYSLTLQGLGISWLGLRSLRELGSGLALIHHNTHLCFVHTVPWDQLFRNPHQALLHTANRPEDECVGEGLACHQLCARGHCWGPGPTQCVNCSQFLRGQECVEECRVLQGLPREYVNARHCLPCHPECQPQNGSVTCFGPEADQCVACAHYKDPPFCVARCPSGVKPDLSYMPIWKFPDEEGACQPCPINCTHSCVDLDDKGCPAEQRASPLTSIISAVVGILLVVVLGVVFGILIKRRQQKIRKYTMRRLLQETELVEPLTPSGAMPNQAQMRILKETELRKVKVLGSGAFGTVYKGIWIPDGENVKIPVAIKVLRENTSPKANKEILDEAYVMAGVGSPYVSRLLGICLTSTVQLVTQLMPYGCLLDHVRENRGRLGSQDLLNWCMQIAKGMSYLEDVRLVHRDLAARNVLVKSPNHVKITDFGLARLLDIDETEYHADGGKVPIKWMALESILRRRFTHQSDVWSYGVTVWELMTFGAKPYDGIPAREIPDLLEKGERLPQPPICTIDVYMIMVKCWMIDSECRPRFRELVSEFSRMARDPQRFVVIQNEDLGPASPLDSTFYRSLLEDDDMGDLVDAEEYLVPQQGFFCPDPAPGAGGMVHHRHRSSSTRSGGGDLTLGLEPSEEEAPRSPLAPSEGAGSDVFDGDLGMGAAKGLQSLPTHDPSPLQRYSEDPTVPLPSETDGYVAPLTCSPQPEYVNQPDVRPQPPSPREGPLPAARPAGATLERPKTLSPGKNGVVKDVFAFGGAVENPEYLTPQGGAAPQPHPPPAFSPAFDNLYYWDQDPPERGAPPSTFKGTPTAENPEYLGLDVPV,1255,FALSE,326,326,0,0.0678339381,median,Elazar,Mutational scanning reveals the determinants of protein insertion and association energetics in the plasma membrane,2016,10.7554/eLife.12125,651-674,ErbB2 membrane domain,Membrane-protein insertion,TOXCAT-Beta-lactamase (TbL) screen,ERBB2_HUMAN_2023-10-12_b02.a2m,1,1255,1255,0.2,0.2,8311,0.981,1231,447.9,0.363850528,Low,187,0.1519090171,urn_mavedb_00000051-b-1_scores.csv,score,-1,mutant,ERBB2_HUMAN_theta0.2_2023-10-12_b02.npy,ERBB2_HUMAN.pdb,1-1255,1,650,Expression +ESTA_BACSU_Nutschel_2020,ESTA_BACSU_Nutschel_2020.csv,ESTA_BACSU,Prokaryote,Bacillus 
subtilis,MKFVKRRIIALVTILMLSVTSLFALQPSAKAAEHNPVVMVHGIGGASFNFAGIKSYLVSQGWSRDKLYAVDFWDKTGTNYNNGPVLSRFVQKVLDETGAKKVDIVAHSMGGANTLYYIKNLDGGNKVANVVTLGGANRLTTGKALPGTDPNQKILYTSIYSSADMIVMNYLSRLDGARNVQIHGVGHIGLLYSSQVNSLIKEGLNGGGQNTN,212,FALSE,2172,2172,0,46.34,median,Nutschel,Systematically Scrutinizing the Impact of Substitution Sites on Thermostability and Detergent Tolerance for Bacillus subtilis Lipase A,2020,10.1021/acs.jcim.9b00954,32-205,estA,thermostability,thermostability,ESTA_BACSU_full_11-26-2021_b03.a2m,1,212,212,0.3,0.2,234310,0.774,164,64492.5,393.2469512,high,292,1.780487805,ESTA_BACSU_Nutschel_2020.csv,T50,1,Variants of BsLipA,ESTA_BACSU_theta_0.2.npy,ESTA_BACSU.pdb,1-212,0.1,,Stability +F7YBW7_MESOW_Ding_2023,F7YBW7_MESOW_Ding_2023.csv,F7YBW7_MESOW,Prokaryote,M. opportunistum,MANVEKMSVAVTPQQAAVMREAVEAGEYATASEIVREAVRDWLAKRELRHDDIRRLRQLWDEGKASGRPEPVDFDALRKEARQKLTEVPPNGR,93,TRUE,7922,80,7842,0.3,manual,Ding,Protein design using structure-based residue preferences,2023,10.1101/2022.10.31.514613,48-82,Antitoxin ParD3,growth enrichment,,F7YBW8_MESOW_full_01-07-2022_b02.a2m,1,93,93,0.2,0.2,38613,0.774,72,16262.4,225.8666667,high,31,0.4305555556,df_at_10pos.csv,DMS_score,1,mutant,F7YBW8_MESOW_theta_0.2.npy,F7YBW7_MESOW.pdb,1-93,1,,OrganismalFitness +F7YBW8_MESOW_Aakre_2015,F7YBW8_MESOW_Aakre_2015.csv,F7YBW8_MESOW,Prokaryote,Mesorhizobium opportunistum (strain LMG 24607 / HAMBI 3007 / WSM2075),MANVEKMSVAVTPQQAAVMREAVEAGEYATASEIVREAVRDWLAKRELRHDDIRRLRQLWDEGKASGRPEPVDFDALRKEARQKLTEVPPNGR,93,TRUE,9192,37,9155,-0.001724,median,Aakre,Evolving New Protein-Protein Interaction Specificity through Promiscuous Intermediates,2015,10.1016/j.cell.2015.09.055,59-64,Antitoxin ParD3,fitness,Growth (antitoxin neutralization of ParE3),F7YBW8_MESOW_full_01-07-2022_b02.a2m,1,93,93,0.2,0.2,38613,0.774,72,16262.4,225.8666667,high,31,0.4305555556,F7YBW8_MESOW_Aakre_2015.csv,fitness,1,mutant,F7YBW8_MESOW_theta_0.2.npy,F7YBW8_MESOW.pdb,1-93,0.1,,OrganismalFitness
+FECA_ECOLI_Tsuboyama_2023_2D1U,FECA_ECOLI_Tsuboyama_2023_2D1U.csv,FECA_ECOLI,Eukaryote,Escherichia coli,QVNIAPGSLDKALNQYAAHSGFTLSVDASLTRGKQSNGLHGDYDVESGLQQLLDGSGLQVKPLGNNSWTLEP,72,TRUE,1886,1219,667,-0.813576222,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-19,1-72,Fe(3+) dicitrate transport protein FecA,Stability,cDNA display proteolysis,FECA_ECOLI_2023-08-07_b06.a2m,1,72,72,0.6,0.2,74248,0.986,71,9949.9,140.1394366,High,63,0.8873239437,Tsuboyama2023_Dataset2_Dataset16,ddG_ML_float,1,mut_type,FECA_ECOLI_theta0.2_2023-08-07_b06.npy,FECA_ECOLI.pdb,1-72,1,,Stability +FKBP3_HUMAN_Tsuboyama_2023_2KFV,FKBP3_HUMAN_Tsuboyama_2023_2KFV.csv,FKBP3_HUMAN,Human,Homo sapiens,VPQRAWTVEQLRSEQLPKKDIIKFLQEHGSDSFLAEHKLLGNIKNVAKTANKDHLVTAYNHLFETKRFK,69,FALSE,1237,1237,0,-0.1631252002,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-20,1-69,Peptidyl-prolyl cis-trans isomerase FKBP3,Stability,cDNA display proteolysis,FKBP3_HUMAN_2023-08-07_b03.a2m,1,69,69,0.3,0.2,3216,0.957,66,132,2,Medium,7,0.1060606061,Tsuboyama2023_Dataset2_Dataset17,ddG_ML_float,1,mut_type,FKBP3_HUMAN_theta0.2_2023-08-07_b03.npy,FKBP3_HUMAN.pdb,1-69,1,,Stability +GAL4_YEAST_Kitzman_2015,GAL4_YEAST_Kitzman_2015.csv,GAL4_YEAST,Eukaryote,Saccharomyces cerevisiae 
S288C,MKLLSSIEQACDICRLKKLKCSKEKPKCAKCLKNNWECRYSPKTKRSPLTRAHLTEVESRLERLEQLFLLIFPREDLDMILKMDSLQDIKALLTGLFVQDNVNKDAVTDRLASVETDMPLTLRQHRISATSSSEESSNKGQRQLTVSIDSAAHHDNSTIPLDFMPRDALHGFDWSEEDDMSDGLPFLKTDPNNNGFFGDGSLLCILRSIGFKPENYTNSNVNRLPTMITDRYTLASRSTTSRLLQSYLNNFHPYCPIVHSPTLMMLYNNQIEIASKDQWQILFNCILAIGAWCIEGESTDIDVFYYQNAKSHLTSKVFESGSIILVTALHLLSRYTQWRQKTNTSYNFHSFSIRMAISLGLNRDLPSSFSDSSILEQRRRIWWSVYSWEIQLSLLYGRSIQLSQNTISFPSSVDDVQRTTTGPTIYHGIIETARLLQVFTKIYELDKTVTAEKSPICAKKCLMICNEIEEVSRQAPKFLQMDISTTALTNLLKEHPWLSFTRFELKWKQLSLIIYVLRDFFTNFTQKKSQLEQDQNDHQSYEVKRCSIMLSDAAQRTVMSVSSYMDNHNVTPYFAWNCSYYLFNAVLVPIKTLLSNSKSNAENNETAQLLQQINTVLMLLKKLATFKIQTCEKYIQVLEEVCAPFLLSQCAIPLPHISYNNSNGSAIKNIVGSATIAQYPTLPEENVNNISVKYVSPGSVGPSPVPLKSGASFSDLVKLLSNRPPSRNSPVTIPRSTPSHRSVTPFLGQQQQLQSLVPLTPSALFGGANFNQSGNIADSSLSFTFTNSSNGPNLITTQTNSQALSQPIASSNVHDNFMNNEITASKIDDGNNSKPLSPGWTDQTAYNAFGITTGMFNTTTMDDVYNYLFDDEDTPPNPKKE,881,FALSE,1195,1195,0,-8,manual,Kitzman,Massively parallel single-amino-acid mutagenesis,2015,10.1038/nmeth.3223,2-65,GAL4,"Growth (no selection, 24h)",Growth,GAL4_YEAST_full_11-26-2021_b02.a2m,1,881,881,0.2,0.2,16159,0.707,623,7942.3,12.74847512,medium,163,0.2616372392,GAL4_YEAST_Kitzman_2015.csv,SEL_C_64h,1,mutant,GAL4_YEAST_theta_0.2.npy,GAL4_YEAST.pdb,1-881,0.1,,OrganismalFitness +GCN4_YEAST_Staller_2018,GCN4_YEAST_Staller_2018.csv,GCN4_YEAST,Eukaryote,Saccharomyces cerevisiae,MSEYQPSLFALNPMGFSPLDGSKSTNENVSASTSTAKPMVGQLIFDKFIKTEEDPIIKQDTPSNLDFDFALPQTATAPDAKTVLPIPELDDAVVESFFSSSTDSTPMFEYENLEDNSKEWTSLFDNDIPVTTDDVSLADKAIESTEEVSLVPSNLEVSTTSFLPTPVLEDAKLTQTRKVKKPNSVVKKSHHVGKDDESRLDHLGVVAYNRKQRSIPLSPIVPESSDPAALKRARNTEAARRSRARKLQRMKQLEDKVEELLSKNYHLENEVARLKKLVGER,281,TRUE,2638,33,2605,1.293757864,median,Staller,A High-Throughput Mutational Scan of an Intrinsically Disordered Acidic Transcriptional Activation 
Domain,2018,10.1016/j.cels.2018.01.015,101-144,Gcn4,Binding,FACS,GCN4_YEAST_full_24-02-2022_b03.a2m,1,281,281,0.3,0.2,350,0.719,202,177.9,0.8806930693,low,1,0.00495049505,GCN4_YEAST_Staller_2018.csv,Induction,1,mutant,GCN4_YEAST_theta_0.2.npy,GCN4_YEAST.pdb,1-281,0.1,,Binding +GDIA_HUMAN_Silverstein_2021,GDIA_HUMAN_Silverstein_2021.csv,GDIA_HUMAN,Human,Homo sapiens,MDEEYDVIVLGTGLTECILSGIMSVNGKKVLHMDRNPYYGGESSSITPLEELYKRFQLLEGPPESMGRGRDWNVDLIPKFLMANGQLVKMLLYTEVTRYLDFKVVEGSFVYKGGKIYKVPSTETEALASNLMGMFEKRRFRKFLVFVANFDENDPKTFEGVDPQTTSMRDVYRKFDLGQDVIDFTGHALALYRTDDYLDQPCLETVNRIKLYSESLARYGKSPYLYPLYGLGELPQGFARLSAIYGGTYMLNKPVDDIIMENGKVVGVKSEGEVARCKQLICDPSYIPDRVRKAGQVIRIICILSHPIKNTNDANSCQIIIPQNQVNRKSDIYVCMISYAHNVAAQGKYIAIASTTVETTDPEKEVEPALELLEPIDQKFVAISDLYEPIDDGCESQVFCSCSYDATTHFETTCNDIKDIYKRMAGTAFDFENMKRKQNDVFGEAEQ,447,FALSE,1154,1154,0,0.8425936955,median,Silverstein,A systematic genotype-phenotype map for missense variants in the human intellectual disability-associated gene GDI1,2021,10.1101/2021.10.06.463360,2-447,GDI1,Yeast Growth,Growth,GDIA_HUMAN_2023-10-12_b05.a2m,1,447,447,0.5,0.2,5196,0.996,445,398.1,0.8946067416,Low,86,0.193258427,media-1.xlsx,fitness,1,mutant,GDIA_HUMAN_theta0.2_2023-10-12_b05.npy,GDIA_HUMAN.pdb,1-447,1,,OrganismalFitness +GFP_AEQVI_Sarkisyan_2016,GFP_AEQVI_Sarkisyan_2016.csv,GFP_AEQVI,Eukaryote,Aequorea victoria,MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK,238,TRUE,51714,1084,50630,2.5,manual,Sarkisyan,Local fitness landscape of the green fluorescent protein,2016,10.1038/nature17995,3-237,GFP,Fluorescence,FACS,GFP_AEQVI_full_04-29-2022_b08.a2m,1,238,238,0.8,0.2,396,0.975,232,14.9,0.06422413793,low,0,0,GFP_AEQVI_Sarkisyan_2016.csv,mean_medianBrightness_per_aaseq,1,mutant,GFP_AEQVI_theta_0.2.npy,GFP_AEQVI.pdb,1-238,0.1,,Activity 
+GLPA_HUMAN_Elazar_2016,GLPA_HUMAN_Elazar_2016.csv,GLPA_HUMAN,Human,Homo sapiens,MYGKIIFVLLLSEIVSISASSTTGVAMHTSTSSSVTKSYISSQTNDTHKRDTYAATPRAHEVSEISVRTVYPPEEETGERVQLAHHFSEPEITLIIFGVMAGVIGTILLISYGIRRLIKKSPSDVKPLPSPDTDVPLSSVEIENPETSDQ,150,FALSE,245,245,0,0.9321105779,median,Elazar,Mutational scanning reveals the determinants of protein insertion and association energetics in the plasma membrane,2016,10.7554/eLife.12125,94-108,Glycophorin A membrane domain,Membrane-protein insertion,TOXCAT-Beta-lactamase (TbL) screen,GLPA_HUMAN_2023-10-12_b03.a2m,1,150,150,0.3,0.2,273,1,150,81,0.54,Low,1,0.006666666667,urn_mavedb_00000051-c-1_scores.csv,score,-1,mutant,GLPA_HUMAN_theta0.2_2023-10-12_b03.npy,GLPA_HUMAN.pdb,1-150,1,93,Expression +GRB2_HUMAN_Faure_2021,GRB2_HUMAN_Faure_2021.csv,GRB2_HUMAN,Human,Homo sapiens,MEAIAKYDFKATADDELSFKRGDILKVLNEECDQNWYKAELNGKDGFIPKNYIEMKPHPWFFGKIPRAKAEEMLSKQRHDGAFLIRESESAPGDFSLSVKFGNDVQHFKVLRDGAGKYFLWVVKFNSLNELVDYHRSTSVSRNQQIFLRDIEQVPQQPTYVQALFDFDPQEDGELGFRRGDFIHVMDNSDPNWWKGACHGQTGMFPRNYVTPVNRNV,217,TRUE,63366,1034,62332,-0.7,manual,Faure,Mapping the energetic and allosteric landscapes of protein binding domains,2022,10.1038/s41586-022-04586-4,159-214,GRB2-SH3,Yeast growth,Growth,GRB2_HUMAN_full_11-26-2021_b05.a2m,1,217,217,0.5,0.2,33228,0.816,177,1485.9,8.394915254,medium,42,0.2372881356,GRB2_HUMAN_Faure_2021.csv,fitness,1,mutant,GRB2_HUMAN_theta_0.2.npy,GRB2_HUMAN.pdb,1-217,0.1,,OrganismalFitness +HCP_LAMBD_Tsuboyama_2023_2L6Q,HCP_LAMBD_Tsuboyama_2023_2L6Q.csv,HCP_LAMBD,Virus,Escherichia phage lambda,VRQEELAAARAALHDLMTGKRVATVQKDGRRVEFTATSVSDLKKYIAELEVQTGM,55,FALSE,1040,1040,0,-0.350614016,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-21,1-55,Head completion protein,Stability,cDNA display 
proteolysis,HCP_LAMBD_2023-08-07_b05.a2m,1,55,55,0.5,0.01,2128,0.945,52,606.5,11.66346154,Medium,15,0.2884615385,Tsuboyama2023_Dataset2_Dataset18,ddG_ML_float,1,mut_type,HCP_LAMBD_theta0.01_2023-08-07_b05.npy,HCP_LAMBD.pdb,1-55,1,,Stability +HECD1_HUMAN_Tsuboyama_2023_3DKM,HECD1_HUMAN_Tsuboyama_2023_3DKM.csv,HECD1_HUMAN,Human,Homo sapiens,NLYFQGLKYMVPGARVTRGLDWKWRDQDGSPQGEGTVTGELHNGWIDVTWDAGGSNSYRMGAEGKFDLKLAP,72,TRUE,5586,1244,4342,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-22,1-72,E3 ubiquitin-protein ligase HECTD1,Stability,cDNA display proteolysis,HECD1_HUMAN_2023-08-07_b03.a2m,1,72,72,0.3,0.2,18660,0.903,65,1192.3,18.34307692,Medium,24,0.3692307692,Tsuboyama2023_Dataset2_Dataset19,ddG_ML_float,1,mut_type,HECD1_HUMAN_theta0.2_2023-08-07_b03.npy,HECD1_HUMAN.pdb,1-72,1,,Stability +HEM3_HUMAN_Loggerenberg_2023,HEM3_HUMAN_Loggerenberg_2023.csv,HEM3_HUMAN,Human,Homo sapiens,MSGNGNAAATAEENSPKMRVIRVGTRKSQLARIQTDSVVATLKASYPGLQFEIIAMSTTGDKILDTALSKIGEKSLFTKELEHALEKNEVDLVVHSLKDLPTVLPPGFTIGAICKRENPHDAVVFHPKFVGKTLETLPEKSVVGTSSLRRAAQLQRKFPHLEFRSIRGNLNTRLRKLDEQQEFSAIILATAGLQRMGWHNRVGQILHPEECMYAVGQGALGVEVRAKDQDILDLVGVLHDPETLLRCIAERAFLRHLEGGCSVPVAVHTAMKDGQLYLTGGVWSLDGSDSIQETMQATIHVPAQHEDGPEDDPQLVGITARNIPRGPQLAAQNLGISLANLLLSKGAKNILDVARQLNDAH,361,FALSE,5689,5689,0,0.6142990455,median,van Loggerenberg,Systematically testing human HMBS missense variants to reveal mechanism and pathogenic variation,2023,10.1101/2023.02.06.527353,19-360,hydroxymethylbilane synthase,activity,Yeast complementation,HEM3_HUMAN_2023-08-07_b02.a2m,1,361,361,0.2,0.2,59544,0.85,307,11510.2,37.49250814,Medium,500,1.628664495,,score,1,mutant,HEM3_HUMAN_theta0.2_2023-08-07_b02.npy,HEM3_HUMAN.pdb,1-361,1,,Activity +HIS7_YEAST_Pokusaeva_2019,HIS7_YEAST_Pokusaeva_2019.csv,HIS7_YEAST,Eukaryote,Saccharomyces 
cerevisiae,MTEQKALVKRITNETKIQIAISLKGGPLAIEHSIFPEKEAEAVAEQATQSQVINVHTGIGFLDHMIHALAKHSGWSLIVECIGDLHIDDHHTTEDCGIALGQAFKEALGAVRGVKRFGSGFAPLDEALSRAVVDLSNRPYAVVELGLQREKVGDLSCEMIPHFLESFAEASRITLHVDCLRGKNDHHRSESAFKALAVAIREATSPNGTNDVPSTKGVLM,220,TRUE,496137,168,495969,0.3,manual,Pokusaeva,An experimental assay of the interactions of amino acids from orthologous sequences shaping a complex fitness landscape,2019,10.1371/journal.pgen.1008079,6-211,IGP dehydratase (HIS3),Growth,Growth,HIS7_YEAST_full_11-26-2021_b09.a2m,1,220,220,0.9,0.2,40154,0.873,192,5191.3,27.03802083,medium,318,1.65625,HIS7_YEAST_Pokusaeva_2019.csv,selection,1,mutant,HIS7_YEAST_theta_0.2.npy,HIS7_YEAST.pdb,1-220,0.1,,OrganismalFitness +HMDH_HUMAN_Jiang_2019,HMDH_HUMAN_Jiang_2019.csv,HMDH_HUMAN,Human,Homo sapiens,MLSRLFRMHGLFVASHPWEVIVGTVTLTICMMSMNMFTGNNKICGWNYECPKFEEDVLSSDIIILTITRCIAILYIYFQFQNLRQLGSKYILGIAGLFTIFSSFVFSTVVIHFLDKELTGLNEALPFFLLLIDLSRASTLAKFALSSNSQDEVRENIARGMAILGPTFTLDALVECLVIGVGTMSGVRQLEIMCCFGCMSVLANYFVFMTFFPACVSLVLELSRESREGRPIWQLSHFARVLEEEENKPNPVTQRVKMIMSLGLVLVHAHSRWIADPSPQNSTADTSKVSLGLDENVSKRIEPSVSLWQFYLSKMISMDIEQVITLSLALLLAVKYIFFEQTETESTLSLKNPITSPVVTQKKVPDNCCRREPMLVRNNQKCDSVEEETGINRERKVEVIKPLVAETDTPNRATFVVGNSSLLDTSSVLVTQEPEIELPREPRPNEECLQILGNAEKGAKFLSDAEIIQLVNAKHIPAYKLETLMETHERGVSIRRQLLSKKLSEPSSLQYLPYRDYNYSLVMGACCENVIGYMPIPVGVAGPLCLDEKEFQVPMATTEGCLVASTNRGCRAIGLGGGASSRVLADGMTRGPVVRLPRACDSAEVKAWLETSEGFAVIKEAFDSTSRFARLQKLHTSIAGRNLYIRFQSRSGDAMGMNMISKGTEKALSKLHEYFPEMQILAVSGNYCTDKKPAAINWIEGRGKSVVCEAVIPAKVVREVLKTTTEAMIEVNINKNLVGSAMAGSIGGYNAHAANIVTAIYIACGQDAAQNVGSSNCITLMEASGPTNEDLYISCTMPSIEIGTVGGGTNLLPQQACLQMLGVQGACKDNPGENARQLARIVCGTVMAGELSLMAALAAGHLVKSHMIHNRSKINLQDLQGACTKKTA,888,FALSE,16853,16853,0,0.48275,median,Jiang,Exhaustive mapping of missense variation in coronary heart disease-related genes,2019,https://hdl.handle.net/1807/98076,2-888,3-hydroxy-3-methylglutaryl-coenzyme A reductase,Fitness,Resistance to statin 
inhibition,HMDH_HUMAN_2023-10-12_b05.a2m,1,888,888,0.5,0.2,3153,0.995,884,554.6,0.6273755656,Low,778,0.8800904977,urn_mavedb_00000035-a-1_scores.csv,score,1,mutant,HMDH_HUMAN_theta0.2_2023-10-12_b05.npy,HMDH_HUMAN.pdb,1-888,1,,OrganismalFitness +HSP82_YEAST_Cote-Hammarlof_2020_growth-H2O2,HSP82_YEAST_Cote-Hammarlof_2020_growth-H2O2.csv,HSP82_YEAST,Eukaryote,Saccharomyces cerevisiae,MASETFEFQAEITQLMSLIINTVYSNKEIFLRELISNASDALDKIRYKSLSDPKQLETEPDLFIRITPKPEQKVLEIRDSGIGMTKAELINNLGTIAKSGTKAFMEALSAGADVSMIGQFGVGFYSLFLVADRVQVISKSNDDEQYIWESNAGGSFTVTLDEVNERIGRGTILRLFLKDDQLEYLEEKRIKEVIKRHSEFVAYPIQLVVTKEVEKEVPIPEEEKKDEEKKDEEKKDEDDKKPKLEEVDEEEEKKPKTKKVKEEVQEIEELNKTKPLWTRNPSDITQEEYNAFYKSISNDWEDPLYVKHFSVEGQLEFRAILFIPKRAPFDLFESKKKKNNIKLYVRRVFITDEAEDLIPEWLSFVKGVVDSEDLPLNLSREMLQQNKIMKVIRKNIVKKLIEAFNEIAEDSEQFEKFYSAFSKNIKLGVHEDTQNRAALAKLLRYNSTKSVDELTSLTDYVTRMPEHQKNIYYITGESLKAVEKSPFLDALKAKNFEVLFLTDPIDEYAFTQLKEFEGKTLVDITKDFELEETDEEKAEREKEIKEYEPLTKALKEILGDQVEKVVVSYKLLDAPAAIRTGQFGWSANMERIMKAQALRDSSMSSYMSSKKTFEISPKSPIIKELKKRVDEGGAQDKTVKDLTKLLYETALLTSGFSLDEPTSFASRINRLISLGLNIDEDEETETAPEASTAAPVEEVPADTEMEEVD,709,FALSE,2252,2252,0,-0.0020874765,median,Cote-Hammarlof,The Adaptive Potential of the Middle Domain of Yeast Hsp90,2020,10.1093/molbev/msaa211,291-409,HSP82,Growth (H2O2 stress),,HSP82_YEAST_2023-08-07_b01.a2m,1,709,709,0.1,0.2,48695,0.917,650,4395.2,6.761846154,Medium,531,0.8169230769,,score,1,mut_proteingym,HSP82_YEAST_theta0.2_2023-08-07_b01.npy,HSP82_YEAST.pdb,1-709,1,290,OrganismalFitness +HSP82_YEAST_Flynn_2019,HSP82_YEAST_Flynn_2019.csv,HSP82_YEAST,Eukaryote,Saccharomyces 
cerevisiae,MASETFEFQAEITQLMSLIINTVYSNKEIFLRELISNASDALDKIRYKSLSDPKQLETEPDLFIRITPKPEQKVLEIRDSGIGMTKAELINNLGTIAKSGTKAFMEALSAGADVSMIGQFGVGFYSLFLVADRVQVISKSNDDEQYIWESNAGGSFTVTLDEVNERIGRGTILRLFLKDDQLEYLEEKRIKEVIKRHSEFVAYPIQLVVTKEVEKEVPIPEEEKKDEEKKDEEKKDEDDKKPKLEEVDEEEEKKPKTKKVKEEVQEIEELNKTKPLWTRNPSDITQEEYNAFYKSISNDWEDPLYVKHFSVEGQLEFRAILFIPKRAPFDLFESKKKKNNIKLYVRRVFITDEAEDLIPEWLSFVKGVVDSEDLPLNLSREMLQQNKIMKVIRKNIVKKLIEAFNEIAEDSEQFEKFYSAFSKNIKLGVHEDTQNRAALAKLLRYNSTKSVDELTSLTDYVTRMPEHQKNIYYITGESLKAVEKSPFLDALKAKNFEVLFLTDPIDEYAFTQLKEFEGKTLVDITKDFELEETDEEKAEREKEIKEYEPLTKALKEILGDQVEKVVVSYKLLDAPAAIRTGQFGWSANMERIMKAQALRDSSMSSYMSSKKTFEISPKSPIIKELKKRVDEGGAQDKTVKDLTKLLYETALLTSGFSLDEPTSFASRINRLISLGLNIDEDEETETAPEASTAAPVEEVPADTEMEEVD,709,FALSE,13294,13294,0,-0.3,manual,Flynn,Comprehensive fitness maps of Hsp90 show widespread environmental dependence,2019,10.7554/eLife.53810,2-709,HSP82,"growth, nitrogen depletion (0.0125% ammonium sulfate), hyperosmotic shock (0.8 M NaCl), alcohol stress (7.5% ethanol), sulfhydryl-oxidation (0.85 mM diamide), temperature shock (37C)",,HSP82_YEAST_full_11-26-2021_b01.a2m,1,709,709,0.1,0.2,38923,0.862,611,3684.8,6.030769231,medium,433,0.7086743044,HSP82_YEAST_Flynn_2019.csv,s (37°C),1,mutant,HSP82_YEAST_theta_0.2.npy,HSP82_YEAST.pdb,1-709,1,,OrganismalFitness +HSP82_YEAST_Mishra_2016,HSP82_YEAST_Mishra_2016.csv,HSP82_YEAST,Eukaryote,Saccharomyces cerevisiae 
S288C,MASETFEFQAEITQLMSLIINTVYSNKEIFLRELISNASDALDKIRYKSLSDPKQLETEPDLFIRITPKPEQKVLEIRDSGIGMTKAELINNLGTIAKSGTKAFMEALSAGADVSMIGQFGVGFYSLFLVADRVQVISKSNDDEQYIWESNAGGSFTVTLDEVNERIGRGTILRLFLKDDQLEYLEEKRIKEVIKRHSEFVAYPIQLVVTKEVEKEVPIPEEEKKDEEKKDEEKKDEDDKKPKLEEVDEEEEKKPKTKKVKEEVQEIEELNKTKPLWTRNPSDITQEEYNAFYKSISNDWEDPLYVKHFSVEGQLEFRAILFIPKRAPFDLFESKKKKNNIKLYVRRVFITDEAEDLIPEWLSFVKGVVDSEDLPLNLSREMLQQNKIMKVIRKNIVKKLIEAFNEIAEDSEQFEKFYSAFSKNIKLGVHEDTQNRAALAKLLRYNSTKSVDELTSLTDYVTRMPEHQKNIYYITGESLKAVEKSPFLDALKAKNFEVLFLTDPIDEYAFTQLKEFEGKTLVDITKDFELEETDEEKAEREKEIKEYEPLTKALKEILGDQVEKVVVSYKLLDAPAAIRTGQFGWSANMERIMKAQALRDSSMSSYMSSKKTFEISPKSPIIKELKKRVDEGGAQDKTVKDLTKLLYETALLTSGFSLDEPTSFASRINRLISLGLNIDEDEETETAPEASTAAPVEEVPADTEMEEVD,709,FALSE,4323,4323,0,-0.4,manual,Mishra,Systematic Mutant Analyses Elucidate General and Client-Specific Aspects of Hsp90 Function,2016,10.1016/j.celrep.2016.03.046,2-231,HSP82,Growth,Growth,HSP82_YEAST_full_11-26-2021_b01.a2m,1,709,709,0.1,0.2,38923,0.862,611,3684.8,6.030769231,medium,433,0.7086743044,HSP82_YEAST_Mishra_2016.csv,selection_coefficient,1,mutant,HSP82_YEAST_theta_0.2.npy,HSP82_YEAST.pdb,1-709,0.1,,OrganismalFitness +HXK4_HUMAN_Gersing_2022_activity,HXK4_HUMAN_Gersing_2022_activity.csv,HXK4_HUMAN,Human,Homo sapiens (Human),MLDDRARMEAAKKEKVEQILAEFQLQEEDLKKVMRRMQKEMDRGLRLETHEEASVKMLPTYVRSTPEGSEVGDFLSLDLGGTNFRVMLVKVGEGEEGQWSVKTKHQMYSIPEDAMTGTAEMLFDYISECISDFLDKHQMKHKKLPLGFTFSFPVRHEDIDKGILLNWTKGFKASGAEGNNVVGLLRDAIKRRGDFEMDVVAMVNDTVATMISCYYEDHQCEVGMIVGTGCNACYMEEMQNVELVEGDEGRMCVNTEWGAFGDSGELDEFLLEYDRLVDESSANPGQQLYEKLIGGKYMGELVRLVLLRLVDENLLFHGEASEQLRTRGAFETRFVSQVESDTGDRKQIYNILSTLGLRPSTTDCDIVRRACESVSTRAAHMCSAGLAGVINRMRESRSEDVMRITVGVDGSVYKLHPSFKERFHASVRRLTPSCEITFIESEEGSGRGAALVSAVACKKACMLGQ,465,FALSE,8570,8570,0,0.5631652235,median,Gersing,A comprehensive map of human glucokinase variant activity,2022,10.1101/2022.05.04.490571,2-465,glucokinase regulatory protein,functional complementation to reduced growth on glucose medium,enzymatic 
activity,HXK4_HUMAN_b0.1.a2m,1,465,465,0.1,0.2,23354,1,465,2336.1,5.023870968,Medium,181,0.3892473118,HXK4_HUMAN_Gersing_2022.csv,score,1,mutant,HXK4_HUMAN_theta_0.2.npy,HXK4_HUMAN.pdb,1-465,1,,OrganismalFitness +HXK4_HUMAN_Gersing_2023_abundance,HXK4_HUMAN_Gersing_2023_abundance.csv,HXK4_HUMAN,Human,Homo sapiens,MLDDRARMEAAKKEKVEQILAEFQLQEEDLKKVMRRMQKEMDRGLRLETHEEASVKMLPTYVRSTPEGSEVGDFLSLDLGGTNFRVMLVKVGEGEEGQWSVKTKHQMYSIPEDAMTGTAEMLFDYISECISDFLDKHQMKHKKLPLGFTFSFPVRHEDIDKGILLNWTKGFKASGAEGNNVVGLLRDAIKRRGDFEMDVVAMVNDTVATMISCYYEDHQCEVGMIVGTGCNACYMEEMQNVELVEGDEGRMCVNTEWGAFGDSGELDEFLLEYDRLVDESSANPGQQLYEKLIGGKYMGELVRLVLLRLVDENLLFHGEASEQLRTRGAFETRFVSQVESDTGDRKQIYNILSTLGLRPSTTDCDIVRRACESVSTRAAHMCSAGLAGVINRMRESRSEDVMRITVGVDGSVYKLHPSFKERFHASVRRLTPSCEITFIESEEGSGRGAALVSAVACKKACMLGQ,465,FALSE,8396,8396,0,0.6,manual,Gersing,Characterizing glucokinase variant mechanisms using a multiplexed abundance assay,2023,10.1101/2023.05.24.542036,2-465,GCK,abundance,Growth,HXK4_HUMAN_2023-08-07_b01.a2m,1,465,465,0.1,0.2,24177,0.966,449,2626.4,5.849443207,Medium,170,0.3786191537,HXK4_HUMAN_Gersing_2022.csv,score,1,mutant,HXK4_HUMAN_theta0.2_2023-08-07_b01.npy,HXK4_HUMAN.pdb,1-465,1,,Expression +I6TAH8_I68A0_Doud_2015,I6TAH8_I68A0_Doud_2015.csv,I6TAH8_I68A0,Virus,"Influenza A virus (strain A/Puerto Rico/8/1934 H1N1), Influenza A virus (strain A/Aichi/2/1968 H3N2)",MASQGTKRSYEQMETDGERQNATEIRASVGKMIDGIGRFYIQMCTELKLSDYEGRLIQNSLTIERMVLSAFDERRNKYLEEHPSAGKDPKKTGGPIYKRVDRKWMRELVLYDKEEIRRIWRQANNGDDATAGLTHMMIWHSNLNDTTYQRTRALVRTGMDPRMCSLMQGSTLPRRSGAAGAAVKGVGTMVMELIRMIKRGINDRNFWRGENGRKTRSAYERMCNILKGKFQTAAQRAMMDQVRESRNPGNAEIEDLIFLARSALILRGSVAHKSCLPACVYGPAVASGYDFEKEGYSLVGIDPFKLLQNSQVYSLIRPNENPAHKSQLVWMACNSAAFEDLRVLSFIRGTKVSPRGKLSTRGVQIASNENMDAMESSTLELRSRYWAIRTRSGGNTNQQRASAGQISVQPAFSVQRNLPFDKPTIMAAFTGNTEGRTSDMRAEIIRMMEGAKPEEMSFQGRGVFELSDERAANPIVPSFDMSNEGSYFFGDNAEEYDN,498,FALSE,9462,9462,0,-2.329469119,median,Doud,Site-Specific Amino Acid Preferences Are Mostly Conserved in Two Closely Related Protein 
Homologs,2015,10.1093/molbev/msv167,1-498,Influenza nucleoprotein,,Growth,I6TAH8_I68A0_theta0.99_full_11-26-2021_b09.a2m,1,498,498,0.9,0.01,15390,1,498,1493.3,2.998594378,medium,2118,4.253012048,I6TAH8_I68A0_Doud_2015.csv,log_fitness_by_syn_mut_fitness,1,mutant,I6TAH8_I68A0_theta_0.01.npy,I6TAH8_I68A0.pdb,1-498,0.1,,OrganismalFitness +IF1_ECOLI_Kelsic_2016,IF1_ECOLI_Kelsic_2016.csv,IF1_ECOLI,Prokaryote,Escherichia coli,MAKEDNIEMQGTVLETLPNTMFRVELENGHVVTAHISGKMRKNYIRILTGDKVTVELTPYDLSKGRIVFRSR,72,FALSE,1367,1367,0,0.8,manual,Kelsic,RNA Structural Determinants of Optimal Codons Revealed by MAGE-Seq,2016,10.1016/j.cels.2016.11.004,1-72,infA,Growth,Growth,IF1_ECOLI_full_11-26-2021_b02.a2m,1,72,72,0.2,0.2,361806,0.806,58,38189,658.4310345,high,46,0.7931034483,IF1_ECOLI_Kelsic_2016.csv,fitness_rich,1,mutant,IF1_ECOLI_theta_0.2.npy,IF1_ECOLI.pdb,1-72,0.1,,OrganismalFitness +ILF3_HUMAN_Tsuboyama_2023_2L33,ILF3_HUMAN_Tsuboyama_2023_2L33.csv,ILF3_HUMAN,Human,Homo sapiens,MLTKHGKNPVMELNEKRRGLKYELISETGGSHDKRFVMEVEVDGQKFQGAGSNKKVAKAYAALAALEKLFP,71,FALSE,1329,1329,0,-0.4,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-23,1-71,Interleukin enhancer-binding factor 3,Stability,cDNA display proteolysis,ILF3_HUMAN_2023-08-07_b03.a2m,1,71,71,0.3,0.2,145438,0.915,65,21228,326.5846154,High,57,0.8769230769,Tsuboyama2023_Dataset2_Dataset20,ddG_ML_float,1,mut_type,ILF3_HUMAN_theta0.2_2023-08-07_b03.npy,ILF3_HUMAN.pdb,1-71,1,,Stability +ISDH_STAAW_Tsuboyama_2023_2LHR,ISDH_STAAW_Tsuboyama_2023_2LHR.csv,ISDH_STAAW,Prokaryote,Staphylococcus aureus,YNLQKLLAPYHKAKTLERQVYELEKLQEKLPEKYKAEYKKKLDQTRVELADQVKS,55,TRUE,1944,940,1004,-0.7942702247,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-24,1-55,Iron-regulated surface determinant protein H,Stability,cDNA display 
proteolysis,ISDH_STAAW_2023-08-07_b01.a2m,1,55,55,0.1,0.2,115488,0.818,45,38123.1,847.18,High,6,0.1333333333,Tsuboyama2023_Dataset2_Dataset21,ddG_ML_float,1,mut_type,ISDH_STAAW_theta0.2_2023-08-07_b01.npy,ISDH_STAAW.pdb,1-55,1,,Stability +KCNE1_HUMAN_Muhammad_2023_expression,KCNE1_HUMAN_Muhammad_2023_expression.csv,KCNE1_HUMAN,Human,Homo sapiens,MILSNTTAVTPFLTKLWQETVQQGGNMSGLARRSPRSGDGKLEALYVLMVLGFFGFFTLGIMLSYIRSKKLEHSNDPFNVYIESDAWQEKDKAYVQARVLESYRSCYVVENHLAIEQPNTHLPETKPSP,129,FALSE,2339,2339,0,0.75,manual,Muhammad,"High-throughput functional mapping of variants in an arrhythmia gene, KCNE1, reveals novel biology",2023,10.1101/2023.04.28.538612,1-128,KCNE1,cell surface expression,FACS,KCNE1_HUMAN_2023-08-07_b02.a2m,1,129,129,0.2,0.2,2118,0.969,125,213.7,1.7096,Medium,5,0.04,KCNE1_HUMAN_Muhammad_2023.csv,TrafScore,1,mutant,KCNE1_HUMAN_theta0.2_2023-08-07_b02.npy,KCNE1_HUMAN.pdb,1-129,1,,Expression +KCNE1_HUMAN_Muhammad_2023_function,KCNE1_HUMAN_Muhammad_2023_function.csv,KCNE1_HUMAN,Human,Homo sapiens,MILSNTTAVTPFLTKLWQETVQQGGNMSGLARRSPRSGDGKLEALYVLMVLGFFGFFTLGIMLSYIRSKKLEHSNDPFNVYIESDAWQEKDKAYVQARVLESYRSCYVVENHLAIEQPNTHLPETKPSP,129,FALSE,2315,2315,0,0.9043514345,median,Muhammad,"High-throughput functional mapping of variants in an arrhythmia gene, KCNE1, reveals novel biology",2023,10.1101/2023.04.28.538612,1-128,KCNE1,potassium channel function,Growth,KCNE1_HUMAN_2023-08-07_b02.a2m,1,129,129,0.2,0.2,2118,0.969,125,213.7,1.7096,Medium,5,0.04,KCNE1_HUMAN_Muhammad_2023.csv,funcScore,1,mutant,KCNE1_HUMAN_theta0.2_2023-08-07_b02.npy,KCNE1_HUMAN.pdb,1-129,1,,Activity +KCNH2_HUMAN_Kozek_2020,KCNH2_HUMAN_Kozek_2020.csv,KCNH2_HUMAN,Human,Homo 
sapiens,MPVRRGHVAPQNTFLDTIIRKFEGQSRKFIIANARVENCAVIYCNDGFCELCGYSRAEVMQRPCTCDFLHGPRTQRRAAAQIAQALLGAEERKVEIAFYRKDGSCFLCLVDVVPVKNEDGAVIMFILNFEVVMEKDMVGSPAHDTNHRGPPTSWLAPGRAKTFRLKLPALLALTARESSVRSGGAGGAGAPGAVVVDVDLTPAAPSSESLALDEVTAMDNHVAGLGPAEERRALVGPGSPPRSAPGQLPSPRAHSLNPDASGSSCSLARTRSRESCASVRRASSADDIEAMRAGVLPPPPRHASTGAMHPLRSGLLNSTSDSDLVRYRTISKIPQITLNFVDLKGDPFLASPTSDREIIAPKIKERTHNVTEKVTQVLSLGADVLPEYKLQAPRIHRWTILHYSPFKAVWDWLILLLVIYTAVFTPYSAAFLLKETEEGPPATECGYACQPLAVVDLIVDIMFIVDILINFRTTYVNANEEVVSHPGRIAVHYFKGWFLIDMVAAIPFDLLIFGSGSEELIGLLKTARLLRLVRVARKLDRYSEYGAAVLFLLMCTFALIAHWLACIWYAIGNMEQPHMDSRIGWLHNLGDQIGKPYNSSGLGGPSIKDKYVTALYFTFSSLTSVGFGNVSPNTNSEKIFSICVMLIGSLMYASIFGNVSAIIQRLYSGTARYHTQMLRVREFIRFHQIPNPLRQRLEEYFQHAWSYTNGIDMNAVLKGFPECLQADICLHLNRSLLQHCKPFRGATKGCLRALAMKFKTTHAPPGDTLVHAGDLLTALYFISRGSIEILRGDVVVAILGKNDIFGEPLNLYARPGKSNGDVRALTYCDLHKIHRDDLLEVLDMYPEFSDHFWSSLEITFNLRDTNMIPGSPGSTELEGGFSRQRKRKLSFRRRTDKDTEQPGEVSALGPGRAGAGPSSRGRPGGPWGESPSSGPSSPESSEDEGPGRSSSPLRLVPFSSPRPPGEPPGGEPLMEDCEKSSDTCNPLSGAFSGVSNIFSFWGDSRGRQYQELPRCPAPTPSLLNIPLSSPGRRPRGDVESRLDALQRQLNRLETRLSADMATVLQLLQRQMTLVPPAYSAVTTPGPGPTSTSPLLPVSPLPTLTLDSLSQVSQFMACEELPPGAPELPQEGPTRRLSLPGQLGALTSQPLHRHGSDPGS,1159,FALSE,200,200,0,58.87492867,median,Kozek,High-throughput discovery of trafficking-deficient variants in the cardiac potassium channel KCNH2: Deep mutational scan of KCNH2 trafficking,2020,10.1016/j.hrthm.2020.05.041,545-555,KCNH2,Voltage,Voltage,KCNH2_HUMAN_535-565_11-26-2021_b05.a2m,535,565,31,0.5,0.2,13907,1,31,186.6,6.019354839,medium,1,0.03225806452,KCNH2_HUMAN_Kozek_2020.csv,score.ave,1,var,KCNH2_HUMAN_theta_0.2.npy,KCNH2_HUMAN.pdb,1-1159,0.1,,Activity +KCNJ2_MOUSE_Coyote-Maestas_2022_function,KCNJ2_MOUSE_Coyote-Maestas_2022_function.csv,KCNJ2_MOUSE,Human,Homo 
sapiens,MGSVRTNRYSIVSSEEDGMKLATMAVANGFGNGKSKVHTRQQCRSRFVKKDGHCNVQFINVGEKGQRYLADIFTTCVDIRWRWMLVIFCLAFVLSWLFFGCVFWLIALLHGDLDTSKVSKACVSEVNSFTAAFLFSIETQTTIGYGFRCVTDECPIAVFMVVFQSIVGCIIDAFIIGAVMAKMAKPKKRNETLVFSHNAVIAMRDGKLCLMWRVGNLRKSHLVEAHVRAQLLKSRITSEGEYIPLDQIDINVGFDSGIDRIFLVSPITIVHEIDEDSPLYDLSKQDIDNADFEIVVILEGMVEATAMTTQCRSSYLANEILWGHRYEPVLFEEKHYYKVDYSRFHKTYEVPNTPLCSARDLAEKKYILSNANSFCYENEVALTSKEEEEDSENGVPESTSTDSPPGIDLHNQASVPLEPRPLRRESEI,428,FALSE,6963,6963,0,0.039,median,Coyote-Maestas,"Determinants of trafficking, conduction, and disease within a K+ channel revealed through multiparametric deep mutational scanning",2022,10.7554/eLife.76903,2-392,Kir2.1,Ion conduction,FACS,KCNJ2_MOUSE_b01.a2m,1,428,428,0.1,0.2,20953,0.86,370,986.7,2.666756757,Medium,94,0.2540540541,,function_score,1,mutant_noflag,KCNJ2_MOUSE_b01_theta_0.2.npy,KCNJ2_MOUSE.pdb,1-428,1,,Activity +KCNJ2_MOUSE_Coyote-Maestas_2022_surface,KCNJ2_MOUSE_Coyote-Maestas_2022_surface.csv,KCNJ2_MOUSE,Human,Homo sapiens,MGSVRTNRYSIVSSEEDGMKLATMAVANGFGNGKSKVHTRQQCRSRFVKKDGHCNVQFINVGEKGQRYLADIFTTCVDIRWRWMLVIFCLAFVLSWLFFGCVFWLIALLHGDLDTSKVSKACVSEVNSFTAAFLFSIETQTTIGYGFRCVTDECPIAVFMVVFQSIVGCIIDAFIIGAVMAKMAKPKKRNETLVFSHNAVIAMRDGKLCLMWRVGNLRKSHLVEAHVRAQLLKSRITSEGEYIPLDQIDINVGFDSGIDRIFLVSPITIVHEIDEDSPLYDLSKQDIDNADFEIVVILEGMVEATAMTTQCRSSYLANEILWGHRYEPVLFEEKHYYKVDYSRFHKTYEVPNTPLCSARDLAEKKYILSNANSFCYENEVALTSKEEEEDSENGVPESTSTDSPPGIDLHNQASVPLEPRPLRRESEI,428,FALSE,6917,6917,0,-0.157352583,median,Coyote-Maestas,"Determinants of trafficking, conduction, and disease within a K+ channel revealed through multiparametric deep mutational scanning",2022,10.7554/eLife.76903,2-392,Kir2.1,Surface trafficking,FACS,KCNJ2_MOUSE_b01.a2m,1,428,428,0.1,0.2,20953,0.86,370,986.7,2.666756757,Medium,94,0.2540540541,,surface_score,1,mutant_noflag,KCNJ2_MOUSE_b01_theta_0.2.npy,KCNJ2_MOUSE.pdb,1-428,1,,Expression +KKA2_KLEPN_Melnikov_2014,KKA2_KLEPN_Melnikov_2014.csv,KKA2_KLEPN,Prokaryote,Klebsiella 
pneumoniae,MIEQDGLHAGSPAAWVERLFGYDWAQQTIGCSDAAVFRLSAQGRPVLFVKTDLSGALNELQDEAARLSWLATTGVPCAAVLDVVTEAGRDWLLLGEVPGQDLLSSHLAPAEKVSIMADAMRRLHTLDPATCPFDHQAKHRIERARTRMEAGLVDQDDLDEEHQGLAPAELFARLKARMPDGEDLVVTHGDACLPNIMVENGRFSGFIDCGRLGVADRYQDIALATRDIAEELGGEWADRFLVLYGIAAPDSQRIAFYRLLDEFF,264,FALSE,4960,4960,0,0.5,manual,Melnikov,Comprehensive mutational scanning of a kinasein vivoreveals substrate-dependent fitness landscapes,2014,10.1093/nar/gku511,1-264,"APH(3’)II, neo","Growth (225 ug/mL kanamycin) 1:1, 1:2, 1:4, 1:8 dilutions",Growth,KKA2_KLEPN_full_11-26-2021_b02.a2m,1,264,264,0.2,0.2,234760,0.795,210,76876.7,366.0795238,high,377,1.795238095,KKA2_KLEPN_Melnikov_2014.csv,Kan18_avg,1,mutant,KKA2_KLEPN_theta_0.2.npy,KKA2_KLEPN.pdb,1-264,0.1,,OrganismalFitness +LGK_LIPST_Klesmith_2015,LGK_LIPST_Klesmith_2015.csv,LGK_LIPST,Eukaryote,Lipomyces starkeyi (Oleaginous yeast),MPIATSTGDNVLDFTVLGLNSGTSMDGIDCALCHFYQKTPDAPMEFELLEYGEVPLAQPIKQRVMRMILEDTTSPSELSEVNVILGEHFADAVRQFAAERNVDLSTIDAIASHGQTIWLLSMPEEGQVKSALTMAEGAIIAARTGITSITDFRISDQAAGRQGAPLIAFFDALLLHHPTKLRACQNIGGIANVCFIPPDVDGRRTDEYYDFDTGPGNVFIDAVVRHFTNGEQEYDKDGAMGKRGKVDQELVDDFLKMPYFQLDPPKTTGREVFRDTLAHDLIRRAEAKGLSPDDIVATTTRITAQAIVDHYRRYAPSQEIDEIFMCGGGAYNPNIVEFIQQSYPNTKIMMLDEAGVPAGAKEAITFAWQGMECLVGRSIPVPTRVETRQHYVLGKVSPGLNYRSVMKKGMAFGGDAQQLPWVSEMIVKKKGKVITNNWA,439,FALSE,7890,7890,0,-0.6245,median,Klesmith,Comprehensive Sequence-Flux Mapping of a Levoglucosan Utilization Pathway in E. 
coli,2015,10.1021/acssynbio.5b00131,1-439,LGK (levoglucosan kinase),Growth,Growth,LGK_LIPST_full_11-26-2021_b03.a2m,1,439,439,0.3,0.2,31069,0.813,357,7971,22.32773109,medium,588,1.647058824,B3VI55_LIPST_Klesmith_2015.csv,SelectionTwo,1,mutant,LGK_LIPST_theta_0.2.npy,LGK_LIPST.pdb,1-439,0.1,,Activity +LYAM1_HUMAN_Elazar_2016,LYAM1_HUMAN_Elazar_2016.csv,LYAM1_HUMAN,Human,Homo sapiens,MIFPWKCQSTQRDLWNIFKLWGWTMLCCDFLAHHGTDCWTYHYSEKPMNWQRARRFCRDNYTDLVAIQNKAEIEYLEKTLPFSRSYYWIGIRKIGGIWTWVGTNKSLTEEAENWGDGEPNNKKNKEDCVEIYIKRNKDAGKWNDDACHKLKAALCYTASCQPWSCSGHGECVEIINNYTCNCDVGYYGPQCQFVIQCEPLEAPELGTMDCTHPLGNFSFSSQCAFSCSEGTNLTGIEETTCGPFGNWSSPEPTCQVIQCEPLSAPDLGIMNCSHPLASFSFTSACTFICSEGTELIGKKKTICESSGIWSNPSPICQKLDKSFSMIKEGDYNPLFIPVAVMVTAFSGLAFIIWLARRLKKGKKSKRSMNDPY,372,FALSE,359,359,0,1.306138768,median,Elazar,Mutational scanning reveals the determinants of protein insertion and association energetics in the plasma membrane,2016,10.7554/eLife.12125,333-355,L-selectin,Membrane-protein insertion,TOXCAT-Beta-lactamase (TbL) screen,LYAM1_HUMAN_2023-10-12_b04.a2m,1,372,372,0.4,0.2,3974,0.825,307,412.3,1.342996743,Medium,41,0.1335504886,urn_mavedb_00000051-a-1_scores.csv,score,-1,mutant,LYAM1_HUMAN_theta0.2_2023-10-12_b04.npy,LYAM1_HUMAN.pdb,1-372,1,332,Expression +MAFG_MOUSE_Tsuboyama_2023_1K1V,MAFG_MOUSE_Tsuboyama_2023_1K1V.csv,MAFG_MOUSE,Eukaryote,Mus musculus,LTDEELVTMSVRELNQHLRGLSKEEIIQLKQRRRTLKNRGY,41,TRUE,1429,762,667,-0.5,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-25,1-41,Transcription factor MafG,Stability,cDNA display proteolysis,MAFG_MOUSE_2023-08-07_b07.a2m,1,41,41,0.7,0.2,6178,1,41,156.7,3.82195122,Medium,4,0.09756097561,Tsuboyama2023_Dataset2_Dataset22,ddG_ML_float,1,mut_type,MAFG_MOUSE_theta0.2_2023-08-07_b07.npy,MAFG_MOUSE.pdb,1-41,1,,Stability +MBD11_ARATH_Tsuboyama_2023_6ACV,MBD11_ARATH_Tsuboyama_2023_6ACV.csv,MBD11_ARATH,Eukaryote,Arabidopsis 
thaliana,VSVELPAPSSWKKLFYPNKVGSVKKTEVVFVAPTGEEISNRKQLEQYLKSHPGNPAIAEFDWTTSG,66,TRUE,2116,1155,961,-1.578921171,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-26,1-66,Methyl-CpG-binding domain-containing protein 11,Stability,cDNA display proteolysis,MBD11_ARATH_2023-08-07_b03.a2m,1,66,66,0.3,0.2,26035,0.909,60,1510.5,25.175,Medium,11,0.1833333333,Tsuboyama2023_Dataset2_Dataset23,ddG_ML_float,1,mut_type,MBD11_ARATH_theta0.2_2023-08-07_b03.npy,MBD11_ARATH.pdb,1-66,1,,Stability +MET_HUMAN_Estevam_2023,MET_HUMAN_Estevam_2023.csv,MET_HUMAN,Human,Homo sapiens,NPELVQAVQHVVIGPSSLIVHFNEVIGRGHFGCVYHGTLLDNDGKKIHCAVKSLNRITDIGEVSQFLTEGIIMKDFSHPNVLSLLGICLRSEGSPLVVLPYMKHGDLRNFIRNETHNPTVKDLIGFGLQVAKGMKYLASKKFVHRDLAARNCMLDEKFTVKVADFGLARDMYDKEYYSVHNKTGAKLPVKWMALESLQTQKFTTKSDVWSFGVLLWELMTRGAPPYPDVNTFDITVYLLQGRRLLQPEYCPDPLYEVMLKCWHPKAEMRPSFSELVSRISAIFSTFI,287,FALSE,5393,5393,0,-2,manual,Estevam,Conserved regulatory motifs in the juxtamembrane domain and kinase N-lobe revealed through deep mutational scanning of the MET receptor tyrosine kinase domain,2023,10.1101/2023.08.03.551866,1-287,MET RTK,Human cell line with growth linked to kinase activity in the absense of IL-3,Growth,MET_HUMAN_2023-08-07_b09.a2m,1,287,287,0.9,0.2,185885,0.951,273,5338.5,19.5549451,Medium,200,0.73,ex14_scores.csv,IL3_withdrawal_score,1,mutant,MET_HUMAN_theta0.2_2023-08-07_b09.npy,MET_HUMAN.pdb,1-287,1,,Activity +MK01_HUMAN_Brenan_2016,MK01_HUMAN_Brenan_2016.csv,MK01_HUMAN,Human,Homo sapiens,MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIKKISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQHLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFLTEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQEDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLEQYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS,360,FALSE,6809,6809,0,-8.040790936,median,Brenan,Phenotypic Characterization of a 
Comprehensive Set of MAPK1 /ERK2 Missense Mutants,2016,10.1016/j.celrep.2016.09.061,2-360,MAPK1,Growth,inhibitor resistance,MK01_HUMAN_full_11-26-2021_b06.a2m,1,360,360,0.6,0.2,124248,0.806,290,8815.9,30.39965517,medium,287,0.9896551724,MK01_HUMAN_Brenan_2016.csv,DOX_Average,-1,mutant,MK01_HUMAN_theta_0.2.npy,MK01_HUMAN.pdb,1-360,0.1,,OrganismalFitness +MLAC_ECOLI_MacRae_2023,MLAC_ECOLI_MacRae_2023.csv,MLAC_ECOLI,Prokaryote,Escherichia coli,MFKRLMMVALLVIAPLSAATAADQTNPYKLMDEAAQKTFDRLKNEQPQIRANPDYLRTIVDQELLPYVQVKYAGALVLGQYYKSATPAQREAYFAAFREYLKQAYGQALAMYHGQTYQIAPEQPLGDKTIVPIRVTIIDPNGRPPVRLDFQWRKNSQTGNWQAYDMIAEGVSMITTKQNEWGTLLRTKGIDGLTAQLKSISQQKITLEEKK,211,FALSE,4007,4007,0,-0.1041157905,median,MacRae,Protein-protein interactions in the Mla lipid transport system probed by computational structure prediction and deep mutational scanning,2023,10.1016/j.jbc.2023.104744,1-211,MlaC lipid transporter,cell growth in ∆mlaC and selective medium,Growth,MLAC_ECOLI_2023-08-07_b02.a2m,1,211,211,0.2,0.2,22874,0.934,197,7904.3,40.12335025,Medium,126,0.6395939086,MLAC_ECOLI_MacRae_2023.csv,score,1,mutant,MLAC_ECOLI_theta0.2_2023-08-07_b02.npy,MLAC_ECOLI.pdb,1-211,1,,OrganismalFitness +MSH2_HUMAN_Jia_2020,MSH2_HUMAN_Jia_2020.csv,MSH2_HUMAN,Human,Homo 
sapiens,MAVQPKETLQLESAAEVGFVRFFQGMPEKPTTTVRLFDRGDFYTAHGEDALLAAREVFKTQGVIKYMGPAGAKNLQSVVLSKMNFESFVKDLLLVRQYRVEVYKNRAGNKASKENDWYLAYKASPGNLSQFEDILFGNNDMSASIGVVGVKMSAVDGQRQVGVGYVDSIQRKLGLCEFPDNDQFSNLEALLIQIGPKECVLPGGETAGDMGKLRQIIQRGGILITERKKADFSTKDIYQDLNRLLKGKKGEQMNSAVLPEMENQVAVSSLSAVIKFLELLSDDSNFGQFELTTFDFSQYMKLDIAAVRALNLFQGSVEDTTGSQSLAALLNKCKTPQGQRLVNQWIKQPLMDKNRIEERLNLVEAFVEDAELRQTLQEDLLRRFPDLNRLAKKFQRQAANLQDCYRLYQGINQLPNVIQALEKHEGKHQKLLLAVFVTPLTDLRSDFSKFQEMIETTLDMDQVENHEFLVKPSFDPNLSELREIMNDLEKKMQSTLISAARDLGLDPGKQIKLDSSAQFGYYFRVTCKEEKVLRNNKNFSTVDIQKNGVKFTNSKLTSLNEEYTKNKTEYEEAQDAIVKEIVNISSGYVEPMQTLNDVLAQLDAVVSFAHVSNGAPVPYVRPAILEKGQGRIILKASRHACVEVQDEIAFIPNDVYFEKDKQMFHIITGPNMGGKSTYIRQTGVIVLMAQIGCFVPCESAEVSIVDCILARVGAGDSQLKGVSTFMAEMLETASILRSATKDSLIIIDELGRGTSTYDGFGLAWAISEYIATKIGAFCMFATHFHELTALANQIPTVNNLHVTALTTEETLTMLYQVKKGVCDQSFGIHVAELANFPKHVIECAKQKALELEEFQYIGESQGYDIMEPAAKKCYLEREQGEKIIQEFLSKVKQMPFTEMSEENITIKLKQLKAEVIAKNNSFVNEIISRIKVTT,934,FALSE,16749,16749,0,1,manual,Jia,Massively parallel functional testing of MSH2 missense variants conferring Lynch Syndrome risk,2020,10.1016/j.ajhg.2020.12.003,1-934,MSH2,"drug resistance (surrogate for protein activity, 6-thioguanine (6-TG))",,MSH2_HUMAN_full_11-26-2021_b05.a2m,1,934,934,0.5,0.2,61226,0.901,842,10716.4,12.72731591,medium,1035,1.229216152,MSH2_HUMAN_Jia_2020.csv,LOF score,-1,Variant,MSH2_HUMAN_theta_0.2.npy,MSH2_HUMAN.pdb,1-934,0.1,,OrganismalFitness +MTH3_HAEAE_RockahShmuel_2015,MTH3_HAEAE_RockahShmuel_2015.csv,MTH3_HAEAE,Prokaryote,Haemophilus aegyptius,MNLISLFSGAGGLDLGFQKAGFRIIAANEYDKSIWKTYESNHSAKLIKGDISKISSDEFPKCDGIIGGPPCQSWSEGGSLRGIDDPRGKLFYEYIRILKQKKPKFFLAENVKGMLAQRHNKAVQEFIQEFDNAGYDVHIILLNANDYGVAQDRKRVFYIGFRKELNINYLPPIPHLIKPTLKDVIWDLKDNPIPALDKNKTNGNKCIYPNHEYFIGSYSTIFMSRNRVRQWNEPAFTVQASGRQCQLHPQAPVMLKVSKNLNKFVEGKEHLYRRLTVRECARVQGFPDDFIFHYESLNDGYKMIGNAVPVNLAYEIAKTIKSALEIRKGN,330,FALSE,1777,1777,0,0.01,manual,Rockah-Shmuel,Systematic Mapping of Protein Mutational Space by Prolonged Drift Reveals the Deleterious 
Effects of Seemingly Neutral Mutations,2015,10.1371/journal.pcbi.1004421,2-330,DNA methylase HaeIII,Growth,Activity,MTH3_HAEAE_full_11-26-2021_b02.a2m,1,330,330,0.2,0.2,82734,0.891,294,26962.4,91.70884354,medium,582,1.979591837,MTH3_HAEAE_Rockah-Shmuel_2015.csv,Wrel_G17_filtered,1,mutant,MTH3_HAEAE_theta_0.2.npy,MTH3_HAEAE.pdb,1-330,0.1,,OrganismalFitness +MTHR_HUMAN_Weile_2021,MTHR_HUMAN_Weile_2021.csv,MTHR_HUMAN,Human,Homo sapiens,MVNEARGNSSLNPCLEGSASSGSESSKDSSRCSTPGLDPERHERLREKMRRRLESGDKWFSLEFFPPRTAEGAVNLISRFDRMAAGGPLYIDVTWHPAGDPGSDKETSSMMIASTAVNYCGLETILHMTCCRQRLEEITGHLHKAKQLGLKNIMALRGDPIGDQWEEEEGGFNYAVDLVKHIRSEFGDYFDICVAGYPKGHPEAGSFEADLKHLKEKVSAGADFIITQLFFEADTFFRFVKACTDMGITCPIVPGIFPIQGYHSLRQLVKLSKLEVPQEIKDVIEPIKDNDAAIRNYGIELAVSLCQELLASGLVPGLHFYTLNREMATTEVLKRLGMWTEDPRRPLPWALSAHPKRREEDVRPIFWASRPKSYIYRTQEWDEFPNGRWGNSSSPAFGELKDYYLFYLKSKSPKEELLKMWGEELTSEESVFEVFVLYLSGEPNRNGHKVTCLPWNDEPLAAETSLLKEELLRVNRQGILTINSQPNINGKPSSDPIVGWGPSGGYVFQKAYLEFFTSRETAEALLQVLKKYELRVNYHLVNVKGENITNAPELQPNAVTWGIFPGREIIQPTVVDPVSFMFWKDEAFALWIERWGKLYEEESPSRTIIQYIHDNYFLVNLVDNDFPLDNCLWQVVEDTLELLNRPTQNARETEAP,656,FALSE,12464,12464,0,0.746,median,Weile,Shifting landscapes of human MTHFR missense-variant effects,2021,10.1016/j.ajhg.2021.05.009,1-656,MTHFR reductase,Growth,,MTHR_HUMAN_2023-08-07_b02.a2m,1,656,656,0.2,0.2,4783,0.96,630,614.5,0.9753968254,Low,65,0.1031746032,urn_mavedb_00000049-a-6_scores.csv,score,1,mutant,MTHR_HUMAN_theta0.2_2023-08-07_b02.npy,MTHR_HUMAN.pdb,1-656,1,,OrganismalFitness +MYO3_YEAST_Tsuboyama_2023_2BTT,MYO3_YEAST_Tsuboyama_2023_2BTT.csv,MYO3_YEAST,Eukaryote,Saccharomyces cerevisiae,KDPKFEAAYDFPGSGSSSELPLKKGDIVFISRDEPSGWSLAKLLDGSKEGWVPTAYMTPYK,61,TRUE,3297,947,2350,-1,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-27,1-61,Myosin-3,Stability,cDNA display 
proteolysis,MYO3_YEAST_2023-08-07_b07.a2m,1,61,61,0.7,0.2,442941,0.885,54,12893.2,238.762963,High,51,0.9444444444,Tsuboyama2023_Dataset2_Dataset24,ddG_ML_float,1,mut_type,MYO3_YEAST_theta0.2_2023-08-07_b07.npy,MYO3_YEAST.pdb,1-61,1,,Stability +NCAP_I34A1_Doud_2015,NCAP_I34A1_Doud_2015.csv,NCAP_I34A1,Virus,"Influenza A virus (strain A/Puerto Rico/8/1934 H1N1), Influenza A virus (strain A/Aichi/2/1968 H3N2)",MASQGTKRSYEQMETDGERQNATEIRASVGKMIGGIGRFYIQMCTELKLSDYEGRLIQNSLTIERMVLSAFDERRNKYLEEHPSAGKDPKKTGGPIYRRVNGKWMRELILYDKEEIRRIWRQANNGDDATAGLTHMMIWHSNLNDATYQRTRALVRTGMDPRMCSLMQGSTLPRRSGAAGAAVKGVGTMVMELVRMIKRGINDRNFWRGENGRKTRIAYERMCNILKGKFQTAAQKAMMDQVRESRNPGNAEFEDLTFLARSALILRGSVAHKSCLPACVYGPAVASGYDFEREGYSLVGIDPFRLLQNSQVYSLIRPNENPAHKSQLVWMACHSAAFEDLRVLSFIKGTKVLPRGKLSTRGVQIASNENMETMESSTLELRSRYWAIRTRSGGNTNQQRASAGQISIQPTFSVQRNLPFDRTTIMAAFNGNTEGRTSDMRTEIIRMMESARPEDVSFQGRGVFELSDEKAASPIVPSFDMSNEGSYFFGDNAEEYDN,498,FALSE,9462,9462,0,-2.872717233,median,Doud,Site-Specific Amino Acid Preferences Are Mostly Conserved in Two Closely Related Protein Homologs,2015,10.1093/molbev/msv167,1-498,Influenza nucleoprotein,,Growth,NCAP_I34A1_theta0.99_full_11-26-2021_b09.a2m,1,498,498,0.9,0.01,15390,1,498,1493.2,2.998393574,medium,2116,4.248995984,NCAP_I34A1_Doud_2015.csv,log_fitness_by_syn_mut_fitness,1,mutant,NCAP_I34A1_theta_0.01.npy,NCAP_I34A1.pdb,1-498,0.1,,OrganismalFitness +NKX31_HUMAN_Tsuboyama_2023_2L9R,NKX31_HUMAN_Tsuboyama_2023_2L9R.csv,NKX31_HUMAN,Human,Homo sapiens,HSHMSHTQVIELERKFSHQKYLSAPERAHLAKNLKLTETQVKIWFQNRRYKTKRKQLSSEL,61,TRUE,2482,1149,1333,-0.3,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-28,1-61,Homeobox protein Nkx-3.1,Stability,cDNA display proteolysis,NKX31_HUMAN_2023-08-07_b04.a2m,1,61,61,0.4,0.2,319273,0.902,55,8440.8,153.4690909,High,27,0.4909090909,Tsuboyama2023_Dataset2_Dataset25,ddG_ML_float,1,mut_type,NKX31_HUMAN_theta0.2_2023-08-07_b04.npy,NKX31_HUMAN.pdb,1-61,1,,Stability 
+NPC1_HUMAN_Erwood_2022_HEK293T,NPC1_HUMAN_Erwood_2022_HEK293T.csv,NPC1_HUMAN,Human,Homo sapiens,MTARGLALGLLLLLLCPAQVFSQSCVWYGECGIAYGDKRYNCEYSGPPKPLPKDGYDLVQELCPGFFFGNVSLCCDVRQLQTLKDNLQLPLQFLSRCPSCFYNLLNLFCELTCSPRQSQFLNVTATEDYVDPVTNQTKTNVKELQYYVGQSFANAMYNACRDVEAPSSNDKALGLLCGKDADACNATNWIEYMFNKDNGQAPFTITPVFSDFPVHGMEPMNNATKGCDESVDEVTAPCSCQDCSIVCGPKPQPPPPPAPWTILGLDAMYVIMWITYMAFLLVFFGAFFAVWCYRKRYFVSEYTPIDSNIAFSVNASDKGEASCCDPVSAAFEGCLRRLFTRWGSFCVRNPGCVIFFSLVFITACSSGLVFVRVTTNPVDLWSAPSSQARLEKEYFDQHFGPFFRTEQLIIRAPLTDKHIYQPYPSGADVPFGPPLDIQILHQVLDLQIAIENITASYDNETVTLQDICLAPLSPYNTNCTILSVLNYFQNSHSVLDHKKGDDFFVYADYHTHFLYCVRAPASLNDTSLLHDPCLGTFGGPVFPWLVLGGYDDQNYNNATALVITFPVNNYYNDTEKLQRAQAWEKEFINFVKNYKNPNLTISFTAERSIEDELNRESDSDVFTVVISYAIMFLYISLALGHMKSCRRLLVDSKVSLGIAGILIVLSSVACSLGVFSYIGLPLTLIVIEVIPFLVLAVGVDNIFILVQAYQRDERLQGETLDQQLGRVLGEVAPSMFLSSFSETVAFFLGALSVMPAVHTFSLFAGLAVFIDFLLQITCFVSLLGLDIKRQEKNRLDIFCCVRGAEDGTSVQASESCLFRFFKNSYSPLLLKDWMRPIVIAIFVGVLSFSIAVLNKVDIGLDQSLSMPDDSYMVDYFKSISQYLHAGPPVYFVLEEGHDYTSSKGQNMVCGGMGCNNDSLVQQIFNAAQLDNYTRIGFAPSSWIDDYFDWVKPQSSCCRVDNITDQFCNASVVDPACVRCRPLTPEGKQRPQGGDFMRFLPMFLSDNPNPKCGKGGHAAYSSAVNILLGHGTRVGATYFMTYHTVLQTSADFIDALKKARLIASNVTETMGINGSAYRVFPYSVFYVFYEQYLTIIDDTIFNLGVSLGAIFLVTMVLLGCELWSAVIMCATIAMVLVNMFGVMWLWGISLNAVSLVNLVMSCGISVEFCSHITRAFTVSMKGSRVERAEEALAHMGSSVFSGITLTKFGGIVVLAFAKSQIFQIFYFRMYLAMVLLGATHGLIFLPVLLSYIGPSVNKAKSCATEERYKGTERERLLNF,1278,FALSE,637,637,0,0.8,manual,Erwood,Saturation variant interpretation using CRISPR prime editing,2022,10.1038/s41587-021-01201-1,347-1190,NPC intracellular cholesterol transporter,Fluorescence measurement,Flow Cytometry Assay,NPC1_HUMAN_2023-10-12_b07.a2m,1,1278,1278,0.7,0.2,6333,0.987,1261,918.9,0.7287073751,Low,137,0.1086439334,41587_2021_1201_MOESM3_ESM.xlsx,Function Score,1,Protein Annotation,NPC1_HUMAN_theta0.2_2023-10-12_b07.npy,NPC1_HUMAN.pdb,1-1278,1,,Activity +NPC1_HUMAN_Erwood_2022_RPE1,NPC1_HUMAN_Erwood_2022_RPE1.csv,NPC1_HUMAN,Human,Homo 
sapiens,MTARGLALGLLLLLLCPAQVFSQSCVWYGECGIAYGDKRYNCEYSGPPKPLPKDGYDLVQELCPGFFFGNVSLCCDVRQLQTLKDNLQLPLQFLSRCPSCFYNLLNLFCELTCSPRQSQFLNVTATEDYVDPVTNQTKTNVKELQYYVGQSFANAMYNACRDVEAPSSNDKALGLLCGKDADACNATNWIEYMFNKDNGQAPFTITPVFSDFPVHGMEPMNNATKGCDESVDEVTAPCSCQDCSIVCGPKPQPPPPPAPWTILGLDAMYVIMWITYMAFLLVFFGAFFAVWCYRKRYFVSEYTPIDSNIAFSVNASDKGEASCCDPVSAAFEGCLRRLFTRWGSFCVRNPGCVIFFSLVFITACSSGLVFVRVTTNPVDLWSAPSSQARLEKEYFDQHFGPFFRTEQLIIRAPLTDKHIYQPYPSGADVPFGPPLDIQILHQVLDLQIAIENITASYDNETVTLQDICLAPLSPYNTNCTILSVLNYFQNSHSVLDHKKGDDFFVYADYHTHFLYCVRAPASLNDTSLLHDPCLGTFGGPVFPWLVLGGYDDQNYNNATALVITFPVNNYYNDTEKLQRAQAWEKEFINFVKNYKNPNLTISFTAERSIEDELNRESDSDVFTVVISYAIMFLYISLALGHMKSCRRLLVDSKVSLGIAGILIVLSSVACSLGVFSYIGLPLTLIVIEVIPFLVLAVGVDNIFILVQAYQRDERLQGETLDQQLGRVLGEVAPSMFLSSFSETVAFFLGALSVMPAVHTFSLFAGLAVFIDFLLQITCFVSLLGLDIKRQEKNRLDIFCCVRGAEDGTSVQASESCLFRFFKNSYSPLLLKDWMRPIVIAIFVGVLSFSIAVLNKVDIGLDQSLSMPDDSYMVDYFKSISQYLHAGPPVYFVLEEGHDYTSSKGQNMVCGGMGCNNDSLVQQIFNAAQLDNYTRIGFAPSSWIDDYFDWVKPQSSCCRVDNITDQFCNASVVDPACVRCRPLTPEGKQRPQGGDFMRFLPMFLSDNPNPKCGKGGHAAYSSAVNILLGHGTRVGATYFMTYHTVLQTSADFIDALKKARLIASNVTETMGINGSAYRVFPYSVFYVFYEQYLTIIDDTIFNLGVSLGAIFLVTMVLLGCELWSAVIMCATIAMVLVNMFGVMWLWGISLNAVSLVNLVMSCGISVEFCSHITRAFTVSMKGSRVERAEEALAHMGSSVFSGITLTKFGGIVVLAFAKSQIFQIFYFRMYLAMVLLGATHGLIFLPVLLSYIGPSVNKAKSCATEERYKGTERERLLNF,1278,FALSE,63,63,0,0.8,manual,Erwood,Saturation variant interpretation using CRISPR prime editing,2022,10.1038/s41587-021-01201-1,420-920,NPC intracellular cholesterol transporter,Fluorescence measurement,Flow Cytometry Assay,NPC1_HUMAN_2023-10-12_b07.a2m,1,1278,1278,0.7,0.2,6333,0.987,1261,918.9,0.7287073751,Low,137,0.1086439334,41587_2021_1201_MOESM3_ESM.xlsx,Function Score,1,Protein Annotation,NPC1_HUMAN_theta0.2_2023-10-12_b07.npy,NPC1_HUMAN.pdb,1-1278,1,,Activity +NRAM_I33A0_Jiang_2016,NRAM_I33A0_Jiang_2016.csv,NRAM_I33A0,Virus,Influenza A virus 
(A/WSN/1933(H1N1)),MNPNQKIITIGSICMVVGIISLILQIGNIISIWISHSIQTGNQNHTGICNQGIITYNVVAGQDSTSVILTGNSSLCPIRGWAIHSKDNGIRIGSKGDVFVIREPFISCSHLECRTFFLTQGALLNDKHSNGTVKDRSPYRALMSCPVGEAPSPYNSRFESVAWSASACHDGMGWLTIGISGPDNGAVAVLKYNGIITETIKSWRKKILRTQESECTCVNGSCFTIMTDGPSNGLASYKIFKIEKGKVTKSIELNAPNSHYEECSCYPDTGKVMCVCRDNWHGSNRPWVSFDQNLDYQIGYICSGVFGDNPRPKDGPGSCGPVSADGANGVKGFSYRYGNGVWIGRTKSDSSRHGFEMIWDPNGWTETDSRFSVRQDVVAMTDRSGYSGSFVQHPELTGLDCMRPCFWVELIRGRPEEETIWTSGSIISFCGVNSDTVDWSWPDGAELPFTIDK,453,FALSE,298,298,0,-0.7772013612,median,Jiang,A Balance between Inhibitor Binding and Substrate Processing Confers Influenza Drug Resistance,2016,10.1016/j.jmb.2015.11.027,67-285,Influenza neuraminidase,,Growth,NRAM_I33A0_full_11-26-2021_b01.a2m,1,453,453,0.1,0.01,47174,0.976,442,33.1,0.07488687783,low,0,0,NRAM_I33A0_Jiang_2016.csv,Standard Conditions,1,mutant,NRAM_I33A0_theta_0.01.npy,NRAM_I33A0.pdb,1-453,0.1,,OrganismalFitness +NUD15_HUMAN_Suiter_2020,NUD15_HUMAN_Suiter_2020.csv,NUD15_HUMAN,Human,Homo sapiens,MTASAQPRGRRPGVGVGVVVTSCKHPRCVLLGKRKGSVGAGSFQLPGGHLEFGETWEECAQRETWEEAALHLKNVHFASVVNSFIEKENYHYVTILMKGEVDVTHDSEPKNVEPEKNESWEWVPWEELPPLDQLFWGLRCLKEQGYDPFKEDLNHLVGYKGNHL,164,FALSE,2844,2844,0,0.25,manual,Suiter,Massively parallel variant characterization identifies NUDT15 alleles associated with thiopurine toxicity,2020,10.1073/pnas.1915680117,2-164,NUDT15,,"VAMP-seq, drug sensitivity",NUD15_HUMAN_full_11-26-2021_b04.a2m,1,164,164,0.4,0.2,153922,0.72,118,43847.8,371.5915254,high,151,1.279661017,NUD15_HUMAN_Suiter_2020.csv,Final NUDT15 activity Score,1,mutant,NUD15_HUMAN_theta_0.2.npy,NUD15_HUMAN.pdb,1-164,0.1,,Expression +NUSA_ECOLI_Tsuboyama_2023_1WCL,NUSA_ECOLI_Tsuboyama_2023_1WCL.csv,NUSA_ECOLI,Prokaryote,Escherichia coli,EAHAAIDTFTKYLDIDEDFATVLVEEGFSTLEELAYVPMKELLEIEGLDEPTVEALRERAKNALATIAQ,69,TRUE,2028,1306,722,-1.318069467,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-29,1-69,Transcription 
termination/antitermination protein NusA,Stability,cDNA display proteolysis,NUSA_ECOLI_2023-08-07_b03.a2m,1,69,69,0.3,0.2,205612,0.812,56,39002.5,696.4732143,High,38,0.6785714286,Tsuboyama2023_Dataset2_Dataset26,ddG_ML_float,1,mut_type,NUSA_ECOLI_theta0.2_2023-08-07_b03.npy,NUSA_ECOLI.pdb,1-69,1,,Stability +NUSG_MYCTU_Tsuboyama_2023_2MI6,NUSG_MYCTU_Tsuboyama_2023_2MI6.csv,NUSG_MYCTU,Prokaryote,Mycobacterium tuberculosis,DYEVGESVTVMDGPFATLPATISEVNAEQQKLKVLVSIFGRETPVELTFGQVSKI,55,TRUE,1380,1019,361,-0.5,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-30,1-55,Transcription termination/antitermination protein NusG,Stability,cDNA display proteolysis,NUSG_MYCTU_2023-08-07_b03.a2m,1,55,55,0.3,0.2,102004,0.964,53,16625.7,313.6924528,High,41,0.7735849057,Tsuboyama2023_Dataset2_Dataset27,ddG_ML_float,1,mut_type,NUSG_MYCTU_theta0.2_2023-08-07_b03.npy,NUSG_MYCTU.pdb,1-55,1,,Stability +OBSCN_HUMAN_Tsuboyama_2023_1V1C,OBSCN_HUMAN_Tsuboyama_2023_1V1C.csv,OBSCN_HUMAN,Human,Homo sapiens,FDIYVVTADYLPLGAEQDAITLREGQYVEVLDAAHPLRWLVRTKPTKSSPSRQGWVSPAYLDRRL,65,TRUE,3197,1213,1984,-1,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-31,1-65,Obscurin,Stability,cDNA display proteolysis,OBSCN_HUMAN_2023-08-07_b02.a2m,1,65,65,0.2,0.2,718751,0.815,53,23710.7,447.3716981,High,54,1.018867925,Tsuboyama2023_Dataset2_Dataset28,ddG_ML_float,1,mut_type,OBSCN_HUMAN_theta0.2_2023-08-07_b02.npy,OBSCN_HUMAN.pdb,1-65,1,,Stability +ODP2_GEOSE_Tsuboyama_2023_1W4G,ODP2_GEOSE_Tsuboyama_2023_1W4G.csv,ODP2_GEOSE,Prokaryote,Geobacillus stearothermophilus,NRRVIAMPSVRKWAREKGVDIRLVQGTGKNGRVLKEDIDAFLAG,44,TRUE,1134,669,465,-0.4168227551,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-32,1-44,Dihydrolipoyllysine-residue acetyltransferase component of pyruvate dehydrogenase 
complex,Stability,cDNA display proteolysis,ODP2_GEOSE_2023-08-07_b07.a2m,1,44,44,0.7,0.2,163835,0.909,40,14834.6,370.865,High,21,0.525,Tsuboyama2023_Dataset2_Dataset29,ddG_ML_float,1,mut_type,ODP2_GEOSE_theta0.2_2023-08-07_b07.npy,ODP2_GEOSE.pdb,1-44,1,,Stability +OPSD_HUMAN_Wan_2019,OPSD_HUMAN_Wan_2019.csv,OPSD_HUMAN,Human,Homo sapiens,MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA,348,FALSE,165,165,0,0.5795144905,median,Wan,Characterizing variants of unknown significance in rhodopsin: A functional genomics approach,2019,10.1002/humu.23762,4-347,Rhodopsin,Expression,Flow Cytometry Assay,OPSD_HUMAN_2023-10-12_b04.a2m,1,348,348,0.4,0.2,342311,0.876,305,36900.5,120.9852459,High,247,0.8098360656,urn_mavedb_00000099-a-1_scores.csv,score,1,mutant,OPSD_HUMAN_theta0.2_2023-10-12_b04.npy,OPSD_HUMAN.pdb,1-348,1,,Expression +OTC_HUMAN_Lo_2023,OTC_HUMAN_Lo_2023.csv,OTC_HUMAN,Human,Homo sapiens,MLFNLRILLNNAAFRNGHNFMVRNFRCGQPLQNKVQLKGRDLLTLKNFTGEEIKYMLWLSADLKFRIKQKGEYLPLLQGKSLGMIFEKRSTRTRLSTETGFALLGGHPCFLTTQDIHLGVNESLTDTARVLSSMADAVLARVYKQSDLDTLAKEASIPIINGLSDLYHPIQILADYLTLQEHYSSLKGLTLSWIGDGNNILHSIMMSAAKFGMHLQAATPKGYEPDASVTKLAEQYAKENGTKLLLTNDPLEAAHGGNVLITDTWISMGQEEEKKKRLQAFQGYQVTMKTAKVAASDWTFLHCLPRKPEEVDDEVFYSPRSLVFPEAENRKWTIMAVMVSLLTDYSPQLQKPKF,354,FALSE,1570,1570,0,0.417,median,Lo,"The functional impact of 1,570 individual amino acid substitutions in human OTC",2023,10.1016/j.ajhg.2023.03.019,33-354,OTC,Enzymatic activity,,OTC_HUMAN_2023-08-07_b02.a2m,1,354,354,0.2,0.2,135607,0.87,308,18646.2,60.53961039,Medium,641,2.081168831,urn_mavedb_00000112-a-1_scores.csv,DMS_score,1,mutant,OTC_HUMAN_theta0.2_2023-08-07_b02.npy,OTC_HUMAN.pdb,1-354,1,,Activity 
+OTU7A_HUMAN_Tsuboyama_2023_2L2D,OTU7A_HUMAN_Tsuboyama_2023_2L2D.csv,OTU7A_HUMAN,Human,Homo sapiens,TLDMDAVLSDFVRSTGAEPGLARDLLEGKNWDLTAALSDYEQ,42,FALSE,635,635,0,-1,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-33,1-42,OTU domain-containing protein 7A,Stability,cDNA display proteolysis,OTU7A_HUMAN_2023-08-07_b02.a2m,1,42,42,0.2,0.2,1359071,0.881,37,514715.2,13911.22162,High,28,0.7567567568,Tsuboyama2023_Dataset2_Dataset30,ddG_ML_float,1,mut_type,OTU7A_HUMAN_theta0.2_2023-08-07_b02.npy,OTU7A_HUMAN.pdb,1-42,1,,Stability +OXDA_RHOTO_Vanella_2023_activity,OXDA_RHOTO_Vanella_2023_activity.csv,OXDA_RHOTO,Eukaryote,Rhodotorula gracilis,HSQKRVVVLGSGVIGLSSALILARKGYSVHILARDLPEDVSSQTFASPWAGANWTPFMTLTDGPRQAKWEESTFKKWVELVPTGHAMWLKGTRRFAQNEDGLLGHWYKDITPNYRPLPSSECPPGAIGVTYDTLSVHAPKYCQYLARELQKLGATFERRTVTSLEQAFDGADLVVNATGLGAKSIAGIDDQAAEPIRGQTVLVKSPCKRCTMDSSDPASPAYIIPRPGGEVICGGTYGVGDWDLSVNPETVQRILKHCLRLDPTISSDGTIEGIEVLRHNVGLRPARRGGPRVEAERIVLPLDRTKSPLSLGRGSARAAKEKEVTLVHAYGFSSAGYQQSWGAAEDVAQLVDEAFQRYHGAARE,364,FALSE,6396,6396,0,-0.2,manual,Vanella,Understanding Activity-Stability Tradeoffs in Biocatalysts by Enzyme Proximity Sequencing,2023,10.1101/2023.02.24.529916,1-364,D-amino acid oxidase (DAOx),fluorescent label of enzyme product,FACS,OXDA_RHOTO_2023-08-07_b02.a2m,1,364,364,0.2,0.2,520184,0.876,319,98000.4,307.2112853,High,892,2.796238245,Figure_2.xlsx,activity fitness,1,mutant,OXDA_RHOTO_theta0.2_2023-08-07_b02.npy,OXDA_RHOTO.pdb,1-364,1,,Activity +OXDA_RHOTO_Vanella_2023_expression,OXDA_RHOTO_Vanella_2023_expression.csv,OXDA_RHOTO,Eukaryote,Rhodotorula 
gracilis,HSQKRVVVLGSGVIGLSSALILARKGYSVHILARDLPEDVSSQTFASPWAGANWTPFMTLTDGPRQAKWEESTFKKWVELVPTGHAMWLKGTRRFAQNEDGLLGHWYKDITPNYRPLPSSECPPGAIGVTYDTLSVHAPKYCQYLARELQKLGATFERRTVTSLEQAFDGADLVVNATGLGAKSIAGIDDQAAEPIRGQTVLVKSPCKRCTMDSSDPASPAYIIPRPGGEVICGGTYGVGDWDLSVNPETVQRILKHCLRLDPTISSDGTIEGIEVLRHNVGLRPARRGGPRVEAERIVLPLDRTKSPLSLGRGSARAAKEKEVTLVHAYGFSSAGYQQSWGAAEDVAQLVDEAFQRYHGAARE,364,FALSE,6769,6769,0,-0.2,manual,Vanella,Understanding Activity-Stability Tradeoffs in Biocatalysts by Enzyme Proximity Sequencing,2023,10.1101/2023.02.24.529916,1-364,D-amino acid oxidase (DAOx),cell surface expression,FACS,OXDA_RHOTO_2023-08-07_b02.a2m,1,364,364,0.2,0.2,520184,0.876,319,98000.4,307.2112853,High,892,2.796238245,Figure_2.xlsx,expression fitness,1,mutant,OXDA_RHOTO_theta0.2_2023-08-07_b02.npy,OXDA_RHOTO.pdb,1-364,1,,Expression +P53_HUMAN_Giacomelli_2018_Null_Etoposide,P53_HUMAN_Giacomelli_2018_Null_Etoposide.csv,P53_HUMAN,Human,Homo sapiens,MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD,393,FALSE,7467,7467,0,-0.5,manual,Giacomelli,Mutational processes shape the landscape of TP53 mutations in human cancer,2018,10.1038/s41588-018-0204-y,1-393,p53,"drug resistance (nutlin-3, etoposide)",Growth,P53_HUMAN_full_04-29-2022_b09.a2m,1,393,393,0.9,0.2,5069,0.858,337,153.2,0.4545994065,low,7,0.02077151335,P53_HUMAN_Giacomelli_2018.csv,A549_p53NULL_Etoposide_Z-score,1,Allele,P53_HUMAN_theta_0.2.npy,P53_HUMAN.pdb,1-393,0.1,,OrganismalFitness +P53_HUMAN_Giacomelli_2018_Null_Nutlin,P53_HUMAN_Giacomelli_2018_Null_Nutlin.csv,P53_HUMAN,Human,Homo 
sapiens,MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD,393,FALSE,7467,7467,0,0.04438920187,median,Giacomelli,Mutational processes shape the landscape of TP53 mutations in human cancer,2018,10.1038/s41588-018-0204-y,1-393,p53,"drug resistance (nutlin-3, etoposide)",Growth,P53_HUMAN_full_04-29-2022_b09.a2m,1,393,393,0.9,0.2,5069,0.858,337,153.2,0.4545994065,low,7,0.02077151335,P53_HUMAN_Giacomelli_2018.csv,A549_p53NULL_Nutlin-3_Z-score,-1,Allele,P53_HUMAN_theta_0.2.npy,P53_HUMAN.pdb,1-393,0.1,,OrganismalFitness +P53_HUMAN_Giacomelli_2018_WT_Nutlin,P53_HUMAN_Giacomelli_2018_WT_Nutlin.csv,P53_HUMAN,Human,Homo sapiens,MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD,393,FALSE,7467,7467,0,-1,manual,Giacomelli,Mutational processes shape the landscape of TP53 mutations in human cancer,2018,10.1038/s41588-018-0204-y,1-393,p53,"drug resistance (nutlin-3, etoposide)",Growth,P53_HUMAN_full_04-29-2022_b09.a2m,1,393,393,0.9,0.2,5069,0.858,337,153.2,0.4545994065,low,7,0.02077151335,P53_HUMAN_Giacomelli_2018.csv,A549_p53WT_Nutlin-3_Z-score,-1,Allele,P53_HUMAN_theta_0.2.npy,P53_HUMAN.pdb,1-393,0.1,,OrganismalFitness +P53_HUMAN_Kotler_2018,P53_HUMAN_Kotler_2018.csv,P53_HUMAN,Human,Homo 
sapiens,MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD,393,FALSE,1048,1048,0,1,manual,Kotler,A Systematic p53 Mutation Library Links Differential Functional Impact to Cancer Mutation Pattern and Evolutionary Conservation,2018,10.1016/j.molcel.2018.06.012,102-292,p53,growth,Growth,P53_HUMAN_full_11-26-2021_b09.a2m,1,393,393,0.9,0.2,4129,0.863,339,148,0.4365781711,low,15,0.04424778761,P53_HUMAN_Kotler_2018.csv,RFS_H1299,-1,mutant,P53_HUMAN_Kotler_theta_0.2.npy,P53_HUMAN.pdb,1-393,0.1,,OrganismalFitness +P84126_THETH_Chan_2017,P84126_THETH_Chan_2017.csv,P84126_THETH,Prokaryote,Thermus thermophilus,MRPDLSRVPGVLGEIARKRASEVAPYPLPEPPSVPSFKEALLRPGLSVIAEVKRQSPSEGLIREVDPVEAALAYARGGARAVSVLTEPHRFGGSLLDLKRVREAVDLPLLRKDFVVDPFMLEEARAFGASAALLIVALLGELTGAYLEEARRLGLEALVEVHTERELEIALEAGAEVLGINNRDLATLHINLETAPRLGRLARKRGFGGVLVAESGYSRKEELKALEGLFDAVLIGTSLMRAPDLEAALRELVG,254,FALSE,1519,1519,0,-0.5,manual,Chan,Correlation of fitness landscapes from three orthologous TIM barrels originates from sequence and structure constraints,2017,10.1038/ncomms14614,44-238,TIM Barrell (T. 
thermophilus),fitness,Growth,P84126_THETH_full_11-26-2021_b04.a2m,1,254,254,0.4,0.2,53441,0.941,239,10704.6,44.78912134,medium,390,1.631799163,P84126_THETH_Chan_2017.csv,fitness,1,mutant,P84126_THETH_theta_0.2.npy,P84126_THETH.pdb,1-254,0.1,,OrganismalFitness +PA_I34A1_Wu_2015,PA_I34A1_Wu_2015.csv,PA_I34A1,Virus,influenza subtype?,MEDFVRQCFNPMIVELAEKAMKEYGEDLKIETNKFAAICTHLEVCFMYSDFHFIDEQGESIVVELGDPNALLKHRFEIIEGRDRTIAWTVVNSICNTTGAEKPKFLPDLYDYKKNRFIEIGVTRREVHIYYLEKANKIKSEKTHIHIFSFTGEEMATKADYTLDEESRARIKTRLFTIRQEMASRGLWDSFRQSERGEETIEERFEITGTMRKLADQSLPPNFSSLEKFRAYVDGFEPNGYIEGKLSQMSKEVNARIEPFLKSTPRPLRLPDGPPCSQRSKFLLMDALKLSIEDPSHEGEGIPLYDAIKCMRTFFGWKEPNVVKPHEKGINPNYLLSWKQVLAELQDIENEEKIPRTKNMKKTSQLKWALGENMAPEKVDFDDCKDVGDLKQYDSDEPELRSLASWIQNEFNKACELTDSSWIELDEIGEDAAPIEHIASMRRNYFTAEVSHCRATEYIMKGVYINTALLNASCAAMDDFQLIPMISKCRTKEGRRKTNLYGFIIKGRSHLRNDTDVVNFVSMEFSLTDPRLEPHKWEKYCVLEVGDMLLRSAIGHVSRPMFLYVRTNGTSKIKMKWGMEMRRCLLQSLQQIESMIEAESSVKEKDMTKEFFENKSETWPVGESPKGVEEGSIGKVCRTLLAKSVFNSLYASPQLEGFSAESRKLLLIVQALRDNLEPGTFDLGGLYEAIEECLINDPWVLLNASWFNSFLTHALR,716,FALSE,1820,1820,0,0.290683953,median,Wu,Functional Constraint Profiling of a Viral Protein Reveals Discordance of Evolutionary Conservation and Functionality,2015,10.1371/journal.pgen.1005310,8-716,Influenza polymerase acidic protein,Viral replication,Growth,PA_I34A1_full_theta0.99_04-29-2022_b09.a2m,1,716,716,0.9,0.01,26750,1,716,1608,2.245810056,medium,3706,5.175977654,PA_I34A1_Wu_2015.csv,RF_index,1,mutant,PA_I34A1_theta_0.01.npy,PA_I34A1.pdb,1-716,0.1,,OrganismalFitness +PABP_YEAST_Melamed_2013,PABP_YEAST_Melamed_2013.csv,PABP_YEAST,Eukaryote,Saccharomyces cerevisiae 
S288C,MADITDKTAEQLENLNIQDDQKQAATGSESQSVENSSASLYVGDLEPSVSEAHLYDIFSPIGSVSSIRVCRDAITKTSLGYAYVNFNDHEAGRKAIEQLNYTPIKGRLCRIMWSQRDPSLRKKGSGNIFIKNLHPDIDNKALYDTFSVFGDILSSKIATDENGKSKGFGFVHFEEEGAAKEAIDALNGMLLNGQEIYVAPHLSRKERDSQLEETKAHYTNLYVKNINSETTDEQFQELFAKFGPIVSASLEKDADGKLKGFGFVNYEKHEDAVKAVEALNDSELNGEKLYVGRAQKKNERMHVLKKQYEAYRLEKMAKYQGVNLFVKNLDDSVDDEKLEEEFAPYGTITSAKVMRTENGKSKGFGFVCFSTPEEATKAITEKNQQIVAGKPLYVAIAQRKDVRRSQLAQQIQARNQMRYQQATAAAAAAAAGMPGQFMPPMFYGVMPPRGVPFNGPNPQQMNPMGGMPKNGMPPQFRNGPVYGVPPQGGFPRNANDNNQFYQQKQRQALGEQLYKKVSAKTSNEEAAGKITGMILDLPPQEVFPLLESDELFEQHYKEASAAYESFKKEQEQQTEQA,577,TRUE,37708,1187,36521,0.3,manual,Melamed,Deep mutational scanning of an RRM domain of the Saccharomyces cerevisiae poly(A)-binding protein,2013,10.1261/rna.040709.113,126-200,PAB1,"Growth (essential function), RNA binding",Growth,PABP_YEAST_full_11-26-2021_b07.a2m,1,577,577,0.7,0.2,7866,0.919,530,855.1,1.613396226,medium,83,0.1566037736,PABP_YEAST_Melamed_2013.csv,linear,1,mutant,PABP_YEAST_theta_0.2.npy,PABP_YEAST.pdb,1-577,0.1,,OrganismalFitness +PAI1_HUMAN_Huttinger_2021,PAI1_HUMAN_Huttinger_2021.csv,PAI1_HUMAN,Human,Homo sapiens,MQMSPALTCLVLGLALVFGEGSAVHHPPSYVAHLASDFGVRVFQQVAQASKDRNVVFSPYGVASVLAMLQLTTGGETQQQIQAAMGFKIDDKGMAPALRHLYKELMGPWNKDEISTTDAIFVQRDLKLVQGFMPHFFRLFRSTVKQVDFSEVERARFIINDWVKTHTKGMISNLLGKGAVDQLTRLVLVNALYFNGQWKTPFPDSSTHRRLFHKSDGSTVSVPMMAQTNKFNYTEFTTPDGHYYDILELPYHGDTLSMFIAAPYEKEVPLSALTNILSAQLISHWKGNMTRLPRLLVLPKFSLETEVDLRKPLENLGMTDMFRQFQADFTSLSDQEPLHVAQALQKVKIEVNESGTVASSSTAVIVSARMAPEEIIMDRPFLFVVRHNPTGTVLFMGQVMEP,402,FALSE,5345,5345,0,0.029313547,median,Huttinger,Deep mutational scanning of the plasminogen activator inhibitor-1 functional landscape,2021,10.1038/s41598-021-97871-7,24-402,"PAI-1, SERPINE1",PAI-1 inhibition of uPA,phage fitness,PAI1_HUMAN_2023-10-12_b05.a2m,1,402,402,0.5,0.2,52528,,,,,,,,PAI1_HUMAN_Huttinger_2021,log2FoldChange,1,mutation,PAI1_HUMAN_theta0.2_2023-10-12_b05.npy,PAI1_HUMAN.pdb,1-402,1,,Activity 
+PHOT_CHLRE_Chen_2023,PHOT_CHLRE_Chen_2023.csv,PHOT_CHLRE,Eukaryote,Chlamydomonas reinhardtii,AGLRHTFVVADATLPDCPLVYASEGFYAMTGYGPDEVLGHNARFLQGEGTDPKEVQKIRDAIKKGEACSVRLLNYRKDGTPFWNLLTVTPIKTPDGRVSKFVGVQVDVTSKTEGKALA,118,TRUE,167529,2122,165407,0.6317018878,median,Chen,Deep Mutational Scanning of an Oxygen-Independent Fluorescent Protein CreiLOV for Comprehensive Profiling of Mutational and Epistatic Effects,2023,10.1021/acssynbio.2c00662,1-118,Phototropin,Fluorescence,FACS,PHOT_CHLRE_2023-08-07_b02.a2m,1,118,118,0.2,0.2,1627150,0.873,103,610128.5,5923.57767,High,232,2.252427184,sb2c00662_si_001.xlsx,mean,1,mutant,PHOT_CHLRE_theta0.2_2023-08-07_b02.npy,PHOT_CHLRE.pdb,1-118,1,,Activity +PIN1_HUMAN_Tsuboyama_2023_1I6C,PIN1_HUMAN_Tsuboyama_2023_1I6C.csv,PIN1_HUMAN,Human,Homo sapiens,KLPPGWEKRMSRSSGRVYYFNHITNASQWERPSGNSSSG,39,TRUE,802,686,116,-0.6844420472,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-34,1-39,Peptidyl-prolyl cis-trans isomerase NIMA-interacting 1,Stability,cDNA display proteolysis,PIN1_HUMAN_2023-08-07_b02.a2m,1,39,39,0.2,0.2,248269,0.821,32,10833.2,338.5375,High,13,0.40625,Tsuboyama2023_Dataset2_Dataset31,ddG_ML_float,1,mut_type,PIN1_HUMAN_theta0.2_2023-08-07_b02.npy,PIN1_HUMAN.pdb,1-39,1,,Stability +PITX2_HUMAN_Tsuboyama_2023_2L7M,PITX2_HUMAN_Tsuboyama_2023_2L7M.csv,PITX2_HUMAN,Human,Homo sapiens,THFTSQQLQELEATFQRNHYPDMSTREEIAVWTNLTEARVRVWFKNRRAKWR,52,TRUE,1824,938,886,-1.201366007,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-35,1-52,Pituitary homeobox 2,Stability,cDNA display proteolysis,PITX2_HUMAN_2023-08-07_b04.a2m,1,52,52,0.4,0.2,344174,1,52,9819.6,188.8384615,High,25,0.4807692308,Tsuboyama2023_Dataset2_Dataset32,ddG_ML_float,1,mut_type,PITX2_HUMAN_theta0.2_2023-08-07_b04.npy,PITX2_HUMAN.pdb,1-52,1,,Stability 
+PKN1_HUMAN_Tsuboyama_2023_1URF,PKN1_HUMAN_Tsuboyama_2023_1URF.csv,PKN1_HUMAN,Human,Homo sapiens,GIPATNLSRVAGLEKQLAIELKVKQGAENMIQTYSNGSTKDRKLLLTAQQMLQDSKTKIDIIRMQLRRALQ,71,FALSE,1301,1301,0,-0.5,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-36,1-71,Serine/threonine-protein kinase N1,Stability,cDNA display proteolysis,PKN1_HUMAN_2023-08-07_b01.a2m,1,71,71,0.1,0.2,187829,0.845,60,53755.8,895.93,High,13,0.2166666667,Tsuboyama2023_Dataset2_Dataset33,ddG_ML_float,1,mut_type,PKN1_HUMAN_theta0.2_2023-08-07_b01.npy,PKN1_HUMAN.pdb,1-71,1,,Stability +POLG_CXB3N_Mattenberger_2021,POLG_CXB3N_Mattenberger_2021.csv,POLG_CXB3N,Virus,Coxsackievirus B3,MGAQVSTQKTGAHETRLNASGNSIIHYTNINYYKDAASNSANRQDFTQDPGKFTEPVKDIMIKSLPALNSPTVEECGYSDRARSITLGNSTITTQECANVVVGYGVWPDYLKDSEATAEDQPTQPDVATCRFYTLDSVQWQKTSPGWWWKLPDALSNLGLFGQNMQYHYLGRTGYTVHVQCNASKFHQGCLLVVCVPEAEMGCATLDNTPSSAELLGGDSAKEFADKPVASGSNKLVQRVVYNAGMGVGVGNLTIFPHQWINLRTNNSATIVMPYTNSVPMDNMFRHNNVTLMVIPFVPLDYCPGSTTYVPITVTIAPMCAEYNGLRLAGHQGLPTMNTPGSCQFLTSDDFQSPSAMPQYDVTPEMRIPGEVKNLMEIAEVDSVVPVQNVGEKVNSMEAYQIPVRSNEGSGTQVFGFPLQPGYSSVFSRTLLGEILNYYTHWSGSIKLTFMFCGSAMATGKFLLAYSPPGAGAPTKRVDAMLGTHVIWDVGLQSSCVLCIPWISQTHYRFVASDEYTAGGFITCWYQTNIVVPADAQSSCYIMCFVSACNDFSVRLLKDTPFISQQNFFQGPVEDAITAAIGRVADTVGTGPTNSEAIPALTAAETGHTSQVVPGDTMQTRHVKNYHSRSESTIENFLCRSACVYFTEYKNSGAKRYAEWVLTPRQAAQLRRKLEFFTYVRFDLELTFVITSTQQPSTTQNQDAQILTHQIMYVPPGGPVPDKVDSYVWQTSTNPSVFWTEGNAPPRMSIPFLSIGNAYSNFYDGWSEFSRNGVYGINTLNNMGTLYARHVNAGSTGPIKSTIRIYFKPKHVKAWIPRPPRLCQYEKAKNVNFQPSGVTTTRQSITTMTNTGAFGQQSGAVYVGNYRVVNRHLATSADWQNCVWESYNRDLLVSTTTAHGCDIIARCQCTTGVYFCASKNKHYPISFEGPGLVEVQESEYYPRRYQSHVLLAAGFSEPGDCGGILRCEHGVIGIVTMGGEGVVGFADIRDLLWLEDDAMEQGVKDYVEQLGNAFGSGFTNQICEQVNLLKESLVGQDSILEKSLKALVKIISALVIVVRNHDDLITVTATLALIGCTSSPWRWLKQKVSQYYGIPMAERQNNSWLKKFTEMTNACKGMEWIAVKIQKFIEWLKVKILPEVREKHEFLNRLKQLPLLESQIATIEQSAPSQSDQEQLFSNVQYFAHYCRKYAPLYAAEAKRVFSLEKKMSNYIQFKSKCRIEPVCLLLHGSPGAGKSVATNLIGRSLAEKLNSSVYSLPPDPDHFDGYKQQA
VVIMDDLCQNPDGKDVSLFCQMVSSVDFVPPMAALEEKGILFTSPFVLASTNAGSINAPTVSDSRALARRFHFDMNIEVISMYSQNGKINMPMSVKTCDDECCPVNFKKCCPLVCGKAIQFIDRRTQVRYSLDMLVTEMFREYNHRHSVGTTLEALFQGPPVYREIKISVAPETPPPPAIADLLKSVDSEAVREYCKEKGWLVPEINSTLQIEKHVSRAFICLQALTTFVSVAGIIYIIYKLFAGFQGAYTGVPNQKPRVPTLRQAKVQGPAFEFAVAMMKRNSSTVKTEYGEFTMLGIYDRWAVLPRHAKPGPTILMNDQEVGVLDAKELVDKDGTNLELTLLKLNRNEKFRDIRGFLAKEEVEVNEAVLAINTSKFPNMYIPVGQVTEYGFLNLGGTPTKRMLMYNFPTRAGQCGGVLMSTGKVLGIHVGGNGHQGFSAALLKHYFNDEQGEIEFIESSKDAGFPVINTPSKTKLEPSVFHQVFEGNKEPAVLRSGDPRLKANFEEAIFSKYIGNVNTHVDEYMLEAVDHYAGQLATLDISTEPMKLEDAVYGTEGLEALDLTTSAGYPYVALGIKKRDILSKKTKDLTKLKECMDKYGLNLPMVTYVKDELRSIEKVAKGKSRLIEASSLNDSVAMRQTFGNLYKTFHLNPGVVTGSAVGCDPDLFWSKIPVMLDGHLIAFDYSGYDASLSPVWFACLKMLLEKLGYTHKETNYIDYLCNSHHLYRDKHYFVRGGMPSGCSGTSIFNSMINNIIIRTLMLKVYKGIDLDQFRMIAYGDDVIASYPWPIDASLLAEAGKGYGLIMTPADKGECFNEVTWTNATFLKRYFRADEQYPFLVHPVMPMKDIHESIRWTKDPKNTQDHVRSLCLLAWHNGEHEYEEFIRKIRSVPVGRCLTLPAFSTLRRKWLDSF,2185,FALSE,15711,15711,0,-2.76355725,median,Mattenberger,Globally defining the effects of mutations in a picornavirus capsid,2021,10.7554/eLife.64256,1-851,Picornavirus capsid,Viral replication,Growth,POLG_CXB3N_1-861_theta0.99_04-29-2022_b07.a2m,1,861,861,0.7,0.01,7909,0.959,826,1515.2,1.834382567,medium,94,0.1138014528,POLG_CXB3N_Mattenberger_2021.csv,log_fitness_by_syn_mut_fitness,1,mutant,POLG_CXB3N_theta_0.01.npy,POLG_CXB3N.pdb,1-2185,0.1,,OrganismalFitness +POLG_DEN26_Suphatrakul_2023,POLG_DEN26_Suphatrakul_2023.csv,POLG_DEN26,Virus,Dengue 
virus,GTGNIGETLGEKWKSRLNALGKSEFQIYKKSGIQEVDRTLAKEGIKRGETDHHAVSRGSAKLRWFVERNMVTPEGKVVDLGCGRGGWSYYCGGLKNVREVKGLTKGGPGHEEPIPMSTYGWNLVRLQSGVDVFFIPPEKCDTLLCDIGESSPNPTVEAGRTLRVLNLVENWLNNNTQFCIKVLNPYMPSVIEKMEALQRKYGGALVRNPLSRNSTHEMYWVSNASGNIVSSVNMISRMLINRFTMRYKKATYEPDVDLGSGTRNIGIESEIPNLDIIGKRIEKIKQEHETSWHYDQDHPYKTWAYHGSYETKQTGSASSMVNGVVRLLTKPWDVVPMVTQMAMTDTTPFGQQRVFKEKVDTRTQEPKEGTKKLMKITAEWLWKELGKKKTPRMCTREEFTRKVRSNAALGAIFTDENKWKSAREAVEDSRFWELVDKERNLHLEGKCETCVYNMMGKREKKLGEFGKAKGSRAIWYMWLGARFLEFEALGFLNEDHWFSRENSLSGVEGEGLHKLGYILRDVSKKEGGAMYADDTAGWDTRITLEDLKNEEMVTNHMEGEHKKLAEAIFKLTYQNKVVRVQRPTPRGTVMDIISRRDQRGSGQVGTYGLNTFTNMEAQLIRQMEGEGVFKSIQHLTITEEIAVQNWLARVGRERLSRMAISGDDCVVKPLDDRFASALTALNDMGKIRKDIQQWEPSRGWNDWTQVPFCSHHFHELIMKDGRVLVVPCRNQDELIGRARISQGAGWSLRETACLGKSYAQMWSLMYFHRRDLRLAANAICSAVPSHWVPTSRTTWSIHAKHEWMTTEDMLTVWNRVWIQENPWMEDKTPVESWEEIPYLGKREDQWCGSLIGLTSRATWAKNIQAAINQVRSLIGNEEYTDYMPSMKRFRREEEEAGVLW,900,FALSE,16897,16897,0,-5.373371442,median,Suphatrakul,Functional analysis of flavivirus replicase by deep mutational scanning of dengue NS5,2023,10.1101/2023.03.07.531617,1-900,Flavivirus NS5,Viral replication,Growth,POLG_DEN26_2023-08-07_b01.a2m,1,900,900,0.1,0.01,10676,1,900,114.5,0.1272222222,Low,0,0,POLG_DEN26_Suphatrakul_2023.csv,score,1,mutant,POLG_DEN26_theta0.01_2023-08-07_b01.npy,POLG_DEN26.pdb,1-900,1,,OrganismalFitness +POLG_HCVJF_Qi_2014,POLG_HCVJF_Qi_2014.csv,POLG_HCVJF,Virus,Hepatitis C virus genotype 2a (isolate JFH-1) 
(HCV),MSTNPKPQRKTKRNTNRRPEDVKFPGGGQIVGGVYLLPRRGPRLGVRTTRKTSERSQPRGRRQPIPKDRRSTGKAWGKPGRPWPLYGNEGLGWAGWLLSPRGSRPSWGPTDPRHRSRNVGKVIDTLTCGFADLMGYIPVVGAPLSGAARAVAHGVRVLEDGVNYATGNLPGFPFSIFLLALLSCITVPVSAAQVKNTSSSYMVTNDCSNDSITWQLEAAVLHVPGCVPCERVGNTSRCWVPVSPNMAVRQPGALTQGLRTHIDMVVMSATFCSALYVGDLCGGVMLAAQVFIVSPQYHWFVQECNCSIYPGTITGHRMAWDMMMNWSPTATMILAYVMRVPEVIIDIVSGAHWGVMFGLAYFSMQGAWAKVIVILLLAAGVDAGTTTVGGAVARSTNVIAGVFSHGPQQNIQLINTNGSWHINRTALNCNDSLNTGFLAALFYTNRFNSSGCPGRLSACRNIEAFRIGWGTLQYEDNVTNPEDMRPYCWHYPPKPCGVVPARSVCGPVYCFTPSPVVVGTTDRRGVPTYTWGENETDVFLLNSTRPPQGSWFGCTWMNSTGFTKTCGAPPCRTRADFNASTDLLCPTDCFRKHPDATYIKCGSGPWLTPKCLVHYPYRLWHYPCTVNFTIFKIRMYVGGVEHRLTAACNFTRGDRCDLEDRDRSQLSPLLHSTTEWAILPCTYSDLPALSTGLLHLHQNIVDVQYMYGLSPAITKYVVRWEWVVLLFLLLADARVCACLWMLILLGQAEAALEKLVVLHAASAANCHGLLYFAIFFVAAWHIRGRVVPLTTYCLTGLWPFCLLLMALPRQAYAYDAPVHGQIGVGLLILITLFTLTPGYKTLLGQCLWWLCYLLTLGEAMIQEWVPPMQVRGGRDGIAWAVTIFCPGVVFDITKWLLALLGPAYLLRAALTHVPYFVRAHALIRVCALVKQLAGGRYVQVALLALGRWTGTYIYDHLTPMSDWAASGLRDLAVAVEPIIFSPMEKKVIVWGAETAACGDILHGLPVSARLGQEILLGPADGYTSKGWKLLAPITAYAQQTRGLLGAIVVSMTGRDRTEQAGEVQILSTVSQSFLGTTISGVLWTVYHGAGNKTLAGLRGPVTQMYSSAEGDLVGWPSPPGTKSLEPCKCGAVDLYLVTRNADVIPARRRGDKRGALLSPRPISTLKGSSGGPVLCPRGHVVGLFRAAVCSRGVAKSIDFIPVETLDVVTRSPTFSDNSTPPAVPQTYQVGYLHAPTGSGKSTKVPVAYAAQGYKVLVLNPSVAATLGFGAYLSKAHGINPNIRTGVRTVMTGEAITYSTYGKFLADGGCASGAYDIIICDECHAVDATSILGIGTVLDQAETAGVRLTVLATATPPGSVTTPHPDIEEVGLGREGEIPFYGRAIPLSCIKGGRHLIFCHSKKKCDELAAALRGMGLNAVAYYRGLDVSIIPAQGDVVVVATDALMTGYTGDFDSVIDCNVAVTQAVDFSLDPTFTITTQTVPQDAVSRSQRRGRTGRGRQGTYRYVSTGERASGMFDSVVLCECYDAGAAWYDLTPAETTVRLRAYFNTPGLPVCQDHLEFWEAVFTGLTHIDAHFLSQTKQAGENFAYLVAYQATVCARAKAPPPSWDAMWKCLARLKPTLAGPTPLLYRLGPITNEVTLTHPGTKYIATCMQADLEVMTSTWVLAGGVLAAVAAYCLATGCVSIIGRLHVNQRVVVAPDKEVLYEAFDEMEECASRAALIEEGQRIAEMLKSKIQGLLQQASKQAQDIQPAMQASWPKVEQFWARHMWNFISGIQYLAGLSTLPGNPAVASMMAFSAALTSPLSTSTTILLNIMGGWLASQIAPPAGATGFVVSGLVGAAVGSIGLGKVLVDILAGYGAGISGALVAFKIMSGEKPSMEDVINLLPGILSPGALVVGVICAAILRRHVGPGEGAVQWMNRLIAFASRGNHVAPTHYVTESDASQRVTQLLGSLTITSLLRRLHNWITEDCPIPCSGSWLRDVWDWVCTILTD
FKNWLTSKLFPKLPGLPFISCQKGYKGVWAGTGIMTTRCPCGANISGNVRLGSMRITGPKTCMNTWQGTFPINCYTEGQCAPKPPTNYKTAIWRVAASEYAEVTQHGSYSYVTGLTTDNLKIPCQLPSPEFFSWVDGVQIHRFAPTPKPFFRDEVSFCVGLNSYAVGSQLPCEPEPDADVLRSMLTDPPHITAETAARRLARGSPPSEASSSVSQLSAPSLRATCTTHSNTYDVDMVDANLLMEGGVAQTEPESRVPVLDFLEPMAEEESDLEPSIPSECMLPRSGFPRALPAWARPDYNPPLVESWRRPDYQPPTVAGCALPPPKKAPTPPPRRRRTVGLSESTISEALQQLAIKTFGQPPSSGDAGSSTGAGAAESGGPTSPGEPAPSETGSASSMPPLEGEPGDPDLESDQVELQPPPQGGGVAPGSGSGSWSTCSEEDDTTVCCSMSYSWTGALITPCSPEEEKLPINPLSNSLLRYHNKVYCTTSKSASQRAKKVTFDRTQVLDAHYDSVLKDIKLAASKVSARLLTLEEACQLTPPHSARSKYGFGAKEVRSLSGRAVNHIKSVWKDLLEDPQTPIPTTIMAKNEVFCVDPAKGGKKPARLIVYPDLGVRVCEKMALYDITQKLPQAVMGASYGFQYSPAQRVEYLLKAWAEKKDPMGFSYDTRCFDSTVTERDIRTEESIYQACSLPEEARTAIHSLTERLYVGGPMFNSKGQTCGYRRCRASGVLTTSMGNTITCYVKALAACKAAGIVAPTMLVCGDDLVVISESQGTEEDERNLRAFTEAMTRYSAPPGDPPRPEYDLELITSCSSNVSVALGPRGRRRYYLTRDPTTPLARAAWETVRHSPINSWLGNIIQYAPTIWVRMVLMTHFFSILMVQDTLDQNLNFEMYGSVYSVNPLDLPAIIERLHGLDAFSMHTYSHHELTRVASALRKLGAPPLRVWKSRARAVRASLISRGGKAAVCGRYLFNWAVKTKLKLTPLPEARLLDLSSWFTVGAGGGDIFHSVSRARPRSLLFGLLLLFVGVGLFLLPAR,3033,FALSE,1630,1630,0,-0.95,manual,Qi,A Quantitative High-Resolution Genetic Profile Rapidly Identifies Sequence Determinants of Hepatitis C Viral Fitness and Drug Sensitivity,2014,10.1371/journal.ppat.1004064,1994-2079,NS5A,Viral replication,Growth,POLG_HCVJF_theta0.99_1984-2089_11-26-2021_b08.a2m,1984,2089,106,0.8,0.01,16556,1,106,4421.2,41.70943396,medium,93,0.8773584906,POLG_HCVJF_Qi_2014.csv,fitness,1,mutant,POLG_HCVJF_theta_0.01.npy,POLG_HCVJF.pdb,1981-2224,0.1,,OrganismalFitness +POLG_PESV_Tsuboyama_2023_2MXD,POLG_PESV_Tsuboyama_2023_2MXD.csv,POLG_PESV,Virus,Porcine enteric sapovirus,ALRDDEYDEWQDIIRDWRKEMTVQQFLDLKERALSGASDPDSQRYNAWLELRA,53,TRUE,5130,995,4135,-1.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-37,1-53,Genome polyprotein,Stability,cDNA display 
proteolysis,POLG_PESV_2023-08-07_b03.a2m,1,53,53,0.3,0.01,20190,0.887,47,3718.4,79.11489362,Medium,12,0.2553191489,Tsuboyama2023_Dataset2_Dataset34,ddG_ML_float,1,mut_type,POLG_PESV_theta0.01_2023-08-07_b03.npy,POLG_PESV.pdb,1-53,1,,Stability +PPARG_HUMAN_Majithia_2016,PPARG_HUMAN_Majithia_2016.csv,PPARG_HUMAN,Human,Homo sapiens,MGETLGDSPIDPESDSFTDTLSANISQEMTMVDTEMPFWPTNFGISSVDLSVMEDHSHSFDIKPFTTVDFSSISTPHYEDIPFTRTDPVVADYKYDLKLQEYQSAIKVEPASPPYYSEKTQLYNKPHEEPSNSLMAIECRVCGDKASGFHYGVHACEGCKGFFRRTIRLKLIYDRCDLNCRIHKKSRNKCQYCRFQKCLAVGMSHNAIRFGRMPQAEKEKLLAEISSDIDQLNPESADLRALAKHLYDSYIKSFPLTKAKARAILTGKTTDKSPFVIYDMNSLMMGEDKIKFKHITPLQEQSKEVAIRIFQGCQFRSVEAVQEITEYAKSIPGFVNLDLNDQVTLLKYGVHEIIYTMLASLMNKDGVLISEGQGFMTREFLKSLRKPFGDFMEPKFEFAVKFNALELDDSDLAIFIAVIILSGDRPGLLNVKPIEDIQDNLLQALELQLKLNHPESSQLFAKLLQKMTDLRQIVTEHVQLLQVIKKTETDMSLHPLLQEIYKDLY,505,FALSE,9576,9576,0,-2.5,manual,Majithia,Prospective functional classification of all possible missense variants in PPARG,2016,10.1038/ng.3700,2-505,PPARG,Expression of CD36,FACS,PPARG_HUMAN_2023-10-12_b04.a2m,1,505,505,0.4,0.2,39993,0.8,404,3092.1,7.653712871,Medium,86,0.2128712871,https://miter.broadinstitute.org/mitergrade/?query=p.Y505A&prevalence=1.0e-5,Experimental function score,1,mutant,PPARG_HUMAN_theta0.2_2023-10-12_b04.npy,PPARG_HUMAN.pdb,1-505,1,,Activity +PPM1D_HUMAN_Miller_2022,PPM1D_HUMAN_Miller_2022.csv,PPM1D_HUMAN,Human,Homo 
sapiens,MAGLYSLGVSVFSDQGGRKYMEDVTQIVVEPEPTAEEKPSPRRSLSQPLPPRPSPAALPGGEVSGKGPAVAAREARDPLPDAGASPAPSRCCRRRSSVAFFAVCDGHGGREAAQFAREHLWGFIKKQKGFTSSEPAKVCAAIRKGFLACHLAMWKKLAEWPKTMTGLPSTSGTTASVVIIRGMKMYVAHVGDSGVVLGIQDDPKDDFVRAVEVTQDHKPELPKERERIEGLGGSVMNKSGVNRVVWKRPRLTHNGPVRRSTVIDQIPFLAVARALGDLWSYDFFSGEFVVSPEPDTSVHTLDPQKHKYIILGSDGLWNMIPPQDAISMCQDQEEKKYLMGEHGQSCAKMLVNRALGRWRQRMLRADNTSAIVICISPEVDNQGNFTNEDELYLNLTDSPSYNSQETCVMTPSPCSTPPVKSLEEDPWPRVNSKDHIPALVRSNAFSENFLEVSAEIARENVQGVVIPSKDPEPLEENCAKALTLRIHDSLNNSLPIGLVPTNSTNTVMDQKNLKMSTPGQMKAQEIERTPPTNFKRTLEESNSGPLMKKHRRNGLSRSSGAQPASLPTTSQRKNSVKLTMRRRLRGQKKIGNPLLHQHRKTVCVC,605,FALSE,7889,7889,0,0.01275,median,Miller,Allosteric inhibition of PPM1D serine/threonine phosphatase via an altered conformational state,2022,10.1038/s41467-022-30463-9,2-421,Protein phosphatase 1D,Fitness with GFP reporter,quantification and selection of GFP-positive cells by flow cytometry after DNA damage induced by daunorubicin,PPM1D_HUMAN_2023-10-12_b01.a2m,1,605,605,0.1,0.2,1844,0.993,601,346.3,0.5762063228,Low,27,0.04492512479,PPM1D_HUMAN_Miller_2022_raw.xlsx,fitness,1,mutant,PPM1D_HUMAN_theta0.2_2023-10-12_b01.npy,PPM1D_HUMAN.pdb,1-605,1,,OrganismalFitness +PR40A_HUMAN_Tsuboyama_2023_1UZC,PR40A_HUMAN_Tsuboyama_2023_1UZC.csv,PR40A_HUMAN,Human,Homo sapiens,TYTWNTKEEAKQAFKELLKEKRVPSNASWEQAMKMIINDPRYSALAKLSEKKQAFNAYKVQTE,63,TRUE,2033,1163,870,-1.362579422,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-38,1-63,Pre-mRNA-processing factor 40 homolog A,Stability,cDNA display proteolysis,PR40A_HUMAN_2023-08-07_b03.a2m,1,63,63,0.3,0.2,63560,0.857,54,3663.8,67.84814815,Medium,16,0.2962962963,Tsuboyama2023_Dataset2_Dataset35,ddG_ML_float,1,mut_type,PR40A_HUMAN_theta0.2_2023-08-07_b03.npy,PR40A_HUMAN.pdb,1-63,1,,Stability +PRKN_HUMAN_Clausen_2023,PRKN_HUMAN_Clausen_2023.csv,PRKN_HUMAN,Human,Homo 
sapiens,MIVFVRFNSSHGFPVEVDSDTSIFQLKEVVAKRQGVPADQLRVIFAGKELRNDWTVQNCDLDQQSIVHIVQRPWRKGQEMNATGGDDPRNAAGGCEREPQSLTRVDLSSSVLPGDSVGLAVILHTDSRKDSPPAGSPAGRSIYNSFYVYCKGPCQRVQPGKLRVQCSTCRQATLTLTQGPSCWDDVLIPNRMSGECQSPHCPGTSAEFFFKCGAHPTSDKETSVALHLIATNSRNITCITCTDVRSPVLVFQCNSRHVICLDCFHLYCVTRLNDRQFVHDPQLGYSLPCVAGCPNSLIKELHHFRILGEEQYNRYQQYGAEECVLQMGGVLCPRPGCGAGLLPEPDQRKVTCEGGNGLGCGFAFCRECKEAYHEGECSAVFEASGTTTQAYRVDERAAEQARWEAASKETIKKTTKPCPRCHVPVEKNGGCMHMKCPQPQCRLEWCWNCGCEWNRVCMGDHWFDV,465,FALSE,8756,8756,0,0.75,manual,Clausen,A mutational atlas for Parkin proteostasis,2023,10.1101/2023.06.08.544160,1-465,Parkin,protein stability,FACS,PRKN_HUMAN_2023-08-07_b05.a2m,1,465,465,0.5,0.2,1457,0.998,464,195.2,0.4206896552,Low,21,0.04525862069,urn_mavedb_00000114-a-1_scores.csv,score,1,mutant,PRKN_HUMAN_theta0.2_2023-08-07_b05.npy,PRKN_HUMAN.pdb,1-465,1,,Expression +PSAE_SYNP2_Tsuboyama_2023_1PSE,PSAE_SYNP2_Tsuboyama_2023_1PSE.csv,PSAE_SYNP2,Prokaryote,Synechococcus sp.,AIERGSKVKILRKESYWYGDVGTVASIDKSGIIYPVIVRFNKVNYNGFSGSAGGLNTNNFAEHELEVV,68,TRUE,1579,1219,360,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-39,1-68,Photosystem I reaction center subunit IV,Stability,cDNA display proteolysis,PSAE_SYNP2_2023-08-07_b09.a2m,1,68,68,0.9,0.2,1785,0.868,59,130.7,2.215254237,Medium,9,0.1525423729,Tsuboyama2023_Dataset2_Dataset36,ddG_ML_float,1,mut_type,PSAE_SYNP2_theta0.2_2023-08-07_b09.npy,PSAE_SYNP2.pdb,1-68,1,,Stability +PTEN_HUMAN_Matreyek_2021,PTEN_HUMAN_Matreyek_2021.csv,PTEN_HUMAN,Human,Homo 
sapiens,MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV,403,FALSE,5083,5083,0,0.7708605475,median,Matreyek,Integrating thousands of PTEN variant activity and abundance measurements reveals variant subgroups and new dominant negatives in cancers,2021,10.1186/s13073-021-00984-x,1-403,PTEN,Protein abundance (FACS sorting for abundance of GFP-fused target),Protein stability,PTEN_HUMAN_full_11-26-2021_b01.a2m,1,403,403,0.1,0.2,19058,0.752,303,1425.3,4.703960396,medium,52,0.1716171617,PTEN_HUMAN_Matreyek_2021.csv,score_total,1,variant,PTEN_HUMAN_theta_0.2.npy,PTEN_HUMAN.pdb,1-403,0.1,,Expression +PTEN_HUMAN_Mighell_2018,PTEN_HUMAN_Mighell_2018.csv,PTEN_HUMAN,Human,Homo sapiens,MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV,403,FALSE,7260,7260,0,-1.5,manual,Mighell,A Saturation Mutagenesis Approach to Understanding PTEN Lipid Phosphatase Activity and Genotype-Phenotype Relationships,2018,10.1016/j.ajhg.2018.03.018,1-403,PTEN,"growth (surrogate for enzymatic activity/hydrolysis of lipid phosphates to restore PIP2, which affects proliferation rate)",lipid phosphatase 
activity,PTEN_HUMAN_full_11-26-2021_b01.a2m,1,403,403,0.1,0.2,19058,0.752,303,1425.3,4.703960396,medium,52,0.1716171617,PTEN_HUMAN_Mighell_2018.csv,Fitness_score,1,mutant,PTEN_HUMAN_theta_0.2.npy,PTEN_HUMAN.pdb,1-403,0.1,,Activity +Q2N0S5_9HIV1_Haddox_2018,Q2N0S5_9HIV1_Haddox_2018.csv,Q2N0S5_9HIV1,Virus,HIV,MRVMGIQRNCQHLFRWGTMILGMIIICSAAENLWVTVYYGVPVWKDAETTLFCASDAKAYETEKHNVWATHACVPTDPNPQEIHLENVTEEFNMWKNNMVEQMHTDIISLWDQSLKPCVKLTPLCVTLQCTNVTNNITDDMRGELKNCSFNMTTELRDKKQKVYSLFYRLDVVQINENQGNRSNNSNKEYRLINCNTSAITQACPKVSFEPIPIHYCAPAGFAILKCKDKKFNGTGPCPSVSTVQCTHGIKPVVSTQLLLNGSLAEEEVMIRSENITNNAKNILVQFNTPVQINCTRPNNNTRKSIRIGPGQAFYATGDIIGDIRQAHCNVSKATWNETLGKVVKQLRKHFGNNTIIRFANSSGGDLEVTTHSFNCGGEFFYCNTSGLFNSTWISNTSVQGSNSTGSNDSITLPCRIKQIINMWQRIGQAMYAPPIQGVIRCVSNITGLILTRDGGSTNSTTETFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTRAKRRVVGREKRAVGIGAVFLGFLGAAGSTMGAASMTLTVQARNLLSGIVQQQSNLLRAIEAQQHLLKLTVWGIKQLQARVLAVERYLRDQQLLGIWGCSGKLICTTNVPWNSSWSNRNLSEIWDNMTWLQWDKEISNYTQIIYGLLEESQNQQEKNEQDLLALDKWASLWNWFDISNWLWYIKIFIMIVGGLIGLRIVFAVLSVIHRVRQGYSPLSFQTHTPNPRGLDRPERIEEEDGEQDRGRSTRLVSGFLALAWDDLRSLCLFCYHRLRDFILIAARIVELLGHSSLKGLRLGWEGLKYLWNLLAYWGRELKISAINLFDTIAIAVAEWTDRVIEIGQRLCRAFLHIPRRIRQGLERALL,860,FALSE,12729,12729,0,-2,manual,Haddox,Mapping mutational effects along the evolutionary landscape of HIV envelope,2018,10.7554/eLife.34420,30-699,HIV env (BG505),Viral replication,Growth,Q2N0S5_9HIV1_full_theta0.99_04-29-2022_b09.a2m,1,860,860,0.9,0.01,75014,0.976,839,36369.7,43.3488677,medium,2462,2.934445769,Q2N0S5_9HIV1_Haddox_2018.csv,fitness,1,mutant,Q2N0S5_9HIV1_theta_0.01.npy,Q2N0S5_9HIV1.pdb,1-860,0.1,,OrganismalFitness +Q53Z42_HUMAN_McShan_2019_binding-TAPBPR,Q53Z42_HUMAN_McShan_2019_binding-TAPBPR.csv,Q53Z42_HUMAN,Human,Homo 
sapiens,MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGQEQRYTCHVQHEGLPKPLTLRWEPSSQPTIPIVGIIAGLVLFGAVITGAVVAAVMWRRKSSDRKGGSYSQAASSDSAQGSDVSLTACKV,365,FALSE,3344,3344,0,0.19,median,McShan,Molecular determinants of chaperone interactions on MHC-I for folding and antigen repertoire selection,2019,10.1073/pnas.1915562116,26-205,HLA-A,binding affinity (TAPBPR),,Q53Z42_HUMAN_2023-08-07_b01.a2m,1,365,365,0.1,0.2,41636,0.986,360,4986.2,13.85055556,Medium,210,0.5833333333,,score,1,mut_proteingym,Q53Z42_HUMAN_theta0.2_2023-08-07_b01.npy,Q53Z42_HUMAN.pdb,1-365,1,25,Binding +Q53Z42_HUMAN_McShan_2019_expression,Q53Z42_HUMAN_McShan_2019_expression.csv,Q53Z42_HUMAN,Human,Homo sapiens,MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGQEQRYTCHVQHEGLPKPLTLRWEPSSQPTIPIVGIIAGLVLFGAVITGAVVAAVMWRRKSSDRKGGSYSQAASSDSAQGSDVSLTACKV,365,FALSE,3344,3344,0,-0.73,median,McShan,Molecular determinants of chaperone interactions on MHC-I for folding and antigen repertoire selection,2019,10.1073/pnas.1915562116,26-205,HLA-A,surface expression,,Q53Z42_HUMAN_2023-08-07_b01.a2m,1,365,365,0.1,0.2,41636,0.986,360,4986.2,13.85055556,Medium,210,0.5833333333,,score,1,mut_proteingym,Q53Z42_HUMAN_theta0.2_2023-08-07_b01.npy,Q53Z42_HUMAN.pdb,1-365,1,25,Expression +Q59976_STRSQ_Romero_2015,Q59976_STRSQ_Romero_2015.csv,Q59976_STRSQ,Prokaryote,Streptomyces 
sp.,MVPAAQQTAMAPDAALTFPEGFLWGSATASYQIEGAAAEDGRTPSIWDTYARTPGRVRNGDTGDVATDHYHRWREDVALMAELGLGAYRFSLAWPRIQPTGRGPALQKGLDFYRRLADELLAKGIQPVATLYHWDLPQELENAGGWPERATAERFAEYAAIAADALGDRVKTWTTLNEPWCSAFLGYGSGVHAPGRTDPVAALRAAHHLNLGHGLAVQALRDRLPADAQCSVTLNIHHVRPLTDSDADADAVRRIDALANRVFTGPMLQGAYPEDLVKDTAGLTDWSFVRDGDLRLAHQKLDFLGVNYYSPTLVSEADGSGTHNSDGHGRSAHSPWPGADRVAFHQPPGETTAMGWAVDPSGLYELLRRLSSDFPALPLVITENGAAFHDYADPEGNVNDPERIAYVRDHLAAVHRAIKDGSDVRGYFLWSLLDNFEWAHGYSKRFGAVYVDYPTGTRIPKASARWYAEVARTGVLPTAGDPNSSSVDKLAAALEHHHHHH,501,FALSE,2999,2999,0,-1,manual,Romero,Dissecting enzyme function with microfluidic-based deep mutational scanning,2015,10.1073/pnas.1422285112,2-501,β-glucosidase,Enzyme function,Activity,Q59976_STRSQ_full_11-26-2021_b03.a2m,1,501,501,0.3,0.2,105913,0.882,442,13981.2,31.63167421,medium,850,1.923076923,Q59976_STRSQ_Romero_2015.csv,enrichment,1,mutant,Q59976_STRSQ_theta_0.2.npy,Q59976_STRSQ.pdb,1-501,0.1,,Activity +Q6WV13_9MAXI_Somermeyer_2022,Q6WV13_9MAXI_Somermeyer_2022.csv,Q6WV12_9MAXI,Eukaryote,Pontellina plumata,MPAMKIECRITGTLNGVEFELVGGGEGTPEQGRMTNKMKSTKGALTFSPYLLSHVMGYGFYHFGTYPSGYENPFLHAINNGGYTNTRIEKYEDGGVLHVSFSYRYEAGRVIGDFKVVGTGFPEDSVIFTDKIIRSNATVEHLHPMGDNVLVGSFARTFSLRDGGYYSFVVDSHMHFKSAIHPSILQNGGPMFAFRRVEELHSNTELGIVEYQHAFKTPIAFA,222,TRUE,31401,1141,30260,15721.24977,median,Somermeyer,Heterogeneity of the GFP fitness landscape and data-driven protein design,2022,10.7554/eLife.75842,2-222,Green fluorescent protein ppluGFP2,Fluorescence,FACS,Q6WV12_9MAXI_full_b0.6.a2m,1,222,222,0.6,0.2,506,1,222,95.9,0.431981982,Low,4,0.01801801802,Q6WV13_9MAXI_Somermeyer_2022.csv,replicates_mean_brightness,1,mutant,Q6WV12_9MAXI_theta_0.2.npy,Q6WV12_9MAXI.pdb,1-222,1,,Activity +Q837P4_ENTFA_Meier_2023,Q837P4_ENTFA_Meier_2023.csv,Q837P4_ENTFA,Prokaryote,Enterococcus 
faecalis,MTDLIKASKFFYHYLKRYKVSFLFIFLAIFAATYLQVKAPQFVGEAIQELAKYAVNVMQGKDDKSAFVSVIWKLLIFYVLTSAASFIYSILFTQVVGKSTNRMRIGLFNKLEKLTIRFFDSHQDGEILSRFTSDLDNIQNSLNQALLQVLTNIALLVGVLIMMFRQNVELAWATIASTPIAILIAVFVISKARKYVDLQQDEVGKLNGYMDEKISGQRVIITNGLQEETIDGFLEQNEKVRAATYKGQVYSGLLFPMMQGMSLVNTAIVIFFGGWLAINGSVDRAAALGLVVMFVQYSQQYYQPLMQISSGYSMIQLAVTGARRLNEMFDEPDEIRPENGEKLEEINKAVALNHVVFGYNPETPVLKDVSIHVDKGEMVALVGPTGSGKTTIMNLMNRFYDVNEGAVTFDGVDIREMDLDSLRSHVGIVLQESVLFSGTIRENIAFGKPEATDEEIVQAAKQANIHEFIVNLEQGYDTEITEENNLFSTGQKQLVSIARTIITNPELLILDEATSNVDTVTEAKIQKAMDEAIKGRTSFVIAHRLKTILNADRIIVLRDGEVIEEGNHHELVEQDGFYAELYKNQFVFE,589,FALSE,697,697,0,-0.6270963227,median,Meier,Deep mutational scan of a drug efflux pump reveals its structure–function landscape,2023,10.1038/s41589-022-01205-1,32-543,EfrD ABC transporter,Drug efflux,Growth,Q837P4_ENTFA_2023-08-07_b09.a2m,1,589,589,0.9,0.2,343933,0.975,574,54079.8,94.2151568,Medium,1123,1.95644599,41589_2022_1205_MOESM4_ESM.xlsx,avg_score,1,mutant,Q837P4_ENTFA_theta0.2_2023-08-07_b09.npy,Q837P4_ENTFA.pdb,1-589,1,,Activity +Q837P5_ENTFA_Meier_2023,Q837P5_ENTFA_Meier_2023.csv,Q837P5_ENTFA,Prokaryote,Enterococcus faecalis,MDLIIQHAKKYKGSVVIALLAVIVMVVSALWQPKLLQQVLEAIMNDDSDKMKNLGIQLIAIAGLGLVAGVINTIFSAKVAQGVSADIREATFRKIQTFSFGNIEKFSAGNLVVRLTNDVTQIQNVIMIALQTLFRIPFLFIGSFILAMLTLPQLWWVIVALVIAVILISMLSFSQMGKHFMIIQNLIDKINGIAKENLLGIRVVKSFVQEKNQLSRFTKVSEELTTHNLIVGSLFAVMIPAFMLVANLAVVGSIFFVSNLVKDDPTLIGGVASFMNYLMQIMMAIIIGGMMMMMTSRAAVSIKRIKEVMETEPDVTYKKVPEQELIGSVEFDHVSFRYPGDEEDTLKDISFSIQPGEMIGIVGATGAGKSTLAQLIPRLFDPTEGKIEVGGVDLREVNEHSLRKTVSFVLQKAILFSGTIAQNLRHGKRDASEADMERASGIAQAKEFIEKLAEGYDAPVEERSNNFSGGQKQRLSITRGVIGEPKILILDDSTSALDARSERLVREALDKELKETTTIVIAQKISSVVHADRILVLDNGRLVGEGTHEELAATNPVYQEIYETQKGKEEA,571,FALSE,747,747,0,-0.85731232,median,Meier,Deep mutational scan of a drug efflux pump reveals its structure–function landscape,2023,10.1038/s41589-022-01205-1,25-523,EfrC ABC transporter,Drug 
efflux,Growth,Q837P5_ENTFA_2023-08-07_b09.a2m,1,571,571,0.9,0.2,346355,0.993,567,54910.5,96.8439153,Medium,1135,2.00176367,,avg_score,1,mutant,Q837P5_ENTFA_theta0.2_2023-08-07_b09.npy,Q837P5_ENTFA.pdb,1-571,1,,Activity +Q8WTC7_9CNID_Somermeyer_2022,Q8WTC7_9CNID_Somermeyer_2022.csv,Q8WTC7_9CNID,Eukaryote,Aequorea macrodactyla,MSKGEELFTGIVPVLIELDGDVHGHKFSVRGEGEGDADYGKLEIKFICTTGKLPVPWPTLVTTLSYGILCFARYPEHMKMNDFFKSAMPEGYIQERTIFFQDDGKYKTRGEVKFEGDTLVNRIELKGMDFKEDGNILGHKLEYNFNSHNVYIMPDKANNGLKVNFKIRHNIEGGGVQLADHYQTNVPLGDGPVLIPINHYLSCQTAISKDRNETRDHMVFLEFFSACGHTHGMDELYK,238,TRUE,33510,1201,32309,5000,manual,Somermeyer,Heterogeneity of the GFP fitness landscape and data-driven protein design,2022,10.7554/eLife.75842,2-238,Green fluorescent protein amacGFP,Fluorescence,FACS,Q8WTC7_9CNID_full_b0.5.a2m,1,238,238,0.5,0.2,655,1,238,118.5,0.4978991597,Low,5,0.02100840336,Q8WTC8_9CNID_Somermeyer_2022.csv,replicates_mean_brightness,1,mutant,Q8WTC7_9CNID_theta_0.2.npy,Q8WTC7_9CNID.pdb,1-238,1,,Activity +R1AB_SARS2_Flynn_2022,R1AB_SARS2_Flynn_2022.csv,R1AB_SARS2,Virus,SARS-COV2,SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ,306,FALSE,5725,5725,0,0.5,manual,Flynn,Comprehensive fitness landscape of SARS-CoV-2 Mpro reveals insights into viral resistance mechanisms,2022,10.7554/eLife.77433,1-306,SARS-CoV-2 Mpro,"FRET, Growth",,R1AB_SARS2_02-19-2022_b07.a2m,1,306,306,0.7,0.01,182169,1,306,326.3,1.066339869,medium,79,0.2581699346,R1AB_SARS2_Flynn_2022.csv,average_growth,1,mutant,R1AB_SARS2_theta_0.01.npy,R1AB_SARS2.pdb,1-306,0.1,,OrganismalFitness +RAD_ANTMA_Tsuboyama_2023_2CJJ,RAD_ANTMA_Tsuboyama_2023_2CJJ.csv,RAD_ANTMA,Eukaryote,Antirrhinum 
majus,PWSAKENKAFERALAVYDKDTPDRWANVARAVEGRTPEEVKKHYEILVEDIKYI,54,TRUE,912,774,138,-0.3943851731,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-40,1-54,Transcription factor RADIALIS,Stability,cDNA display proteolysis,RAD_ANTMA_2023-08-07_b01.a2m,1,54,54,0.1,0.2,423275,0.833,45,38133.9,847.42,High,27,0.6,Tsuboyama2023_Dataset2_Dataset37,ddG_ML_float,1,mut_type,RAD_ANTMA_theta0.2_2023-08-07_b01.npy,RAD_ANTMA.pdb,1-54,1,,Stability +RAF1_HUMAN_Zinkus-Boltz_2019,RAF1_HUMAN_Zinkus-Boltz_2019.csv,RAF1_HUMAN,Human,Homo sapiens,MEHIQGAWKTISNGFGFKDAVFDGSSCISPTIVQQFGYQRRASDDGKLTDPSKTSNTIRVFLPNKQRTVVNVRNGMSLHDCLMKALKVRGLQPECCAVFRLLHEHKGKKARLDWNTDAASLIGEELQVDFLDHVPLTTHNFARKTFLKLAFCDICQKFLLNGFRCQTCGYKFHEHCSTKVPTMCVDWSNIRQLLLFPNSTIGDSGVPALPSLTMRRMRESVSRMPVSSQHRYSTPHAFTFNTSSPSSEGSLSQRQRSTSTPNVHMVSTTLPVDSRMIEDAIRSHSESASPSALSSSPNNLSPTGWSQPKTPVPAQRERAPVSGTQEKNKIRPRGQRDSSYYWEIEASEVMLSTRIGSGSFGTVYKGKWHGDVAVKILKVVDPTPEQFQAFRNEVAVLRKTRHVNILLFMGYMTKDNLAIVTQWCEGSSLYKHLHVQETKFQMFQLIDIARQTAQGMDYLHAKNIIHRDMKSNNIFLHEGLTVKIGDFGLATVKSRWSGSQQVEQPTGSVLWMAPEVIRMQDNNPFSFQSDVYSYGIVLYELMTGELPYSHINNRDQIIFMVGRGYASPDLSKLYKNCPKAMKRLVADCVKKVKEERPLFPQILSSIELLQHSLPKINRSASEPSLHRAAHTEDINACTLTTSPRLPVF,648,FALSE,297,297,0,-0.0671,median,Zinkus-Boltz,A Phage-Assisted Continuous Selection Approach for Deep Mutational Scanning of Protein–Protein Interactions,2019,10.1021/acschembio.9b00669,52-90,RAF oncogene,Viral Replication,binding assays,RAF1_HUMAN_2023-10-12_b05.a2m,1,648,648,0.5,0.2,9685,0.972,630,350.5,0.5563492063,Low,30,0.04761904762,urn_mavedb_00000061-a-1_scores.csv,score,1,mutant,RAF1_HUMAN_theta0.2_2023-10-12_b05.npy,RAF1_HUMAN.pdb,1-648,1,,OrganismalFitness +RASH_HUMAN_Bandaru_2017,RASH_HUMAN_Bandaru_2017.csv,RASH_HUMAN,Human,Homo 
sapiens,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPGCMSCKCVLS,189,FALSE,3134,3134,0,-0.25,manual,Bandaru,Deconstruction of the Ras switching cycle through saturation mutagenesis,2017,10.7554/eLife.27810,2-166,HRAS,C-Raf binding and GEF,activity,RASH_HUMAN_full_11-26-2021_b03.a2m,1,189,189,0.3,0.2,204751,0.862,163,23971.6,147.0650307,high,205,1.257668712,RASH_HUMAN_Bandaru_2017.csv,unregulated,1,mutant,RASH_HUMAN_theta_0.2.npy,RASH_HUMAN.pdb,1-189,0.1,,Activity +RASK_HUMAN_Weng_2022_abundance,RASK_HUMAN_Weng_2022_abundance.csv,RASK_HUMAN,Human,Human,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSKDGKKKKKKSKTKCVIM,188,TRUE,26012,3066,22946,-0.504113408,median,Weng,The energetic and allosteric landscape for KRAS inhibition,2022,10.1101/2022.12.06.519122,2-188,KRAS,Yeast growth,,RASK_HUMAN_2023-08-07_b03.a2m,1,188,188,0.3,0.2,260539,0.888,167,27850.5,166.7694611,High,211,1.263473054,kras_fitness.xlsx,fitness,1,mutant,RASK_HUMAN_theta0.2_2023-08-07_b03.npy,RASK_HUMAN.pdb,1-188,1,,Expression +RASK_HUMAN_Weng_2022_binding-DARPin_K55,RASK_HUMAN_Weng_2022_binding-DARPin_K55.csv,RASK_HUMAN,Human,Human,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQGVDDAFYTLVREIRKHKEKMSKDGKKKKKKSKTKCVIM,188,TRUE,24873,3084,21789,-0.4605455262,median,Weng,The energetic and allosteric landscape for KRAS inhibition,2022,10.1101/2022.12.06.519127,2-188,KRAS,Yeast growth,,RASK_HUMAN_2023-08-07_b03.a2m,1,188,188,0.3,0.2,260539,0.888,167,27850.5,166.7694611,High,211,1.263473054,kras_fitness.xlsx,fitness,1,mutant,RASK_HUMAN_theta0.2_2023-08-07_b03.npy,RASK_HUMAN.pdb,1-188,1,,Binding 
+RBP1_HUMAN_Tsuboyama_2023_2KWH,RBP1_HUMAN_Tsuboyama_2023_2KWH.csv,RBP1_HUMAN,Human,Homo sapiens,ETQAGIKEEIRRQEFLLNSLHRDLQGGIKDLSKEERLWEVQRILTALKRKLR,52,TRUE,1332,975,357,-0.2693189895,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-41,1-52,RalA-binding protein 1,Stability,cDNA display proteolysis,RBP1_HUMAN_2023-08-07_b01.a2m,1,52,52,0.1,0.2,135922,0.827,43,50510,1174.651163,High,6,0.1395348837,Tsuboyama2023_Dataset2_Dataset38,ddG_ML_float,1,mut_type,RBP1_HUMAN_theta0.2_2023-08-07_b01.npy,RBP1_HUMAN.pdb,1-52,1,,Stability +RCD1_ARATH_Tsuboyama_2023_5OAO,RCD1_ARATH_Tsuboyama_2023_5OAO.csv,RCD1_ARATH,Eukaryote,Arabidopsis thaliana,PTLFAAISHKVAENDMLLINADYQQLRDKKMTRAEFVRKLRVIVGDDLLRSTITTLQ,57,TRUE,1261,988,273,-0.3828831078,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-42,1-57,Inactive poly [ADP-ribose] polymerase RCD1,Stability,cDNA display proteolysis,RCD1_ARATH_2023-08-07_b02.a2m,1,57,57,0.2,0.2,6525,0.93,53,1578.5,29.78301887,Medium,2,0.03773584906,Tsuboyama2023_Dataset2_Dataset39,ddG_ML_float,1,mut_type,RCD1_ARATH_theta0.2_2023-08-07_b02.npy,RCD1_ARATH.pdb,1-57,1,,Stability +RCRO_LAMBD_Tsuboyama_2023_1ORC,RCRO_LAMBD_Tsuboyama_2023_1ORC.csv,RCRO_LAMBD,Prokaryote,Escherichia phage lambda,QRITLKDYAMRFGQTKTAKDLGVYQSAINKAIHAGRKIFLTINADGSVYAEEVKDGEVKPFPS,63,TRUE,2278,1195,1083,-1.255848942,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-43,1-63,Regulatory protein cro,Stability,cDNA display proteolysis,RCRO_LAMBD_2023-08-07_b03.a2m,1,63,63,0.3,0.2,392895,0.762,48,51658.6,1076.220833,High,32,0.6666666667,Tsuboyama2023_Dataset2_Dataset40,ddG_ML_float,1,mut_type,RCRO_LAMBD_theta0.2_2023-08-07_b03.npy,RCRO_LAMBD.pdb,1-63,1,,Stability +RD23A_HUMAN_Tsuboyama_2023_1IFY,RD23A_HUMAN_Tsuboyama_2023_1IFY.csv,RD23A_HUMAN,Human,Homo 
sapiens,SEYETMLTEIMSMGYERERVVAALRASYNNPHRAVEYLLTGIPG,44,TRUE,1019,798,221,-0.7285205281,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-44,1-44,UV excision repair protein RAD23 homolog A,Stability,cDNA display proteolysis,RD23A_HUMAN_2023-08-07_b04.a2m,1,44,44,0.4,0.2,100991,0.864,38,7912.9,208.2342105,High,21,0.5526315789,Tsuboyama2023_Dataset2_Dataset41,ddG_ML_float,1,mut_type,RD23A_HUMAN_theta0.2_2023-08-07_b04.npy,RD23A_HUMAN.pdb,1-44,1,,Stability +RDRP_I33A0_Li_2023,RDRP_I33A0_Li_2023.csv,RDRP_I33A0,Virus,Influenza A virus,MDVNPTLLFLKVPAQNAISTTFPYTGDPPYSHGTGTGYTMDTVNRTHQYSERGRWTTNTETGAPQLNPIDGPLPEDNEPSGYAQTDCVLEAMAFLEESHPGIFETSCLETMEVVQQTRVDKLTQGRQTYDWTLNRNQPAATALANTIEVFRSNGLTANESGRLIDFLKDVMESMNKEEMEITTHFQRKRRVRDNMTKKMVTQRTIGKRKQRLNKRSYLIRALTLNTMTKDAERGKLKRRAIATPGMQIRGFVYFVETLARSICEKLEQSGLPVGGNEKKAKLANVVRKMMTNSQDTEISFTITGDNTKWNENQNPRMFLAMITYITRNQPEWFRNVLSIAPIMFSNKMARLGKGYMFESKSMKIRTQIPAEMLASIDLKYFNDSTRKKIEKIRPLLIDGTASLSPGMMMGMFNMLSTVLGVSILNLGQKRHTKTTYWWDGLQSSDDFALIVNAPNHEGIQAGVNRFYRTCKLLGINMSKKKSYINRTGTFEFTSFFYRYGFVANFSMELPSFGVSGINESADMSIGVTVIKNNMINNDLGPATAQMALQLFIKDYRYTYRCHRGDTQIQTRRSFEIKKLWEQTHSKAGLLVSDGGPNLYNIRNLHIPEVCLKWELMDEDYQGRLCNPLNPFVNHKDIESVNNAVIMPAHGPAKNMEYDAVATTHSWIPKRNRSILNTSQRGILEDEQMYQKCCNLFEKFFPSSSYRRPVGISSMVEAMVSRARIDARIDFESGRIKKEEFTEIMKICSTIEELRRQK,757,FALSE,12003,12003,0,-1,manual,Li,Deep mutational scanning reveals the functional constraints and evolutionary potential of the influenza A virus PB1 protein,2023,10.1101/2023.08.27.554986,1-757,Influenza RNA polymerase PB1,Viral Replication,Growth,RDRP_I33A0_2023-08-07_b01.a2m,1,757,757,0.1,0.01,26589,1,757,102.8,0.1357992074,Low,0,0,554986_file16.csv,fitness,1,mutant,RDRP_I33A0_theta0.01_2023-08-07_b01.npy,RDRP_I33A0.pdb,1-757,1,,OrganismalFitness +REV_HV1H2_Fernandes_2016,REV_HV1H2_Fernandes_2016.csv,REV_HV1H2,Virus,Human immunodeficiency virus type 1 group M subtype B (isolate BRU/LAI) 
(HIV-1),MAGRSGDSDEELIRTVRLIKLLYQSNPPPNPEGTRQARRNRRRRWRERQRQIHSISERILSTYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTVLESGTKE,116,FALSE,2147,2147,0,-0.06744744968,median,Fernandes,Functional Segregation of Overlapping Genes in HIV,2016,10.1016/j.cell.2016.11.031,1-116,HIV rev,Viral replication,Growth,REV_HV1H2_full_theta0.99_04-29-2022_b09.a2m,1,116,116,0.9,0.01,15839,0.948,110,9951.8,90.47090909,medium,54,0.4909090909,REV_HV1H2_Fernandes_2016.csv,sel_coeff_mean,1,mutant,REV_HV1H2_theta_0.01.npy,REV_HV1H2.pdb,1-116,0.1,,OrganismalFitness +RFAH_ECOLI_Tsuboyama_2023_2LCL,RFAH_ECOLI_Tsuboyama_2023_2LCL.csv,RFAH_ECOLI,Prokaryote,Escherichia coli,ATPYPGDKVIITEGAFEGFQAIFTEPDGEARSMLLLNLINKEIKHSVKNTEFRKL,55,TRUE,1326,969,357,-0.4014057355,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-45,1-55,Transcription antitermination protein RfaH,Stability,cDNA display proteolysis,RFAH_ECOLI_2023-08-07_b04.a2m,1,55,55,0.4,0.2,86049,0.927,51,11748.4,230.3607843,High,35,0.6862745098,Tsuboyama2023_Dataset2_Dataset42,ddG_ML_float,1,mut_type,RFAH_ECOLI_theta0.2_2023-08-07_b04.npy,RFAH_ECOLI.pdb,1-55,1,,Stability +RL20_AQUAE_Tsuboyama_2023_1GYZ,RL20_AQUAE_Tsuboyama_2023_1GYZ.csv,RL20_AQUAE,Prokaryote,Aquifex aeolicus,WIARINAAVRAYGLNYSTFINGLKKAGIELDRKILADMAVRDPQAFEQVVNKVKEALQV,59,TRUE,1461,1121,340,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-46,1-59,Large ribosomal subunit protein bL20,Stability,cDNA display proteolysis,RL20_AQUAE_2023-08-07_b01.a2m,1,59,59,0.1,0.2,397758,0.814,48,104766.4,2182.633333,High,34,0.7083333333,Tsuboyama2023_Dataset2_Dataset43,ddG_ML_float,1,mut_type,RL20_AQUAE_theta0.2_2023-08-07_b01.npy,RL20_AQUAE.pdb,1-59,1,,Stability +RL40A_YEAST_Mavor_2016,RL40A_YEAST_Mavor_2016.csv,RL40A_YEAST,Eukaryote,Saccharomyces cerevisiae 
S288C,MQIFVKTLTGKTITLEVESSDTIDNVKSKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGIIEPSLKALASKYNCDKSVCRKCYARLPPRATNCRKRKCGHTNQLRPKKKLK,128,FALSE,1253,1253,0,-0.2,manual,Mavor,Determination of ubiquitin fitness landscapes under different chemical stresses in a classroom setting,2016,10.7554/eLife.15802,2-76,Ubiquitin,Growth,Growth,RL40A_YEAST_full_11-26-2021_b01.a2m,1,128,128,0.1,0.2,16228,0.695,89,3974.4,44.65617978,medium,12,0.1348314607,RL401_YEAST_Mavor_2016.csv,DMSO,1,mutant,RL40A_YEAST_theta_0.2.npy,RL40A_YEAST.pdb,1-128,0.1,,OrganismalFitness +RL40A_YEAST_Roscoe_2013,RL40A_YEAST_Roscoe_2013.csv,RL40A_YEAST,Eukaryote,Saccharomyces cerevisiae S288C,MQIFVKTLTGKTITLEVESSDTIDNVKSKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGIIEPSLKALASKYNCDKSVCRKCYARLPPRATNCRKRKCGHTNQLRPKKKLK,128,FALSE,1195,1195,0,-0.2,manual,Roscoe,Analyses of the Effects of All Ubiquitin Point Mutants on Yeast Growth Rate,2013,10.1016/j.jmb.2013.01.032,2-76,Ubiquitin,Growth (essential function),Growth,RL40A_YEAST_full_11-26-2021_b01.a2m,1,128,128,0.1,0.2,16228,0.695,89,3974.4,44.65617978,medium,12,0.1348314607,RL401_YEAST_Roscoe_2013.csv,Selection Coefficient,1,mutant,RL40A_YEAST_theta_0.2.npy,RL40A_YEAST.pdb,1-128,0.1,,OrganismalFitness +RL40A_YEAST_Roscoe_2014,RL40A_YEAST_Roscoe_2014.csv,RL40A_YEAST,Eukaryote,Saccharomyces cerevisiae S288C,MQIFVKTLTGKTITLEVESSDTIDNVKSKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGIIEPSLKALASKYNCDKSVCRKCYARLPPRATNCRKRKCGHTNQLRPKKKLK,128,FALSE,1380,1380,0,0.5,manual,Roscoe,"Systematic Exploration of Ubiquitin Sequence, E1 Activation Efficiency, and Experimental Fitness in Yeast",2014,10.1016/j.jmb.2014.05.019,2-76,Ubiquitin,E1 reactivity,Binding,RL40A_YEAST_full_11-26-2021_b01.a2m,1,128,128,0.1,0.2,16228,0.695,89,3974.4,44.65617978,medium,12,0.1348314607,RL401_YEAST_Roscoe_2014.csv,rel_react,1,mutant,RL40A_YEAST_theta_0.2.npy,RL40A_YEAST.pdb,1-128,0.1,,Activity +RNC_ECOLI_Weeks_2023,RNC_ECOLI_Weeks_2023.csv,RNC_ECOLI,Prokaryote,Escherichia 
coli,MNPIVINRLQRKLGYTFNHQELLQQALTHRSASSKHNERLEFLGDSILSYVIANALYHRFPRVDEGDMSRMRATLVRGNTLAELAREFELGECLRLGPGELKSGGFRRESILADTVEALIGGVFLDSDIQTVEKLILNWYQTRLDEISPGDKQKDPKTRLQEYLQGRHLPLPTYLVVQVRGEAHDQEFTIHCQVSGLSEPVVGTGSSRRKAEQAAAEQALKKLELE,226,FALSE,4277,4277,0,-0.054826707,median,Weeks,Fitness and Functional Landscapes of the E. coli RNase III Gene rnc,2023,10.1093/molbev/msad047,1-226,RNase III,Fluorescence,FACS,RNC_ECOLI_2023-08-07_b06.a2m,1,226,226,0.6,0.2,66507,0.969,219,16221.4,74.07031963,Medium,275,1.255707763,RNC_ECOLI_Weeks_2023.csv,Functional Score Weighted Mean,1,mutant,RNC_ECOLI_theta0.2_2023-08-07_b06.npy,RNC_ECOLI.pdb,1-226,1,,Activity +RPC1_BP434_Tsuboyama_2023_1R69,RPC1_BP434_Tsuboyama_2023_1R69.csv,RPC1_BP434,Virus,Enterobacteria phage 434,SISSRVKSKRIQLGLNQAELAQKVGTTQQSIEQLENGKTKRPRFLPELASALGVSVDWLLN,61,TRUE,1459,1124,335,-1.349855239,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-47,1-61,Repressor protein CI,Stability,cDNA display proteolysis,RPC1_BP434_2023-08-07_b05.a2m,1,61,61,0.5,0.01,820224,0.951,58,192520.2,3319.313793,High,73,1.25862069,Tsuboyama2023_Dataset2_Dataset44,ddG_ML_float,1,mut_type,RPC1_BP434_theta0.01_2023-08-07_b05.npy,RPC1_BP434.pdb,1-61,1,,Stability +RPC1_LAMBD_Li_2019_high-expression,RPC1_LAMBD_Li_2019_high-expression.csv,RPC1_LAMBD,Prokaryote,Escherichia phage lambda (Bacteriophage lambda),MSTKKKPLTQEQLEDARRLKAIYEKKKNELGLSQESVADKMGMGQSGVGALFNGINALNAYNAALLAKILKVSVEEFSPSIAREIYEMYEAVSMQPSLRSEYEYPVFSHVQAGMFSPELRTFTKGDAERWVSTTKKASDSAFWLEVEGNSMTAPTGSKPSFPDGMLILVDPEQAVEPGDFCIARLGGDEFTFKKLIRDSGQVFLQPLNPQYPMIPCNESCSVVGKVIASQWPEETFG,237,FALSE,351,351,0,7,manual,Li,Changes in gene expression predictably shift and switch genetic interactions,2019,10.1038/s41467-019-11735-3,19-77,CI,Repressor activity (FACS sorting for expression of GFP 
reporter),FACS,RPC1_LAMBD_2023-08-07_b03.a2m,1,237,237,0.3,0.2,100755,0.886,210,28172.8,134.1561905,High,219,1.042857143,,H_GFP_mean_scaled,-1,mut_proteingym,RPC1_LAMBD_theta0.2_2023-08-07_b03.npy,RPC1_LAMBD.pdb,1-237,1,18,Activity +RPC1_LAMBD_Li_2019_low-expression,RPC1_LAMBD_Li_2019_low-expression.csv,RPC1_LAMBD,Prokaryote,Escherichia phage lambda (Bacteriophage lambda),MSTKKKPLTQEQLEDARRLKAIYEKKKNELGLSQESVADKMGMGQSGVGALFNGINALNAYNAALLAKILKVSVEEFSPSIAREIYEMYEAVSMQPSLRSEYEYPVFSHVQAGMFSPELRTFTKGDAERWVSTTKKASDSAFWLEVEGNSMTAPTGSKPSFPDGMLILVDPEQAVEPGDFCIARLGGDEFTFKKLIRDSGQVFLQPLNPQYPMIPCNESCSVVGKVIASQWPEETFG,237,FALSE,351,351,0,8.481244509,median,Li,Changes in gene expression predictably shift and switch genetic interactions,2019,10.1038/s41467-019-11735-3,19-77,CI,Repressor activity (FACS sorting for expression of GFP reporter),FACS,RPC1_LAMBD_2023-08-07_b03.a2m,1,237,237,0.3,0.2,100755,0.886,210,28172.8,134.1561905,High,219,1.042857143,,L_GFP_mean_scaled,-1,mut_proteingym,RPC1_LAMBD_theta0.2_2023-08-07_b03.npy,RPC1_LAMBD.pdb,1-237,1,18,Activity +RS15_GEOSE_Tsuboyama_2023_1A32,RS15_GEOSE_Tsuboyama_2023_1A32.csv,RS15_GEOSE,Prokaryote,Geobacillus stearothermophilus,SPEVQIAILTEQINNLNEHLRVHKKDHHSRRGLLKMVGKRRRLLAYLRNKDVARYREIVEKLG,63,FALSE,1195,1195,0,-0.1292928041,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-48,1-63,Small ribosomal subunit protein uS15,Stability,cDNA display proteolysis,RS15_GEOSE_2023-08-07_b06.a2m,1,63,63,0.6,0.2,44428,1,63,4519.5,71.73809524,Medium,35,0.5555555556,Tsuboyama2023_Dataset2_Dataset45,ddG_ML_float,1,mut_type,RS15_GEOSE_theta0.2_2023-08-07_b06.npy,RS15_GEOSE.pdb,1-63,1,,Stability +S22A1_HUMAN_Yee_2023_abundance,S22A1_HUMAN_Yee_2023_abundance.csv,S22A1_HUMAN,Human,Homo 
sapiens,PTVDDILEQVGESGWFQKQAFLILCLLSAAFAPICVGIVFLGFTPDHHCQSPGVAELSQRCGWSPAEELNYTVPGLGPAGEAFLGQCRRYEVDWNQSALSCVDPLASLATNRSHLPLGPCQDGWVYDTPGSSIVTEFNLVCADSWKLDLFQSCLNAGFLFGSLGVGYFADRFGRKLCLLGTVLVNAVSGVLMAFSPNYMSMLLFRLLQGLVSKGNWMAGYTLITEFVGSGSRRTVAIMYQMAFTVGLVALTGLAYALPHWRWLQLAVSLPTFLFLLYYWCVPESPRWLLSQKRNTEAIKIMDHIAQKNGKLPPADLKMLSLEEDVTEKLSPSFADLFRTPRLRKRTFILMYLWFTDSVLYQGLILHMGATSGNLYLDFLYSALVEIPGAFIALITIDRVGRIYPMAMSNLLAGAACLVMIFISPDLHWLNIIIMCVGRMGITIAIQMICLVNAELYPTFVRNLGVMVCSSLCDIGGIITPFIVFRLREVWQALPLILFAVLGLLAAGVTLLLPETKGVALPETMKDAENLGRKAKPKENTIYLKVQTSEPSGT,553,FALSE,9803,9803,0,-1,manual,Yee,The full spectrum of OCT1 (SLC22A1) mutations bridges transporter biophysics to drug pharmacogenomics,2023,10.1101/2023.06.06.543963,1-549,Oct1,abundance,FACS,S22A1_HUMAN_2023-08-07_b02.a2m,1,553,553,0.2,0.2,198790,0.807,446,32557.5,72.99887892,Medium,485,1.087443946,543963_file04.xlsx,GFP_score,1,mutant,S22A1_HUMAN_theta0.2_2023-08-07_b02.npy,S22A1_HUMAN.pdb,1-553,1,,Expression +S22A1_HUMAN_Yee_2023_activity,S22A1_HUMAN_Yee_2023_activity.csv,S22A1_HUMAN,Human,Homo sapiens,PTVDDILEQVGESGWFQKQAFLILCLLSAAFAPICVGIVFLGFTPDHHCQSPGVAELSQRCGWSPAEELNYTVPGLGPAGEAFLGQCRRYEVDWNQSALSCVDPLASLATNRSHLPLGPCQDGWVYDTPGSSIVTEFNLVCADSWKLDLFQSCLNAGFLFGSLGVGYFADRFGRKLCLLGTVLVNAVSGVLMAFSPNYMSMLLFRLLQGLVSKGNWMAGYTLITEFVGSGSRRTVAIMYQMAFTVGLVALTGLAYALPHWRWLQLAVSLPTFLFLLYYWCVPESPRWLLSQKRNTEAIKIMDHIAQKNGKLPPADLKMLSLEEDVTEKLSPSFADLFRTPRLRKRTFILMYLWFTDSVLYQGLILHMGATSGNLYLDFLYSALVEIPGAFIALITIDRVGRIYPMAMSNLLAGAACLVMIFISPDLHWLNIIIMCVGRMGITIAIQMICLVNAELYPTFVRNLGVMVCSSLCDIGGIITPFIVFRLREVWQALPLILFAVLGLLAAGVTLLLPETKGVALPETMKDAENLGRKAKPKENTIYLKVQTSEPSGT,553,FALSE,10094,10094,0,1,manual,Yee,The full spectrum of OCT1 (SLC22A1) mutations bridges transporter biophysics to drug pharmacogenomics,2023,10.1101/2023.06.06.543963,1-549,Oct1,uptake of cytotoxic 
substrate,Growth,S22A1_HUMAN_2023-08-07_b02.a2m,1,553,553,0.2,0.2,198790,0.807,446,32557.5,72.99887892,Medium,485,1.087443946,543963_file04.xlsx,SM73_1_score,-1,mutant,S22A1_HUMAN_theta0.2_2023-08-07_b02.npy,S22A1_HUMAN.pdb,1-553,1,,Activity +SAV1_MOUSE_Tsuboyama_2023_2YSB,SAV1_MOUSE_Tsuboyama_2023_2YSB.csv,SAV1_MOUSE,Eukaryote,Mus musculus,GEDLPLPPGWSVDWTMRGRKYYIDHNTNTTHWSHPLESGPSSG,43,TRUE,965,679,286,-0.6280556038,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-49,1-43,Protein salvador homolog 1,Stability,cDNA display proteolysis,SAV1_MOUSE_2023-08-07_b06.a2m,1,43,43,0.6,0.2,177542,0.791,34,4627.6,136.1058824,High,14,0.4117647059,Tsuboyama2023_Dataset2_Dataset46,ddG_ML_float,1,mut_type,SAV1_MOUSE_theta0.2_2023-08-07_b06.npy,SAV1_MOUSE.pdb,1-43,1,,Stability +SBI_STAAM_Tsuboyama_2023_2JVG,SBI_STAAM_Tsuboyama_2023_2JVG.csv,SBI_STAAM,Prokaryote,Staphylococcus aureus,VRHDERVKSANDAISKLNEKDSIENRRLAQREVNKAPMDVKEHLQKQLDALVAQKD,56,FALSE,1025,1025,0,-0.5166138978,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-50,1-56,Immunoglobulin-binding protein Sbi,Stability,cDNA display proteolysis,SBI_STAAM_2023-08-07_b04.a2m,1,56,56,0.4,0.2,14476,0.875,49,1539.4,31.41632653,Medium,21,0.4285714286,Tsuboyama2023_Dataset2_Dataset47,ddG_ML_float,1,mut_type,SBI_STAAM_theta0.2_2023-08-07_b04.npy,SBI_STAAM.pdb,1-56,1,,Stability +SC6A4_HUMAN_Young_2021,SC6A4_HUMAN_Young_2021.csv,SC6A4_HUMAN,Human,Homo 
sapiens,METTPLNSQKQLSACEDGEDCQENGVLQKVVPTPGDKVESGQISNGYSAVPSPGAGDDTRHSIPATTTTLVAELHQGERETWGKKVDFLLSVIGYAVDLGNVWRFPYICYQNGGGAFLLPYTIMAIFGGIPLFYMELALGQYHRNGCISIWRKICPIFKGIGYAICIIAFYIASYYNTIMAWALYYLISSFTDQLPWTSCKNSWNTGNCTNYFSEDNITWTLHSTSPAEEFYTRHVLQIHRSKGLQDLGGISWQLALCIMLIFTVIYFSIWKGVKTSGKVVWVTATFPYIILSVLLVRGATLPGAWRGVLFYLKPNWQKLLETGVWIDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQDALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPASTFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHVWAKRRERFVLAVVITCFFGSLVTLTFGGAYVVKLLEEYATGPAVLTVALIEAVAVSWFYGITQFCRDVKEMLGFSPGWFWRICWVAISPLFLLFIICSFLMSPPQLRLFQYNYPYWSIILGYCIGTSSFICIPTYIAYRLIITPGTFKERIIKSITPETPTEIPCGDIRLNAV,630,FALSE,11576,11576,0,-0.1560688323,median,Young,Deep Mutagenesis of a Transporter for Uptake of a Non-Native Substrate Identifies Conformationally Dynamic Regions,2021,10.1101/2021.04.19.440442,2-630,Sodium-dependent serotonin transporter,Fluorescence,Fluorescence,SC6A4_HUMAN_full_11-26-2021_b02.a2m,1,630,630,0.2,0.2,40971,0.805,507,5278.9,10.41203156,medium,278,0.5483234714,SC6A4_HUMAN_Young_2021.csv,avg_MYC,1,mutant,SC6A4_HUMAN_theta_0.2.npy,SC6A4_HUMAN.pdb,1-630,0.1,,Activity +SCIN_STAAR_Tsuboyama_2023_2QFF,SCIN_STAAR_Tsuboyama_2023_2QFF.csv,SCIN_STAAR,Prokaryote,Staphylococcus aureus,QNEKLANELKSLLDELNVNELATGSLNTYYKRTIKISGQKAMYALKSKDFKKMSEAKYQLQKIYNEIDEA,70,FALSE,1212,1212,0,-0.4037152866,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-51,1-70,Staphylococcal complement inhibitor,Stability,cDNA display proteolysis,SCIN_STAAR_2023-08-07_b02.a2m,1,70,70,0.2,0.2,38043,0.9,63,11146.3,176.9253968,High,4,0.06349206349,Tsuboyama2023_Dataset2_Dataset48,ddG_ML_float,1,mut_type,SCIN_STAAR_theta0.2_2023-08-07_b02.npy,SCIN_STAAR.pdb,1-70,1,,Stability +SCN5A_HUMAN_Glazer_2019,SCN5A_HUMAN_Glazer_2019.csv,SCN5A_HUMAN,Human,Homo 
sapiens,MANFLLPRGTSSFRRFTRESLAAIEKRMAEKQARGSTTLQESREGLPEEEAPRPQLDLQASKKLPDLYGNPPQELIGEPLEDLDPFYSTQKTFIVLNKGKTIFRFSATNALYVLSPFHPIRRAAVKILVHSLFNMLIMCTILTNCVFMAQHDPPPWTKYVEYTFTAIYTFESLVKILARGFCLHAFTFLRDPWNWLDFSVIIMAYTTEFVDLGNVSALRTFRVLRALKTISVISGLKTIVGALIQSVKKLADVMVLTVFCLSVFALIGLQLFMGNLRHKCVRNFTALNGTNGSVEADGLVWESLDLYLSDPENYLLKNGTSDVLLCGNSSDAGTCPEGYRCLKAGENPDHGYTSFDSFAWAFLALFRLMTQDCWERLYQQTLRSAGKIYMIFFMLVIFLGSFYLVNLILAVVAMAYEEQNQATIAETEEKEKRFQEAMEMLKKEHEALTIRGVDTVSRSSLEMSPLAPVNSHERRSKRRKRMSSGTEECGEDRLPKSDSEDGPRAMNHLSLTRGLSRTSMKPRSSRGSIFTFRRRDLGSEADFADDENSTAGESESHHTSLLVPWPLRRTSAQGQPSPGTSAPGHALHGKKNSTVDCNGVVSLLGAGDPEATSPGSHLLRPVMLEHPPDTTTPSEEPGGPQMLTSQAPCVDGFEEPGARQRALSAVSVLTSALEELEESRHKCPPCWNRLAQRYLIWECCPLWMSIKQGVKLVVMDPFTDLTITMCIVLNTLFMALEHYNMTSEFEEMLQVGNLVFTGIFTAEMTFKIIALDPYYYFQQGWNIFDSIIVILSLMELGLSRMSNLSVLRSFRLLRVFKLAKSWPTLNTLIKIIGNSVGALGNLTLVLAIIVFIFAVVGMQLFGKNYSELRDSDSGLLPRWHMMDFFHAFLIIFRILCGEWIETMWDCMEVSGQSLCLLVFLLVMVIGNLVVLNLFLALLLSSFSADNLTAPDEDREMNNLQLALARIQRGLRFVKRTTWDFCCGLLRQRPQKPAALAAQGQLPSCIATPYSPPPPETEKVPPTRKETRFEEGEQPGQGTPGDPEPVCVPIAVAESDTDDQEEDEENSLGTEEESSKQQESQPVSGGPEAPPDSRTWSQVSATASSEAEASASQADWRQQWKAEPQAPGCGETPEDSCSEGSTADMTNTAELLEQIPDLGQDVKDPEDCFTEGCVRRCPCCAVDTTQAPGKVWWRLRKTCYHIVEHSWFETFIIFMILLSSGALAFEDIYLEERKTIKVLLEYADKMFTYVFVLEMLLKWVAYGFKKYFTNAWCWLDFLIVDVSLVSLVANTLGFAEMGPIKSLRTLRALRPLRALSRFEGMRVVVNALVGAIPSIMNVLLVCLIFWLIFSIMGVNLFAGKFGRCINQTEGDLPLNYTIVNNKSQCESLNLTGELYWTKVKVNFDNVGAGYLALLQVATFKGWMDIMYAAVDSRGYEEQPQWEYNLYMYIYFVIFIIFGSFFTLNLFIGVIIDNFNQQKKKLGGQDIFMTEEQKKYYNAMKKLGSKKPQKPIPRPLNKYQGFIFDIVTKQAFDVTIMFLICLNMVTMMVETDDQSPEKINILAKINLLFVAIFTGECIVKLAALRHYYFTNSWNIFDFVVVILSIVGTVLSDIIQKYFFSPTLFRVIRLARIGRILRLIRGAKGIRTLLFALMMSLPALFNIGLLLFLVMFIYSIFGMANFAYVKWEAGIDDMFNFQTFANSMLCLFQITTSAGWDGLLSPILNTGPPYCDPTLPNSNGSRGDCGSPAVGILFFTTYIIISFLIVVNMYIAIILENFSVATEESTEPLSEDDFDMFYEIWEKFDPEATQFIEYSVLSDFADALSEPLRIAKPNQISLINMDLPMVSGDRIHCMDILFAFTKRVLGESGEMDALKIQMEEKFMAANPSKISYEPITTTLRRKHEEVSAMVIQRAFRRHLLQRSLKHASFLFRQQAGSGLSEEDAPEREGLIAYVMSENFSRPLGPPSSSSISSTSFPPSYDSVTRATSDNLQVRG
SDYSHSEDLADFPPSPDRDRESIV,2016,FALSE,224,224,0,-88.35,median,Glazer,Deep Mutational Scan of an SCN5A Voltage Sensor,2019,10.1161/CIRCGEN.119.002786,1621-1632,SCN5A,"drug resistance (triple-drug assay: veratridine + brevetoxin + ouabain; surrogate for sodium channel dysfunction, select against function)",,SCN5A_HUMAN_1611-1642_11-26-2021_b03.a2m,1611,1642,32,0.3,0.2,49973,0.812,26,743.1,28.58076923,medium,2,0.07692307692,SCN5A_HUMAN_Glazer_2019.csv,dms,-1,mutation,SCN5A_HUMAN_theta_0.2.npy,SCN5A_HUMAN.pdb,1-2016,0.1,,OrganismalFitness +SDA_BACSU_Tsuboyama_2023_1PV0,SDA_BACSU_Tsuboyama_2023_1PV0.csv,SDA_BACSU,Prokaryote,Bacillus subtilis,MRKLSDELLIESYFKATEMNLNRDFIELIENEIKRRSLGHIISV,44,TRUE,2770,834,1936,-1,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-52,1-44,Sporulation inhibitor sda,Stability,cDNA display proteolysis,SDA_BACSU_2023-08-07_b05.a2m,1,44,44,0.5,0.2,1953,0.886,39,876.8,22.48205128,Medium,4,0.1025641026,Tsuboyama2023_Dataset2_Dataset49,ddG_ML_float,1,mut_type,SDA_BACSU_theta0.2_2023-08-07_b05.npy,SDA_BACSU.pdb,1-44,1,,Stability +SERC_HUMAN_Xie_2023,SERC_HUMAN_Xie_2023.csv,SERC_HUMAN,Human,Homo sapiens,MDAPRQVVNFGPGPAKLPHSVLLEIQKELLDYKGVGISVLEMSHRSSDFAKIINNTENLVRELLAVPDNYKVIFLQGGGCGQFSAVPLNLIGLKAGRCADYVVTGAWSAKAAEEAKKFGTINIVHPKLGSYTKIPDPSTWNLNPDASYVYYCANETVHGVEFDFIPDVKGAVLVCDMSSNFLSKPVDVSKFGVIFAGAQKNVGSAGVTVVIVRDDLLGFALRECPSVLEYKVQAGNSSLYNTPPCFSIYVMGLVLEWIKNNGGAAAMEKLSSIKSQTIYEIIDNSQGFYVCPVEPQNRSKMNIPFRIGNAKGDDALEKRFLDKALELNMLSLKGHRSVGGIRASLYNAVTIEDVQKLAAFMKKFLEMHQL,370,FALSE,1914,1914,0,0.9360658319,median,Xie,Predicting the functional effect of compound heterozygous genotypes from large scale variant effect maps,2023,10.1101/2023.01.11.523651,2-370,PSAT1,Yeast 
growth,,SERC_HUMAN_2023-08-07_b02.a2m,1,370,370,0.2,0.2,232438,0.949,351,42521.5,121.1438746,High,899,2.561253561,urn_mavedb_00000107-b-1_scores-2.csv,score,1,mutant,SERC_HUMAN_theta0.2_2023-08-07_b02.npy,SERC_HUMAN.pdb,1-370,1,,OrganismalFitness +SHOC2_HUMAN_Kwon_2022,SHOC2_HUMAN_Kwon_2022.csv,SHOC2_HUMAN,Human,Homo sapiens,MSSSLGKEKDSKEKDPKVPSAKEREKEAKASGGFGKESKEKEPKTKGKDAKDGKKDSSAAQPGVAFSVDNTIKRPNPAPGTRKKSSNAEVIKELNKCREENSMRLDLSKRSIHILPSSIKELTQLTELYLYSNKLQSLPAEVGCLVNLMTLALSENSLTSLPDSLDNLKKLRMLDLRHNKLREIPSVVYRLDSLTTLYLRFNRITTVEKDIKNLSKLSMLSIRENKIKQLPAEIGELCNLITLDVAHNQLEHLPKEIGNCTQITNLDLQHNELLDLPDTIGNLSSLSRLGLRYNRLSAIPRSLAKCSALEELNLENNNISTLPESLLSSLVKLNSLTLARNCFQLYPVGGPSQFSTIYSLNMEHNRINKIPFGIFSRAKVLSKLNMKDNQLTSLPLDFGTWTSMVELNLATNQLTKIPEDVSGLVSLEVLILSNNLLKKLPHGLGNLRKLRELDLEENKLESLPNEIAYLKDLQKLVLTNNQLTTLPRGIGHLTNLTHLGLGENLLTHLPEEIGTLENLEELYLNDNPNLHSLPFELALCSKLSIMSIENCPLSHLPPQIVAGGPSFIIQFLKMQGPYRAMV,582,FALSE,10972,10972,0,-0.34,median,Kwon,Structure–function analysis of the SHOC2–MRAS–PP1C holophosphatase complex,2022,10.1038/s41586-022-04928-2,2-582,Leucine-rich repeat protein SHOC-2,Drug resistance,Survival (dosed with trametinib),SHOC2_HUMAN_2023-10-12_b04.a2m,1,582,582,0.4,0.2,22163,0.777,452,8806.8,19.4840708,Medium,379,0.8384955752,2022.3.16.Extended Data Table 4.csv,LFC_scaled,1,variant.by.aa,SHOC2_HUMAN_theta0.2_2023-10-12_b04.npy,SHOC2_HUMAN.pdb,1-582,1,,OrganismalFitness +SOX30_HUMAN_Tsuboyama_2023_7JJK,SOX30_HUMAN_Tsuboyama_2023_7JJK.csv,SOX30_HUMAN,Human,Homo sapiens,RPMNAFMVWARIHRPALAKANPAANNAEISVQLGLEWNKLSEEQKKPYYDEAQKIKE,57,FALSE,1010,1010,0,-0.3216404755,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-53,1-57,Transcription factor SOX-30,Stability,cDNA display 
proteolysis,SOX30_HUMAN_2023-08-07_b03.a2m,1,57,57,0.3,0.2,158104,0.982,56,14909.6,266.2428571,High,36,0.6428571429,Tsuboyama2023_Dataset2_Dataset50,ddG_ML_float,1,mut_type,SOX30_HUMAN_theta0.2_2023-08-07_b03.npy,SOX30_HUMAN.pdb,1-57,1,,Stability +SPA_STAAU_Tsuboyama_2023_1LP1,SPA_STAAU_Tsuboyama_2023_1LP1.csv,SPA_STAAU,Prokaryote,Staphylococcus aureus,KFNKELSVAGREIVTLPNLNDPQKKAFIFSLWDDPSQSANLLAEAKKLNDAQAPK,55,TRUE,2105,1035,1070,-0.9794586971,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-54,1-55,Immunoglobulin G-binding protein A,Stability,cDNA display proteolysis,SPA_STAAU_2023-08-07_b04.a2m,1,55,55,0.4,0.2,184804,0.927,51,2042.1,40.04117647,Medium,25,0.4901960784,Tsuboyama2023_Dataset2_Dataset51,ddG_ML_float,1,mut_type,SPA_STAAU_theta0.2_2023-08-07_b04.npy,SPA_STAAU.pdb,1-55,1,,Stability +SPG1_STRSG_Olson_2014,SPG1_STRSG_Olson_2014.csv,SPG1_STRSG,Prokaryote,Streptococcus sp. group G,MEKEKKVKYFLRKSAFGLASVSAAFLVGSTVFAVDSPIEDTPIIRNGGELTNLLGNSETTLALRNEESATADLTAAAVADTVAAAAAENAGAAAWEAAAAADALAKAKADALKEFNKYGVSDYYKNLINNAKTVEGIKDLQAQVVESAKKARISEATDGLSDFLKSQTPAEDTVKSIELAEAKVLANRELDKYGVSDYHKNLINNAKTVEGVKELIDEILAALPKTDQYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTEKPEVIDASELTPAVTTYKLVINGKTLKGETTTKAVDAETAEKAFKQYANDNGVDGVWTYDDATKTFTVTEMVTEVPGDAPTEPEKPEASIPLVPLTPATPIAKDDAKKDDTKKEDAKKPEAKKDDAKKAETLPTTGEGSNPFFTAAALAVMAGAGALAVASKRKED,448,TRUE,536962,1045,535917,-4,manual,Olson,A comprehensive biophysical description of pairwise epistasis throughout an entire protein domain,2014,10.1016/j.cub.2014.09.072,228-282,GB1,Binding (IgG),Binding,SPG1_STRSG_full_11-26-2021_b07.a2m,1,448,448,0.7,0.2,44,0.913,409,3.3,0.008068459658,low,0,0,SPG1_STRSG_Olson_2014.csv,lnW,1,mutant,SPG1_STRSG_theta_0.2.npy,SPG1_STRSG.pdb,1-448,0.1,,Binding +SPG1_STRSG_Wu_2016,SPG1_STRSG_Wu_2016.csv,SPG1_STRSG,Prokaryote,Streptococcus sp. 
group G,MEKEKKVKYFLRKSAFGLASVSAAFLVGSTVFAVDSPIEDTPIIRNGGELTNLLGNSETTLALRNEESATADLTAAAVADTVAAAAAENAGAAAWEAAAAADALAKAKADALKEFNKYGVSDYYKNLINNAKTVEGIKDLQAQVVESAKKARISEATDGLSDFLKSQTPAEDTVKSIELAEAKVLANRELDKYGVSDYHKNLINNAKTVEGVKELIDEILAALPKTDQYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTEKPEVIDASELTPAVTTYKLVINGKTLKGETTTKAVDAETAEKAFKQYANDNGVDGVWTYDDATKTFTVTEMVTEVPGDAPTEPEKPEASIPLVPLTPATPIAKDDAKKDDTKKEDAKKPEAKKDDAKKAETLPTTGEGSNPFFTAAALAVMAGAGALAVASKRKED,448,TRUE,149360,76,149284,0.1224388752,median,Wu,Adaptation in protein fitness landscapes is facilitated by indirect paths,2016,10.7554/eLife.16965,265-280,GB1,Binding (IgG),binding,SPG1_STRSG_full_b0.1.a2m,1,448,448,0.1,0.2,3109,1,448,600.4,1.340178571,Medium,97,0.2165178571,SPG1_STRSG_Wu_2016.csv,Fitness,1,Variants,SPG1_STRSG_b01_theta_0.2.npy,SPG1_STRSG.pdb,1-448,1,,Binding +SPG2_STRSG_Tsuboyama_2023_5UBS,SPG2_STRSG_Tsuboyama_2023_5UBS.csv,SPG2_STRSG,Prokaryote,Streptococcus sp. group G,MTFKLIINGKTLKGETTTEAVDAATAEKVFKQYFNDNGIDGEWTYDDATKTFTITE,56,TRUE,1451,1029,422,-1.000627629,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-55,1-56,Immunoglobulin G-binding protein G,Stability,cDNA display proteolysis,SPG2_STRSG_2023-08-07_b03.a2m,1,56,56,0.3,0.2,39899,0.75,42,2567.6,61.13333333,Medium,6,0.1428571429,Tsuboyama2023_Dataset2_Dataset52,ddG_ML_float,1,mut_type,SPG2_STRSG_theta0.2_2023-08-07_b03.npy,SPG2_STRSG.pdb,1-56,1,,Stability 
+SPIKE_SARS2_Starr_2020_binding,SPIKE_SARS2_Starr_2020_binding.csv,SPIKE_SARS2,Virus,SARS-COV2,MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT,1273,FALSE,3802,3802,0,-0.5,manual,Starr,Deep Mutational Scanning of SARS-CoV-2 Receptor Binding Domain Reveals Constraints on Folding and ACE2 Binding,2020,10.1016/j.cell.2020.08.012,331-531,Spike RBD,ACE2 binding,Binding,SPIKE_SARS2_theta0.99_full_11-26-2021_b01.a2m,1,1273,1273,0.1,0.01,36931,0.998,1271,1405.2,1.105586153,medium,2059,1.619984264,SPIKE_SARS2_Starr_2020.csv,bind_avg,1,mutation,SPIKE_SARS2_theta_0.01.npy,SPIKE_SARS2.pdb,1-1273,0.1,,Binding 
+SPIKE_SARS2_Starr_2020_expression,SPIKE_SARS2_Starr_2020_expression.csv,SPIKE_SARS2,Virus,SARS-COV2,MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT,1273,FALSE,3798,3798,0,-1,manual,Starr,Deep Mutational Scanning of SARS-CoV-2 Receptor Binding Domain Reveals Constraints on Folding and ACE2 Binding,2020,10.1016/j.cell.2020.08.012,331-531,Spike RBD,ACE2 binding,Binding,SPIKE_SARS2_theta0.99_full_11-26-2021_b01.a2m,1,1273,1273,0.1,0.01,36931,0.998,1271,1405.2,1.105586153,medium,2059,1.619984264,SPIKE_SARS2_Starr_2020.csv,expr_avg,1,mutation,SPIKE_SARS2_theta_0.01.npy,SPIKE_SARS2.pdb,1-1273,0.1,,Expression +SPTN1_CHICK_Tsuboyama_2023_1TUD,SPTN1_CHICK_Tsuboyama_2023_1TUD.csv,SPTN1_CHICK,Eukaryote,Gallus 
gallus,RQGFVPAAYVKKLDSGTGKELVLALYDYQEKSPREVTMKKGDILTLLNSTNKDWWKVEVN,60,TRUE,3201,1051,2150,-2.360476078,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-56,1-60,"Spectrin alpha chain, non-erythrocytic 1",Stability,cDNA display proteolysis,SPTN1_CHICK_2023-08-07_b03.a2m,1,60,60,0.3,0.2,420793,0.933,56,15051.5,268.7767857,High,47,0.8392857143,Tsuboyama2023_Dataset2_Dataset53,ddG_ML_float,1,mut_type,SPTN1_CHICK_theta0.2_2023-08-07_b03.npy,SPTN1_CHICK.pdb,1-60,1,,Stability +SQSTM_MOUSE_Tsuboyama_2023_2RRU,SQSTM_MOUSE_Tsuboyama_2023_2RRU.csv,SQSTM_MOUSE,Eukaryote,Mus musculus,RLIESLSQMLSMGFSDEGGWLTRLLQTKNYDIGAALDTIQ,40,FALSE,707,707,0,-0.8554856463,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-57,1-40,Sequestosome-1,Stability,cDNA display proteolysis,SQSTM_MOUSE_2023-08-07_b05.a2m,1,40,40,0.5,0.2,34660,0.925,37,3244.5,87.68918919,Medium,13,0.3513513514,Tsuboyama2023_Dataset2_Dataset54,ddG_ML_float,1,mut_type,SQSTM_MOUSE_theta0.2_2023-08-07_b05.npy,SQSTM_MOUSE.pdb,1-40,1,,Stability +SR43C_ARATH_Tsuboyama_2023_2N88,SR43C_ARATH_Tsuboyama_2023_2N88.csv,SR43C_ARATH,Eukaryote,Arabidopsis thaliana,AVAESVIGKRVGDDGKTIEYLVKWTDMSDATWEPQDNVDSTLVLLYQQ,48,TRUE,1583,889,694,-1.591761235,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-58,1-48,"Signal recognition particle 43 kDa protein, chloroplastic",Stability,cDNA display proteolysis,SR43C_ARATH_2023-08-07_b02.a2m,1,48,48,0.2,0.2,101118,0.917,44,12180.6,276.8318182,High,26,0.5909090909,Tsuboyama2023_Dataset2_Dataset55,ddG_ML_float,1,mut_type,SR43C_ARATH_theta0.2_2023-08-07_b02.npy,SR43C_ARATH.pdb,1-48,1,,Stability +SRBS1_HUMAN_Tsuboyama_2023_2O2W,SRBS1_HUMAN_Tsuboyama_2023_2O2W.csv,SRBS1_HUMAN,Human,Homo 
sapiens,GIDPFTGEAIAKFNFNGDTQVEMSFRKGERITLLRQVDENWYEGRIPGTSRQGIFPITYVDVIKRPL,67,TRUE,1556,1211,345,-1.169019411,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-59,1-67,Sorbin and SH3 domain-containing protein 1,Stability,cDNA display proteolysis,SRBS1_HUMAN_2023-08-07_b03.a2m,1,67,67,0.3,0.2,708655,0.836,56,22689,405.1607143,High,60,1.071428571,Tsuboyama2023_Dataset2_Dataset56,ddG_ML_float,1,mut_type,SRBS1_HUMAN_theta0.2_2023-08-07_b03.npy,SRBS1_HUMAN.pdb,1-67,1,,Stability +SRC_HUMAN_Ahler_2019,SRC_HUMAN_Ahler_2019.csv,SRC_HUMAN,Human,Homo sapiens,MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL,536,FALSE,3372,3372,0,-1,manual,Ahler,"A Combined Approach Reveals a Regulatory Mechanism Coupling Src's Kinase Activity, Localization, and Phosphotransferase-Independent Functions",2019,10.1016/j.molcel.2019.02.003,270-519,SRC,growth (surrogate for phosphorylation activity),Growth,SRC_HUMAN_full_11-26-2021_b06.a2m,1,536,536,0.6,0.2,26974,0.808,433,1405.1,3.245034642,medium,86,0.1986143187,SRC_HUMAN_Ahler_CD_2019.csv,Activity_Score,1,mutant_uniprot_1,SRC_HUMAN_theta_0.2.npy,SRC_HUMAN.pdb,1-536,0.1,,Activity +SRC_HUMAN_Chakraborty_2023_binding-DAS_25uM,SRC_HUMAN_Chakraborty_2023_binding-DAS_25uM.csv,SRC_HUMAN,Human,S. 
Cerevisiae,MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL,536,FALSE,3637,3637,0,-0.086077486,median,Chakraborty,Profiling of the drug resistance of thousands of Src tyrosine kinase mutants uncovers a regulatory network that couples autoinhibition to catalytic domain dynamics,2022,10.1101/2021.12.05.471322,270-519,SRC,Fluorescence measurement,,SRC_HUMAN_2023-08-07_b06.a2m,1,536,536,0.6,0.2,37675,0.869,466,1789,3.839055794,Medium,117,0.2510729614,GSE190495_Src_DAS_25_Score.csv,DMS_score,1,mutant,SRC_HUMAN_theta0.2_2023-08-07_b06.npy,SRC_HUMAN.pdb,1-536,1,,Activity +SRC_HUMAN_Nguyen_2022,SRC_HUMAN_Nguyen_2022.csv,SRC_HUMAN,Human,Human,MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL,536,FALSE,3366,3366,0,0.535786927,median,Nguyen,Molecular Determinants of Hsp90 Dependence of Src Kinase Revealed by Deep Mutational Scanning,2022,10.1002/pro.4656,270-519,SRC,growth 
enrichment,,SRC_HUMAN_2023-08-07_b06.a2m,1,536,536,0.6,0.2,37675,0.869,466,1789,3.839055794,Medium,117,0.2510729614,diffsel_calib.csv,diffsel,-1,mutant,SRC_HUMAN_theta0.2_2023-08-07_b06.npy,SRC_HUMAN.pdb,1-536,1,,OrganismalFitness +SUMO1_HUMAN_Weile_2017,SUMO1_HUMAN_Weile_2017.csv,SUMO1_HUMAN,Human,Homo sapiens,MSDQEAKPSTEDLGDKKEGEYIKLKVIGQDSSEIHFKVKMTTHLKKLKESYCQRQGVPMNSLRFLFEGQRIADNHTPKELGMEEEDVIEVYQEQTGGHSTV,101,FALSE,1700,1700,0,0.3,manual,Weile,A framework for exhaustively mapping functional missense variants,2017,10.15252/msb.20177908,2-97,Small ubiquitin-related modifier 1,Yeast growth,complementation,SUMO1_HUMAN_full_11-26-2021_b02.a2m,1,101,101,0.2,0.2,85570,0.703,71,13120.2,184.7915493,high,67,0.9436619718,SUMO1_HUMAN_Weile_2017.csv,screenscore,1,mutant,SUMO1_HUMAN_theta_0.2.npy,SUMO1_HUMAN.pdb,1-101,0.1,,OrganismalFitness +SYUA_HUMAN_Newberry_2020,SYUA_HUMAN_Newberry_2020.csv,SYUA_HUMAN,Human,Homo sapiens,MDVYMKGLSKAKEGVVAAAEKTKQGVAEAAGKTKEGVLFVGSKTKEGVVHGVATVAEKTKEQVTNVGGAVVTGVTAVAQKTVEGAGSIAAATGYVKKDQLGKNEEGAPQEGILEDMPVDPDNEAFEMPSEEGFQDFEPEA,140,FALSE,2497,2497,0,-0.1,manual,Newberry,Robust Sequence Determinants of α-Synuclein Toxicity in Yeast Implicate Membrane Binding,2020,10.1021/acschembio.0c00339,1-140,alpha-synuclein,Growth,Growth,SYUA_HUMAN_full_04-29-2022_b01.a2m,1,140,140,0.1,0.2,15711,0.707,99,6509.6,65.75353535,medium,62,0.6262626263,SYUA_HUMAN_Newberry_2020.csv,Fitness Score,-1,mutant,SYUA_HUMAN_theta_0.2.npy,SYUA_HUMAN.pdb,1-140,0.1,,OrganismalFitness +TADBP_HUMAN_Bolognesi_2019,TADBP_HUMAN_Bolognesi_2019.csv,TADBP_HUMAN,Human,Homo 
sapiens,MSEYIRVTEDENDEPIEIPSEDDGTVLLSTVTAQFPGACGLRYRNPVSQCMRGVRLVEGILHAPDAGWGNLVYVVNYPKDNKRKMDETDASSAVKVKRAVQKTSDLIVLGLPWKTTEQDLKEYFSTFGEVLMVQVKKDLKTGHSKGFGFVRFTEYETQVKVMSQRHMIDGRWCDCKLPNSKQSQDEPLRSRKVFVGRCTEDMTEDELREFFSQYGDVMDVFIPKPFRAFAFVTFADDQIAQSLCGEDLIIKGISVHISNAEPKHNSNRQLERSGRFGGNPGGFGNQGGFGNSRGGGAGLGNNQGSNMGGGMNFGAFSINPAMMAAAQAALQSSWGMMGMLASQQNQSGPSGNNQNQGNMQREPNQAFGSGNNSYSGSNSGAAIGWGSASNAGSGSGFNGGFGSSMDSKSSGWGM,414,FALSE,1196,1196,0,0.003661517102,median,Bolognesi,The mutational landscape of a prion-like domain,2019,10.1038/s41467-019-12101-z,290-373,TARDBP,growth (surrogate for toxicity),Growth,TADBP_HUMAN_full_11-26-2021_b09.a2m,1,414,414,0.9,0.2,1211,0.911,377,147.3,0.3907161804,low,8,0.02122015915,TADBP_HUMAN_Bolognesi_2019.csv,toxicity,1,mutant_uniprot_1,TADBP_HUMAN_theta_0.2.npy,TADBP_HUMAN.pdb,1-414,0.1,,OrganismalFitness +TAT_HV1BR_Fernandes_2016,TAT_HV1BR_Fernandes_2016.csv,TAT_HV1BR,Virus,Human immunodeficiency virus type 1 group M subtype B (isolate BRU/LAI) (HIV-1),MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFMTKALGISYGRKKRRQRRRAHQNSQTHQASLSKQPTSQSRGDPTGPKE,86,FALSE,1577,1577,0,-0.2,manual,Fernandes,Functional Segregation of Overlapping Genes in HIV,2016,10.1016/j.cell.2016.11.031,1-86,HIV tat,Viral replication,Growth,TAT_HV1BR_full_theta0.99_04-29-2022_b09.a2m,1,86,86,0.9,0.01,12155,0.988,85,9925,116.7647059,high,49,0.5764705882,TAT_HV1BR_Fernandes_2016.csv,sel_coeff_mean,1,mutant,TAT_HV1BR_theta_0.01.npy,TAT_HV1BR.pdb,1-86,0.1,,OrganismalFitness +TCRG1_MOUSE_Tsuboyama_2023_1E0L,TCRG1_MOUSE_Tsuboyama_2023_1E0L.csv,TCRG1_MOUSE,Eukaryote,Mus musculus,GATAVSEWTEYKTADGKTYYYNNRTLESTWEKPQELK,37,TRUE,1058,621,437,-1.2,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-60,1-37,Transcription elongation regulator 1,Stability,cDNA display 
proteolysis,TCRG1_MOUSE_2023-08-07_b08.a2m,1,37,37,0.8,0.2,43363,0.865,32,2819.7,88.115625,Medium,14,0.4375,Tsuboyama2023_Dataset2_Dataset57,ddG_ML_float,1,mut_type,TCRG1_MOUSE_theta0.2_2023-08-07_b08.npy,TCRG1_MOUSE.pdb,1-37,1,,Stability +THO1_YEAST_Tsuboyama_2023_2WQG,THO1_YEAST_Tsuboyama_2023_2WQG.csv,THO1_YEAST,Eukaryote,Saccharomyces cerevisiae,SADYSSLTVVQLKDLLTKRNLSVGGLKNEWVQRLIKDDEES,41,TRUE,1279,656,623,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-61,1-41,Protein THO1,Stability,cDNA display proteolysis,THO1_YEAST_2023-08-07_b05.a2m,1,41,41,0.5,0.2,54877,0.805,33,8516.7,258.0818182,High,15,0.4545454545,Tsuboyama2023_Dataset2_Dataset58,ddG_ML_float,1,mut_type,THO1_YEAST_theta0.2_2023-08-07_b05.npy,THO1_YEAST.pdb,1-41,1,,Stability +TNKS2_HUMAN_Tsuboyama_2023_5JRT,TNKS2_HUMAN_Tsuboyama_2023_5JRT.csv,TNKS2_HUMAN,Human,Homo sapiens,FSITQFVRNLGLEHLMDIFEREQITLRVLVEMGHKELKEIGINAYGHREKLIKGVERLI,59,TRUE,1479,1118,361,-0.9451205822,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-62,1-59,Poly [ADP-ribose] polymerase tankyrase-2,Stability,cDNA display proteolysis,TNKS2_HUMAN_2023-08-07_b03.a2m,1,59,59,0.3,0.2,270654,0.949,56,11206,200.1071429,High,26,0.4642857143,Tsuboyama2023_Dataset2_Dataset59,ddG_ML_float,1,mut_type,TNKS2_HUMAN_theta0.2_2023-08-07_b03.npy,TNKS2_HUMAN.pdb,1-59,1,,Stability +TPK1_HUMAN_Weile_2017,TPK1_HUMAN_Weile_2017.csv,TPK1_HUMAN,Human,Homo sapiens,MEHAFTPLEPLLSTGNLKYCLVILNQPLDNYFRHLWNKALLRACADGGANRLYDITEGERESFLPEFINGDFDSIRPEVREYYATKGCELISTPDQDHTDFTKCLKMLQKKIEEKDLKVDVIVTLGGLAGRFDQIMASVNTLFQATHITPFPIIIIQEESLIYLLQPGKHRLHVDTGMEGDWCGLIPVGQPCMQVTTTGLKWNLTNDVLAFGTLVSTSNTYDGSGVVTVETDHPLLWTMAIKS,243,FALSE,3181,3181,0,0.5,manual,Weile,A framework for exhaustively mapping functional missense variants,2017,10.15252/msb.20177908,2-243,Thiamin pyrophosphokinase 1,Yeast 
growth,complementation,TPK1_HUMAN_full_11-26-2021_b02.a2m,1,243,243,0.2,0.2,21515,0.823,200,7122.6,35.613,medium,234,1.17,TPK1_HUMAN_Weile_2017.csv,screenscore,1,mutant,TPK1_HUMAN_theta_0.2.npy,TPK1_HUMAN.pdb,1-243,0.1,,OrganismalFitness +TPMT_HUMAN_Matreyek_2018,TPMT_HUMAN_Matreyek_2018.csv,TPMT_HUMAN,Human,Homo sapiens,MDGTRTSLDIEEYSDTEVQKNQVLTLEEWQDKWVNGKTAFHQEQGHQLLKKHLDTFLKGKSGLRVFFPLCGKAVEMKWFADRGHSVVGVEISELGIQEFFTEQNLSYSEEPITEIPGTKVFKSSSGNISLYCCSIFDLPRTNIGKFDMIWDRGALVAINPGDRKCYADTMFSLLGKKFQYLLCVLSYDPTKHPGPPFYVPHAEIERLFGKICNIRCLEKVDAFEERHKSWGIDCLFEKLYLLTEK,245,FALSE,3648,3648,0,0.5,manual,Matreyek,Multiplex Assessment of Protein Variant Abundance by Massively Parallel Sequencing,2018,10.1038/s41588-018-0122-z,1-245,Thiopurine S-methyltransferase,Protein abundance (FACS sorting for abundance of GFP-fused target),Protein stability,TPMT_HUMAN_full_11-26-2021_b03.a2m,1,245,245,0.3,0.2,19526,0.731,179,6296.8,35.17765363,medium,109,0.6089385475,TPMT_HUMAN_Matreyek_2018.csv,score,1,mutant,TPMT_HUMAN_theta_0.2.npy,TPMT_HUMAN.pdb,1-245,0.1,,Expression +TPOR_HUMAN_Bridgford_2020,TPOR_HUMAN_Bridgford_2020.csv,TPOR_HUMAN,Human,Homo sapiens,MPSWALFMVTSCLLLAPQNLAQVSSQDVSLLASDSEPLKCFSRTFEDLTCFWDEEEAAPSGTYQLLYAYPREKPRACPLSSQSMPHFGTRYVCQFPDQEEVRLFFPLHLWVKNVFLNQTRTQRVLFVDSVGLPAPPSIIKAMGGSQPGELQISWEEPAPEISDFLRYELRYGPRDPKNSTGPTVIQLIATETCCPALQRPHSASALDQSPCAQPTMPWQDGPKQTSPSREASALTAEGGSCLISGLQPGNSYWLQLRSEPDGISLGGSWGSWSLPVTVDLPGDAVALGLQCFTLDLKNVTCQWQQQDHASSQGFFYHSRARCCPRDRYPIWENCEEEEKTNPGLQTPQFSRCHFKSRNDSIIHILVEVTTAPGTVHSYLGSPFWIHQAVRLPTPNLHWREISSGHLELEWQHPSSWAAQETCYQLRYTGEGHQDWKVLEPPLGARGGTLELRPRSRYRLQLRARLNGPTYQGPWSSWSDPTRVETATETAWISLVTALHLVLGLNAVLGLLLLRWQFPAHYRRLRHALWPSLPDLHRVLGQYLRDTAALSPPKATVSDTCEEVEPSLLEILPKSSERTPLPLCSSQAQMDYRRLQPSCLGTMPLSVCPPMAESGSCCTTHIANHSYLPLSYWQQP,635,FALSE,562,562,0,-0.1,manual,Bridgford,Novel drivers and modifiers of MPL-dependent oncogenic transformation identified by deep mutational scanning,2020,10.1182/blood.2019002561,487-517,MPL,growth/survival 
(surrogate for TpoR/MPL enhanced constitutive activation),Growth,TPOR_HUMAN_full_11-26-2021_b01.a2m,1,635,635,0.1,0.2,937,0.825,524,128.4,0.2450381679,low,0,0,TPOR_HUMAN_Bridgford_S505N_2020.csv,score,1,mutant_uniprot_1,TPOR_HUMAN_theta_0.2.npy,TPOR_HUMAN.pdb,1-635,0.1,,OrganismalFitness +TRPC_SACS2_Chan_2017,TRPC_SACS2_Chan_2017.csv,TRPC_SACS2,Prokaryote,Thermus thermophilus,MPRYLKGWLKDVVQLSLRRPSFRASRQRPIISLNERILEFNKRNITAIIAEYKRKSPSGLDVERDPIEYSKFMERYAVGLSILTEEKYFNGSYETLRKIASSVSIPILMKDFIVKESQIDDAYNLGADTVLLIVKILTERELESLLEYARSYGMEPLIEINDENDLDIALRIGARFIGINSRDLETLEINKENQRKLISMIPSNVVKVAESGISERNEIEELRKLGVNAFLIGSSLMRNPEKIKEFIL,248,FALSE,1519,1519,0,-0.5,manual,Chan,Correlation of fitness landscapes from three orthologous TIM barrels originates from sequence and structure constraints,2017,10.1038/ncomms14614,44-235,TIM Barrell (S. solfataricus),fitness,Growth,TRPC_SACS2_full_11-26-2021_b07.a2m,1,248,248,0.7,0.2,52935,0.944,234,10651.1,45.51752137,medium,364,1.555555556,TRPC_SACS2_Chan_2017.csv,fitness,1,mutant,TRPC_SACS2_theta_0.2.npy,TRPC_SACS2.pdb,1-248,0.1,,OrganismalFitness +TRPC_THEMA_Chan_2017,TRPC_THEMA_Chan_2017.csv,TRPC_THEMA,Prokaryote,Thermus thermophilus,MRRLWEIVEAKKKDILEIDGENLIVQRRNHRFLEVLSGKERVKIIAEFKKASPSAGDINADASLEDFIRMYDELADAISILTEKHYFKGDPAFVRAARNLTSRPILAKDFYIDTVQVKLASSVGADAILIIARILTAEQIKEIYEAAEELGMDSLVEVHSREDLEKVFSVIRPKIIGINTRDLDTFEIKKNVLWELLPLVPDDTVVVAESGIKDPRELKDLRGKVNAVLVGTSIMKAENPRRFLEEMRAWSE,252,FALSE,1519,1519,0,-0.5,manual,Chan,Correlation of fitness landscapes from three orthologous TIM barrels originates from sequence and structure constraints,2017,10.1038/ncomms14614,40-233,TIM Barrell (T. 
maritima),fitness,Growth,TRPC_THEMA_full_11-26-2021_b07.a2m,1,252,252,0.7,0.2,52988,0.948,239,10582.5,44.27824268,medium,380,1.589958159,TRPC_THEMA_Chan_2017.csv,fitness,1,mutant,TRPC_THEMA_theta_0.2.npy,TRPC_THEMA.pdb,1-252,0.1,,OrganismalFitness +UBC9_HUMAN_Weile_2017,UBC9_HUMAN_Weile_2017.csv,UBC9_HUMAN,Human,Homo sapiens,MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPSY,159,FALSE,2563,2563,0,0.384407289,median,Weile,A framework for exhaustively mapping functional missense variants,2017,10.15252/msb.20177908,1-158,SUMO-conjugating enzyme UBC9,Yeast growth,complementation,UBC9_HUMAN_full_11-26-2021_b03.a2m,1,159,159,0.3,0.2,69788,0.849,135,8394,62.17777778,medium,89,0.6592592593,UBC9_HUMAN_Weile_2017.csv,screenscore,1,mutant,UBC9_HUMAN_theta_0.2.npy,UBC9_HUMAN.pdb,1-159,0.1,,OrganismalFitness +UBE4B_HUMAN_Tsuboyama_2023_3L1X,UBE4B_HUMAN_Tsuboyama_2023_3L1X.csv,UBE4B_HUMAN,Human,Homo sapiens,DAPDEFRDPLMDTLMTDPVRLPSGTIMDRSIILRHLLNSPTDPFNRQTLTESMLEPVPELKEQIQAWMR,69,TRUE,3622,1118,2504,-1,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-63,1-69,Ubiquitin conjugation factor E4 B,Stability,cDNA display proteolysis,UBE4B_HUMAN_2023-08-07_b04.a2m,1,69,69,0.4,0.2,310943,0.928,64,34185.4,534.146875,High,52,0.8125,Tsuboyama2023_Dataset2_Dataset60,ddG_ML_float,1,mut_type,UBE4B_HUMAN_theta0.2_2023-08-07_b04.npy,UBE4B_HUMAN.pdb,1-69,1,,Stability +UBE4B_MOUSE_Starita_2013,UBE4B_MOUSE_Starita_2013.csv,UBE4B_MOUSE,Eukaryote,Mus 
musculus,MEELSADEIRRRRLARLAGGQTSQPTTPLTSPQRENPPGPPIAASAPGPSQSLGLNVHNMTPATSPIGAAGVAHRSQSSEGVSSLSSSPSNSLETQSQSLSRSQSMDIDGVSCEKSMSQVDVDSGIENMEVDENDRREKRSLSDKEPSSGPEVSEEQALQLVCKIFRVSWKDRDRDVIFLSSLSAQFKQNPKEVFSDFKDLIGQILMEVLMMSTQTRDENPFASLTATSQPIATAARSPDRNLMLNTGSSSGTSPMFCNMGSFSTSSLSSLGASGGASNWDSYSDHFTIETCKETDMLNYLIECFDRVGIEEKKAPKMCSQPAVSQLLSNIRSQCISHTALVLQGSLTQPRSLQQPSFLVPYMLCRNLPYGFIQELVRTTHQDEEVFKQIFIPILQGLALAAKECSLESDYFKYPLMALGELCETKFGKTHPMCNLVASLPLWLPKSLSPGSGRELQRLSYLGAFFSFSVFAEDDAKVVEKYFSGPAITLENTRVVSQSLQHYLELGRQELFKILHSILLNGETREAALSYMAALVNANMKKAQMQADDRLVSTDGFMLNLLWVLQQLSTKIKLETVDPTYIFHPRCRITLPNDETRINATMEDVNERLTELYGDQPPFSEPKFPTECFFLTLHAHHLSILPSCRRYIRRLRAIRELNRTVEDLKNNESQWKDSPLATRHREMLKRCKTQLKKLVRCKACADAGLLDESFLRRCLNFYGLLIQLMLRILDPAYPDVTLPLNSEVPKVFAALPEFYVEDVAEFLFFIVQYSPQVLYEPCTQDIVMFLVVMLCNQNYIRNPYLVAKLVEVMFMTNPSVQPRTQKFFEMIENHPLSTKLLVPSLMKFYTDVEHTGATSEFYDKFTIRYHISTIFKSLWQNIAHHGTFMEEFNSGKQFVRYINMLINDTTFLLDESLESLKRIHEVQEEMKNKEQWDQLPRDQQQARQSQLAQDERVSRSYLALATETVDMFHLLTKQVQKPFLRPELGPRLAAMLNFNLQQLCGPKCRDLKVENPEKYGFEPKKLLDQLTDIYLQLDCARFAKAIADDQRSYSKELFEEVISKMRKAGIKSTIAIEKFKLLAEKVEEIVAKNARAEIDYSDAPDEFRDPLMDTLMTDPVRLPSGTVMDRSIILRHLLNSPTDPFNRQMLTESMLEPVPELKEQIQAWMREKQSSDH,1173,FALSE,899,899,0,-1.8,manual,Starita,Activity-enhancing mutations in an E3 ubiquitin ligase identified by high-throughput mutagenesis,2013,10.1073/pnas.1303309110,1072-1173,Ube4b,Ligase activity (phage display),Auto-ubiquitination,UBE4B_MOUSE_full_11-26-2021_b05.a2m,1,1173,1173,0.5,0.2,4743,0.765,897,679.4,0.7574136009,low,49,0.05462653289,UBE4B_MOUSE_Starita_2013.csv,log2_ratio,1,mutant,UBE4B_MOUSE_theta_0.2.npy,UBE4B_MOUSE.pdb,1-1173,0.1,,Activity +UBR5_HUMAN_Tsuboyama_2023_1I2T,UBR5_HUMAN_Tsuboyama_2023_1I2T.csv,UBR5_HUMAN,Human,Homo sapiens,HRQALGERLYPRVQAMQPAFASKITGMLLELSPAQLLLLLASEDSLRARVDEAMELII,58,TRUE,1453,1094,359,-0.4460165437,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-64,1-58,E3 ubiquitin-protein ligase 
UBR5,Stability,cDNA display proteolysis,UBR5_HUMAN_2023-08-07_b05.a2m,1,58,58,0.5,0.2,17888,0.966,56,1031.7,18.42321429,Medium,14,0.25,Tsuboyama2023_Dataset2_Dataset61,ddG_ML_float,1,mut_type,UBR5_HUMAN_theta0.2_2023-08-07_b05.npy,UBR5_HUMAN.pdb,1-58,1,,Stability +VG08_BPP22_Tsuboyama_2023_2GP8,VG08_BPP22_Tsuboyama_2023_2GP8.csv,VG08_BPP22,Virus,Salmonella phage P22,ITGDVSAANKDAIRKQMDAAASKGDVETYRKLKAKLKGIR,40,FALSE,723,723,0,-0.2013306011,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-65,1-40,Scaffolding protein,Stability,cDNA display proteolysis,VG08_BPP22_2023-08-07_b05.a2m,1,40,40,0.5,0.01,102464,0.875,35,12963.6,370.3885714,High,13,0.3714285714,Tsuboyama2023_Dataset2_Dataset62,ddG_ML_float,1,mut_type,VG08_BPP22_theta0.01_2023-08-07_b05.npy,VG08_BPP22.pdb,1-40,1,,Stability +VILI_CHICK_Tsuboyama_2023_1YU5,VILI_CHICK_Tsuboyama_2023_1YU5.csv,VILI_CHICK,Eukaryote,Gallus gallus,KLETFPLDVLVNTAAEDLPRGVDPSRKENHLSDEDFKAVFGMTRSAFANLPLWKQQNLKKEKGLF,65,TRUE,2568,1202,1366,-0.7,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-66,1-65,Villin-1,Stability,cDNA display proteolysis,VILI_CHICK_2023-08-07_b01.a2m,1,65,65,0.1,0.2,254210,0.769,50,46507.8,930.156,High,19,0.38,Tsuboyama2023_Dataset2_Dataset63,ddG_ML_float,1,mut_type,VILI_CHICK_theta0.2_2023-08-07_b01.npy,VILI_CHICK.pdb,1-65,1,,Stability +VKOR1_HUMAN_Chiasson_2020_abundance,VKOR1_HUMAN_Chiasson_2020_abundance.csv,VKOR1_HUMAN,Human,Homo sapiens,MGSTWGSPGWVRLALCLTGLVLSLYALHVKAARARDRDYRALCDVGTAISCSRVFSSRWGRGFGLVEHVLGQDSILNQSNSIFGCIFYTLQLLLGCLRTRWASVLMLLSSLVSLAGSVYLAWILFFVLYDFCIVCITTYAINVSLMWLSFRKVQEPQGKAKRH,163,FALSE,2695,2695,0,0.7480893367,median,Chiasson,"Multiplexed measurement of variant abundance and activity reveals VKOR topology, active site and human variant impact",2020,10.7554/eLife.58026,2-163,VKORC1,protein abundance (eGFP fusion 
reporter),Fluorescence,VKOR1_HUMAN_full_11-26-2021_b03.a2m,1,163,163,0.3,0.2,14510,0.779,127,4655,36.65354331,medium,97,0.7637795276,VKOR1_HUMAN_Chiasson_2020.csv,abundance_score,1,variant,VKOR1_HUMAN_theta_0.2.npy,VKOR1_HUMAN.pdb,1-163,0.1,,Expression +VKOR1_HUMAN_Chiasson_2020_activity,VKOR1_HUMAN_Chiasson_2020_activity.csv,VKOR1_HUMAN,Human,Homo sapiens,MGSTWGSPGWVRLALCLTGLVLSLYALHVKAARARDRDYRALCDVGTAISCSRVFSSRWGRGFGLVEHVLGQDSILNQSNSIFGCIFYTLQLLLGCLRTRWASVLMLLSSLVSLAGSVYLAWILFFVLYDFCIVCITTYAINVSLMWLSFRKVQEPQGKAKRH,163,FALSE,697,697,0,0.7,manual,Chiasson,"Multiplexed measurement of variant abundance and activity reveals VKOR topology, active site and human variant impact",2020,10.7554/eLife.58026,3-163,VKORC1,carboxylation activity (carboxylation reporter on cell surface),enzymatic activity,VKOR1_HUMAN_full_11-26-2021_b03.a2m,1,163,163,0.3,0.2,14510,0.779,127,4655,36.65354331,medium,97,0.7637795276,VKOR1_HUMAN_Chiasson_2020.csv,activity_score,1,variant,VKOR1_HUMAN_theta_0.2.npy,VKOR1_HUMAN.pdb,1-163,0.1,,Activity +VRPI_BPT7_Tsuboyama_2023_2WNM,VRPI_BPT7_Tsuboyama_2023_2WNM.csv,VRPI_BPT7,Virus,Escherichia phage,SLSVDNKKFWATVESSEHSFEVPIYAETLDEALELAEWQYVPAGFEVTRVRPCVAP,56,FALSE,1047,1047,0,-1.1,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-67,1-56,Bacterial RNA polymerase inhibitor,Stability,cDNA display proteolysis,VRPI_BPT7_2023-08-07_b02.a2m,1,56,56,0.2,0.01,6266,0.875,49,1555.8,31.75102041,Medium,3,0.0612244898,Tsuboyama2023_Dataset2_Dataset64,ddG_ML_float,1,mut_type,VRPI_BPT7_theta0.01_2023-08-07_b02.npy,VRPI_BPT7.pdb,1-56,1,,Stability +YAIA_ECOLI_Tsuboyama_2023_2KVT,YAIA_ECOLI_Tsuboyama_2023_2KVT.csv,YAIA_ECOLI,Prokaryote,Escherichia coli,PREAYIVTIEKGKPGQTVTWYQLRADHPKPDSLISEHPTAQEAMDAKKRYED,52,TRUE,1890,928,962,-1.953132017,median,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-68,1-52,Uncharacterized 
protein YaiA,Stability,cDNA display proteolysis,YAIA_ECOLI_2023-08-07_b03.a2m,1,52,52,0.3,0.2,5877,0.788,41,737.2,17.9804878,Medium,5,0.1219512195,Tsuboyama2023_Dataset2_Dataset65,ddG_ML_float,1,mut_type,YAIA_ECOLI_theta0.2_2023-08-07_b03.npy,YAIA_ECOLI.pdb,1-52,1,,Stability +YAP1_HUMAN_Araya_2012,YAP1_HUMAN_Araya_2012.csv,YAP1_HUMAN,Human,Homo sapiens,MDPGQQPPPQPAPQGQGQPPSQPPQGQGPPSGPGQPAPAATQAAPQAPPAGHQIVHVRGDSETDLEALFNAVMNPKTANVPQTVPMRLRKLPDSFFKPPEPKSHSRQASTDAGTAGALTPQHVRAHSSPASLQLGAVSPGTLTPTGVVSGPAATPTAQHLRQSSFEIPDDVPLPAGWEMAKTSSGQRYFLNHIDQTTTWQDPRKAMLSQMNVTAPTSPPVQQNMMNSASGPLPDGWEQAMTQDGEIYYINHKNKTTSWLDPRLDPRFAMNQRISQSAPVKQPPPLAPQSPQGGVMGGSNSNQQQQMRLQQLQMEKERLRLKQQELLRQAMRNINPSTANSPKCQELALRSQLPTLEQDGGTQNPVSSPGMSQELRTMTTNSSDPFLNSGTYHSRDESTDSGLSMSSYSVPRTPDDFLNSVDEMDTGDTINQSTLPSQQNRFPDYLEAIPGTNVDLGTLEGDGMNIEGEELMPSLQEALSSDILNDMESVLAATKLDKESFLTWL,504,TRUE,10075,362,9713,0.6236402571,median,Araya,"A fundamental protein property, thermodynamic stability, revealed solely from large-scale measurements of protein function",2012,10.1073/pnas.1209751109,170-203,YAP1,peptide binding,Binding,YAP1_HUMAN_full_11-26-2021_b02.a2m,1,504,504,0.2,0.2,1604,0.859,433,132.6,0.3062355658,low,1,0.002309468822,YAP1_HUMAN_Araya_2012.csv,W,1,mutant,YAP1_HUMAN_theta_0.2.npy,YAP1_HUMAN.pdb,1-504,0.1,,Binding +YNZC_BACSU_Tsuboyama_2023_2JVD,YNZC_BACSU_Tsuboyama_2023_2JVD.csv,YNZC_BACSU,Prokaryote,Bacillus subtilis,MISNAKIARINELAAKAKAGVITEEEKAEQQKLRQEYLK,39,TRUE,2300,714,1586,-1,manual,Tsuboyama,Mega-scale experimental analysis of protein folding stability in biology and design,2023,10.1038/s41586-023-06328-69,1-39,UPF0291 protein YnzC,Stability,cDNA display proteolysis,YNZC_BACSU_2023-08-07_b07.a2m,1,39,39,0.7,0.2,7116,0.974,38,1588.3,41.79736842,Medium,13,0.3421052632,Tsuboyama2023_Dataset2_Dataset66,ddG_ML_float,1,mut_type,YNZC_BACSU_theta0.2_2023-08-07_b07.npy,YNZC_BACSU.pdb,1-39,1,,Stability \ No newline at end of file diff --git 
a/scripts/scoring_DMS_zero_shot/scoring_CARP_substitutions.sh b/scripts/scoring_DMS_zero_shot/scoring_CARP_substitutions.sh new file mode 100644 index 0000000..ea7b973 --- /dev/null +++ b/scripts/scoring_DMS_zero_shot/scoring_CARP_substitutions.sh @@ -0,0 +1,16 @@ +source ../zero_shot_config.sh +source activate proteingym_env + +export model_name="carp_640M" #[carp_600k|carp_38M|carp_76M|carp_640M] +export model_path="Path to CARP checkpoints" +export DMS_output_score_folder=${DMS_output_score_folder_subs}/CARP +export performance_file='CARP_640M_performance.csv' + +srun python3 ../../proteingym/baselines/carp_mif/compute_fitness.py \ + --model_name ${model_name} \ + --model_path ${model_path} \ + --DMS_reference_file_path ${DMS_reference_file_path_subs} \ + --DMS_data_folder ${DMS_data_folder_subs} \ + --DMS_index $SLURM_ARRAY_TASK_ID \ + --output_scores_folder ${DMS_output_score_folder} \ + --performance_file ${performance_file} \ No newline at end of file diff --git a/scripts/scoring_DMS_zero_shot/score_GEMME_substitutions.sh b/scripts/scoring_DMS_zero_shot/scoring_GEMME_substitutions.sh similarity index 100% rename from scripts/scoring_DMS_zero_shot/score_GEMME_substitutions.sh rename to scripts/scoring_DMS_zero_shot/scoring_GEMME_substitutions.sh diff --git a/scripts/scoring_DMS_zero_shot/scoring_MIF_substitutions.sh b/scripts/scoring_DMS_zero_shot/scoring_MIF_substitutions.sh new file mode 100644 index 0000000..e66b97a --- /dev/null +++ b/scripts/scoring_DMS_zero_shot/scoring_MIF_substitutions.sh @@ -0,0 +1,17 @@ +source ../zero_shot_config.sh +source activate proteingym_env + +export model_name="mifst" #[mif|mifst] +export model_path="Path to MIFST checkpoints" +export DMS_output_score_folder=${DMS_output_score_folder_subs}/MIFST +export performance_file='MIFST_performance.csv' + +srun python3 ../../proteingym/baselines/carp_mif/compute_fitness.py \ + --model_name ${model_name} \ + --model_path ${model_path} \ + --DMS_reference_file_path 
${DMS_reference_file_path_subs} \ + --DMS_data_folder ${DMS_data_folder_subs} \ + --DMS_index $SLURM_ARRAY_TASK_ID \ + --output_scores_folder ${DMS_output_score_folder} \ + --performance_file ${performance_file} \ + --structure_data_folder ${DMS_structure_folder} \ No newline at end of file