diff --git a/backend/src/app.py b/backend/src/app.py index e6c2065..7e9c077 100644 --- a/backend/src/app.py +++ b/backend/src/app.py @@ -16,6 +16,7 @@ from save_document import pack_document, unpack_document from app_state import create_app_state from validations import validate_fasta +import cluster import platform from Bio import SeqIO import psutil @@ -497,8 +498,22 @@ def load_data_and_stats(self, doc_id: str): cols_path = os.path.join(cols_dir, f"{cols_file_base}_cols.csv") if os.path.exists(cols_path): - identity_scores = read_csv(cols_path, skiprows=1).values.tolist() + cols_data = read_csv(cols_path, skiprows=1).values.tolist() + + id_map = {} + identity_scores = [] + + for row in cols_data: + a, b = row[:2] + if a not in id_map: + id_map[a] = len(id_map) + if b not in id_map: + id_map[b] = len(id_map) + identity_scores.append([id_map[a], id_map[b]] + list(row[2:])) + + ids = list(id_map.keys()) else: + ids = [] identity_scores = [] df = read_csv( @@ -514,10 +529,10 @@ def load_data_and_stats(self, doc_id: str): max_val = int(nanmax(data_no_diag)) # TODO might be able to make one tick text object for both to use? 
- return data, tick_text, min_val, max_val, identity_scores, stats_df + return data, tick_text, min_val, max_val, ids, identity_scores, stats_df def get_data(self, doc_id: str): - data, tick_text, min_val, max_val, identity_scores, stats_df = ( + data, tick_text, min_val, max_val, ids, identity_scores, stats_df = ( self.load_data_and_stats(doc_id) ) heat_data = DataFrame(data, index=tick_text) @@ -526,11 +541,24 @@ def get_data(self, doc_id: str): data_to_dump = dict( metadata=dict(minVal=min_val, maxVal=max_val), data=([tick_text] + parsedData), + ids=ids, identity_scores=identity_scores, full_stats=stats_df.values.tolist() ) return json.dumps(data_to_dump) + def generate_cluster_data(self, doc_id: str, threshold_one: int, threshold_two: int = 0): + doc = get_document(doc_id) + if doc is None: + raise Exception(f"Could not find document: {doc_id}") + matrix_path = get_matrix_path(doc) + + df = cluster.export(matrix_path, threshold_one, threshold_two, False) + df = df.rename(columns={str(df.columns[0]): 'id', str(df.columns[1]): 'group'}) + if len(df.columns) > 2 and df.columns[2] is not None: + df = df.rename(columns={str(df.columns[2]): 'subgroup'}) + return df.to_dict(orient="records") + def new_doc(self): id = make_doc_id() new_document(id) @@ -570,6 +598,7 @@ def save_doc_settings(self, args: dict): args["id"], dataView=args["dataView"], heatmap=args["heatmap"], + clustermap=args["clustermap"], distribution=args["distribution"], ) doc = get_document(args["id"]) diff --git a/backend/src/cluster.py b/backend/src/cluster.py index 7341993..0e9de5b 100644 --- a/backend/src/cluster.py +++ b/backend/src/cluster.py @@ -1,98 +1,127 @@ import os import numpy as np -from scipy.sparse import csr_matrix -from scipy.sparse.csgraph import connected_components import pandas as pd from collections import defaultdict +import networkx as nx - -def process_groups(threshold, data, index): - # create adjacensy matrix to id which cells are related by the threshold marking as binary 
with (1) for related or (0) for not meeting the threshold - adjacency_matrix = (data >= threshold).astype(int) - # create sparse matrix(absent 0s) for memeory efficiancy - sparse_matrix = csr_matrix(adjacency_matrix) - # identify connected components - _, labels = connected_components( - csgraph=sparse_matrix, directed=False, return_labels=True - ) - groups_dict = defaultdict(list) - for i, label in enumerate(labels): - groups_dict[label].append(index[i]) - groups = {} - for indx, clade in enumerate(labels): - groups.update({indx: clade}) - return groups_dict - +## switching from scipy to networkx. takes two threshold inputs now +def process_groups(data, index, threshold_1, threshold_2=0): + # check for a threshold 2 + if threshold_2 is None or threshold_2 == 0: + # set all values in the matrix that meet threshold 1 to 1 and all lower or NaN values to 0 + adjacency_matrix = (~np.isnan(data) & (data >= threshold_1)).astype(int) + # Create a graph from the adjacency matrix + G1 = nx.from_numpy_array(adjacency_matrix, parallel_edges=False, create_using=None) + + # empty dict to store the groups + groups_dict = defaultdict(list) + + # look for connected components in graph G1 + for i, component in enumerate(nx.connected_components(G1)): + for node_idx in component: + groups_dict[i].append(index[node_idx]) + + return groups_dict + else: + # Create two adjacency matrices, one for each threshold ~ is the bitwise 'not' operator + adjacency_1 = (~np.isnan(data) & (data >= threshold_1)).astype(int) + adjacency_2 = (~np.isnan(data) & (data >= threshold_2)).astype(int) + + # convert adjacency matrices to networkx graphs + G1 = nx.from_numpy_array(adjacency_1) + G2 = nx.from_numpy_array(adjacency_2) + + # find primary clusters with threshold_1 + groups_dict_1 = defaultdict(list) + for i, component in enumerate(nx.connected_components(G1)): + for node_idx in component: + groups_dict_1[i].append(index[node_idx]) + + # find subclusters with threshold_2 + groups_dict_2 = 
defaultdict(list) + for i, component in enumerate(nx.connected_components(G2)): + for node_idx in component: + groups_dict_2[i].append(index[node_idx]) + + # return both cluster sets + return groups_dict_1, groups_dict_2 def cluster_by_identity(clusters, nodes): output = [] reverse_clusters = {} - - # Create reverse lookup dictionary were values in value list are extracted to key and groups are assigned to value + + # create lookup table from sequence ID to primary cluster ID for group, values in clusters.items(): for value in values: reverse_clusters[value] = group - - # Initialize subgroup counters + + # initialize counters for subgroups within each primary cluster subgroup_counters = {group: 1 for group in clusters.keys()} - - # Iterate through nodes to determine the subgroup_number within each group_number + + # assign subgroups within each primary cluster for _, node_list in nodes.items(): if node_list: + # get first node to determine which primary cluster this belongs to first_value = node_list[0] if first_value in reverse_clusters: + # get primary cluster ID (add 1 for human-readable indexing) group_number = reverse_clusters[first_value] + 1 + # get next available subgroup number for this primary cluster subgroup_number = subgroup_counters[reverse_clusters[first_value]] + + # process all nodes in this subcluster for value in node_list: + # only include if node belongs to the same primary cluster if value in reverse_clusters: output.append((value, group_number, subgroup_number)) + + # increment subgroup counter for this primary cluster subgroup_counters[reverse_clusters[first_value]] += 1 - + return output -def export(matrix_path, threshold_1=79, threshold_2=0): +def export(matrix_path, threshold_1=79, threshold_2=0, save_csv=True): output_dir = os.path.dirname(matrix_path) file_name = os.path.basename(matrix_path) file_base, _ = os.path.splitext(file_name) file_name = file_base.replace("_mat", "") output_file = os.path.join(output_dir, file_name + 
"_cluster.csv") - - # https://stackoverflow.com/a/57824142 - # SDT1 matrix CSVs do not have padding for columns + with open(matrix_path, "r") as temp_f: col_count = [len(l.split(",")) for l in temp_f.readlines()] column_names = [i for i in range(0, max(col_count))] - + df = pd.read_csv( matrix_path, delimiter=",", index_col=0, header=None, names=column_names ) - # extract index + index = df.index.tolist() - # convert df to np array data = df.to_numpy() - # format values data = np.round(data, 2) - # maintain order of threshold processing + if threshold_2 != 0 and threshold_1 >= threshold_2: threshold_1, threshold_2 = threshold_2, threshold_1 - # handle instances of no threshold_2 + if threshold_2 is None or threshold_2 == 0: - output = process_groups(threshold_1, data, index) + output = process_groups(data, index, threshold_1) flattened_output = [ (item, key + 1) for key, sublist in output.items() for item in sublist ] - df = pd.DataFrame(flattened_output) - df.columns = ["ID", "Group 1 - Theshold: " + str(threshold_1)] - + df_result = pd.DataFrame(flattened_output) + df_result.columns = ["SeqID", "Group - Threshold: " + str(threshold_1)] else: - clusters = process_groups(threshold_1, data, index) - nodes = process_groups(threshold_2, data, index) + clusters, nodes = process_groups(data, index, threshold_1, threshold_2) output = cluster_by_identity(clusters, nodes) - df = pd.DataFrame(output) - df.columns = [ + df_result = pd.DataFrame(output) + df_result.columns = [ "ID", - "Group 1 - Theshold: " + str(threshold_1), - "Group 2 - Theshold: " + str(threshold_2), + "Group - Threshold: " + str(threshold_1), + "Subgroup - Threshold: " + str(threshold_2), ] - df.to_csv(output_file, index=False) + + if save_csv: + df_result.to_csv(output_file, index=False) + + return df_result + diff --git a/backend/src/document_state.py b/backend/src/document_state.py index a949533..73472a2 100644 --- a/backend/src/document_state.py +++ b/backend/src/document_state.py @@ -23,6 +23,7 @@ 
"validation_error_id", "compute_stats", "heatmap", + "clustermap", "distribution" ], ) @@ -34,9 +35,7 @@ vmin=65, cellspace=1, annotation=False, - annotation_font_size=10, annotation_rounding=0, - annotation_alpha="0", showscale=True, titleFont="Sans Serif", showTitles=False, @@ -48,14 +47,30 @@ cbar_aspect=2.5, cbar_pad=10, axis_labels=False, - axlabel_xrotation=270, - axlabel_xfontsize=12, - axlabel_yrotation=360, - axlabel_yfontsize=12, + axlabel_xrotation=0, + axlabel_fontsize=12, + axlabel_yrotation=0, cutoff_1=95, cutoff_2=75 ) +default_clustermap_state = dict( + threshold_one=85, + threshold_two=0, + annotation=False, + titleFont="Sans Serif", + showTitles=False, + title="", + subtitle="", + xtitle="", + ytitle="", + axis_labels=False, + axlabel_xrotation=0, + axlabel_fontsize=12, + axlabel_yrotation=0, + cellspace=1, +) + visualization_defaults = dict( plotTitle="Distribution of Percent Identities", lineColor="hsl(9, 100%, 64%)", @@ -163,6 +178,7 @@ def create_document_state( validation_error_id=None, compute_stats=None, heatmap=default_heatmap_state, + clustermap=default_clustermap_state, distribution=default_distribution_state ): if filetype == "application/vnd.sdt" and tempdir_path: @@ -191,6 +207,7 @@ def create_document_state( validation_error_id=validation_error_id, compute_stats=compute_stats, heatmap=heatmap, + clustermap=clustermap, distribution=distribution ) @@ -210,6 +227,7 @@ def save_doc_settings(doc_state: DocState): settings = { "dataView": doc_state.dataView, "heatmap": doc_state.heatmap, + "clustermap": doc_state.clustermap, "distribution": doc_state.distribution } json.dump(settings, f, indent=2) diff --git a/backend/src/export_data.py b/backend/src/export_data.py index 66a8eb8..1029d43 100644 --- a/backend/src/export_data.py +++ b/backend/src/export_data.py @@ -6,7 +6,7 @@ import cluster from constants import data_file_suffixes -image_types = ["heatmap", "histogram", "violin"] +image_types = ["heatmap", "clustermap", "histogram", 
"violin"] def find_source_files(state: DocState, prefix, suffixes): with os.scandir(state.tempdir_path) as entries: @@ -44,7 +44,6 @@ def prepare_export_data(export_path: str, matrix_path: str, doc: DocState, args: if args["output_cluster"] == True: suffixes.append("_cluster") - # TODO: it's not intuitive that an export happens in here, move it outside? cluster.export( matrix_path, args["cluster_threshold_one"], @@ -62,8 +61,8 @@ def prepare_export_data(export_path: str, matrix_path: str, doc: DocState, args: ) image_filenames = { - img_type: f"{base_filename}_{img_type}.{image_format}" - for img_type in image_types + image_type: f"{base_filename}_{image_type}.{image_format}" + for image_type in image_types } image_destinations = { @@ -86,9 +85,9 @@ def do_export_data(export_path, image_destinations, image_format, doc, prefix, s shutil.copy2(entry.path, temp_destination_path) os.replace(temp_destination_path, destination_path) - for img_type in image_types: + for image_type in image_types: save_image_from_api( - data=args[f"{img_type}_image_data"], + data=args[f"{image_type}_image_data"], format=image_format, - destination=image_destinations[img_type], + destination=image_destinations[image_type], ) diff --git a/bun.lock b/bun.lock index b58ff1e..46c42c6 100644 --- a/bun.lock +++ b/bun.lock @@ -11,6 +11,7 @@ "react": "^18.2.0", "react-aria-components": "^1.5.0", "react-dom": "^18.2.0", + "react-icons": "^5.5.0", "react-plotly.js": "^2.6.0", "tinycolor2": "^1.6.0", "zod": "^3.23.8", @@ -1020,6 +1021,8 @@ "react-dom": ["react-dom@18.2.0", "", { "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.0" }, "peerDependencies": { "react": "^18.2.0" } }, "sha512-6IMTriUmvsjHUjNtEDudZfuDQUoWXVxKHhlEGSk81n4YFS+r/Kl99wXiwlVXtPBtJenozv2P+hxDsw9eA7Xo6g=="], + "react-icons": ["react-icons@5.5.0", "", { "peerDependencies": { "react": "*" } }, "sha512-MEFcXdkP3dLo8uumGI5xN3lDFNsRtrjbOEKDLD7yv76v4wpnEq2Lt2qeHaQOr34I/wPN3s3+N08WkQ+CW37Xiw=="], + "react-is": 
["react-is@16.13.1", "", {}, "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="], "react-plotly.js": ["react-plotly.js@2.6.0", "", { "dependencies": { "prop-types": "^15.8.1" }, "peerDependencies": { "plotly.js": ">1.34.0", "react": ">0.13.0" } }, "sha512-g93xcyhAVCSt9kV1svqG1clAEdL6k3U+jjuSzfTV7owaSU9Go6Ph8bl25J+jKfKvIGAEYpe4qj++WHJuc9IaeA=="], diff --git a/docs/images/2AppRun.png b/docs/images/2AppRun.png new file mode 100644 index 0000000..ea220b0 Binary files /dev/null and b/docs/images/2AppRun.png differ diff --git a/docs/images/3AppRun.png b/docs/images/3AppRun.png new file mode 100644 index 0000000..1fc3d18 Binary files /dev/null and b/docs/images/3AppRun.png differ diff --git a/docs/images/4AppRun.png b/docs/images/4AppRun.png new file mode 100644 index 0000000..f69bd39 Binary files /dev/null and b/docs/images/4AppRun.png differ diff --git a/docs/images/AppRun.png b/docs/images/AppRun.png new file mode 100644 index 0000000..f81973c Binary files /dev/null and b/docs/images/AppRun.png differ diff --git a/docs/images/Clustermap.png b/docs/images/Clustermap.png new file mode 100644 index 0000000..9f5f39a Binary files /dev/null and b/docs/images/Clustermap.png differ diff --git a/docs/images/ColorScale.png b/docs/images/ColorScale.png new file mode 100644 index 0000000..7ebadeb Binary files /dev/null and b/docs/images/ColorScale.png differ diff --git a/docs/images/DataType.png b/docs/images/DataType.png new file mode 100644 index 0000000..95d1878 Binary files /dev/null and b/docs/images/DataType.png differ diff --git a/docs/images/Discrete.png b/docs/images/Discrete.png new file mode 100644 index 0000000..0f38a9d Binary files /dev/null and b/docs/images/Discrete.png differ diff --git a/docs/images/Export.png b/docs/images/Export.png new file mode 100644 index 0000000..28b2d4f Binary files /dev/null and b/docs/images/Export.png differ diff --git a/docs/images/FileMenu.png b/docs/images/FileMenu.png new file mode 
100644 index 0000000..bc5d323 Binary files /dev/null and b/docs/images/FileMenu.png differ diff --git a/docs/images/Heatmap.png b/docs/images/Heatmap.png new file mode 100644 index 0000000..ffb1ca2 Binary files /dev/null and b/docs/images/Heatmap.png differ diff --git a/docs/images/HeatmapBar.png b/docs/images/HeatmapBar.png new file mode 100644 index 0000000..cc19200 Binary files /dev/null and b/docs/images/HeatmapBar.png differ diff --git a/docs/images/HistoBar.png b/docs/images/HistoBar.png new file mode 100644 index 0000000..d693bca Binary files /dev/null and b/docs/images/HistoBar.png differ diff --git a/docs/images/Histogram.png b/docs/images/Histogram.png new file mode 100644 index 0000000..e9d5b1b Binary files /dev/null and b/docs/images/Histogram.png differ diff --git a/docs/images/LeftSidebar.png b/docs/images/LeftSidebar.png new file mode 100644 index 0000000..c616163 Binary files /dev/null and b/docs/images/LeftSidebar.png differ diff --git a/docs/images/LoadMain.png b/docs/images/LoadMain.png new file mode 100644 index 0000000..53ab5aa Binary files /dev/null and b/docs/images/LoadMain.png differ diff --git a/docs/images/Loader.png b/docs/images/Loader.png new file mode 100644 index 0000000..e19d0bd Binary files /dev/null and b/docs/images/Loader.png differ diff --git a/docs/images/Tabs.png b/docs/images/Tabs.png new file mode 100644 index 0000000..a58ebda Binary files /dev/null and b/docs/images/Tabs.png differ diff --git a/docs/images/Violin.png b/docs/images/Violin.png new file mode 100644 index 0000000..0056e45 Binary files /dev/null and b/docs/images/Violin.png differ diff --git a/docs/images/ViolinBar.png b/docs/images/ViolinBar.png new file mode 100644 index 0000000..72924c9 Binary files /dev/null and b/docs/images/ViolinBar.png differ diff --git a/docs/images/ViolinButtons.png b/docs/images/ViolinButtons.png new file mode 100644 index 0000000..5b70531 Binary files /dev/null and b/docs/images/ViolinButtons.png differ diff --git 
a/docs/images/app.png b/docs/images/app.png deleted file mode 100644 index 92b0b99..0000000 Binary files a/docs/images/app.png and /dev/null differ diff --git a/docs/images/manual-01.jpg b/docs/images/manual-01.jpg deleted file mode 100644 index 0d13ab4..0000000 Binary files a/docs/images/manual-01.jpg and /dev/null differ diff --git a/docs/images/manual-02.jpg b/docs/images/manual-02.jpg deleted file mode 100644 index 5c4b3bc..0000000 Binary files a/docs/images/manual-02.jpg and /dev/null differ diff --git a/docs/images/manual-06.jpg b/docs/images/manual-06.jpg deleted file mode 100644 index d16c0fb..0000000 Binary files a/docs/images/manual-06.jpg and /dev/null differ diff --git a/docs/images/manual-07.jpg b/docs/images/manual-07.jpg deleted file mode 100644 index 9d7a0ac..0000000 Binary files a/docs/images/manual-07.jpg and /dev/null differ diff --git a/docs/images/manual-08.gif b/docs/images/manual-08.gif deleted file mode 100644 index 77c0df3..0000000 Binary files a/docs/images/manual-08.gif and /dev/null differ diff --git a/docs/images/manual-09.jpg b/docs/images/manual-09.jpg deleted file mode 100644 index f3449a0..0000000 Binary files a/docs/images/manual-09.jpg and /dev/null differ diff --git a/docs/images/manual-10.jpg b/docs/images/manual-10.jpg deleted file mode 100644 index b95c35f..0000000 Binary files a/docs/images/manual-10.jpg and /dev/null differ diff --git a/docs/images/manual-11.jpg b/docs/images/manual-11.jpg deleted file mode 100644 index 1558fd9..0000000 Binary files a/docs/images/manual-11.jpg and /dev/null differ diff --git a/docs/images/manual-12.jpg b/docs/images/manual-12.jpg deleted file mode 100644 index 0227f20..0000000 Binary files a/docs/images/manual-12.jpg and /dev/null differ diff --git a/docs/images/manual-14.gif b/docs/images/manual-14.gif deleted file mode 100644 index 4f04efb..0000000 Binary files a/docs/images/manual-14.gif and /dev/null differ diff --git a/docs/images/manual-16.jpg b/docs/images/manual-16.jpg deleted file 
mode 100644 index 0227f20..0000000 Binary files a/docs/images/manual-16.jpg and /dev/null differ diff --git a/docs/images/manual-advanced.jpg b/docs/images/manual-advanced.jpg deleted file mode 100644 index cc2496f..0000000 Binary files a/docs/images/manual-advanced.jpg and /dev/null differ diff --git a/docs/images/manual-export.jpg b/docs/images/manual-export.jpg deleted file mode 100644 index 9be3fed..0000000 Binary files a/docs/images/manual-export.jpg and /dev/null differ diff --git a/docs/images/manual-menu.jpg b/docs/images/manual-menu.jpg deleted file mode 100644 index 4eb9afc..0000000 Binary files a/docs/images/manual-menu.jpg and /dev/null differ diff --git a/docs/images/manual-run.jpg b/docs/images/manual-run.jpg deleted file mode 100644 index 95ba0b9..0000000 Binary files a/docs/images/manual-run.jpg and /dev/null differ diff --git a/docs/images/manual-run.png b/docs/images/manual-run.png deleted file mode 100644 index b48ce23..0000000 Binary files a/docs/images/manual-run.png and /dev/null differ diff --git a/docs/manual.html b/docs/manual.html index feef3fe..1e80532 100644 --- a/docs/manual.html +++ b/docs/manual.html @@ -1,132 +1,547 @@ -
- - -- SDT 2.0.0 is a standalone application for Windows/Mac/Linux. It allows - analysis of FASTA files or SDT/SDT2 matrices using pairwise alignments - and computes pairwise identity scores. Clustering is available via - Neighbor-Joining or UPGMA. Results are visualized as a heatmap and a - distribution plot. -
-- Select a file via Select File. For FASTA, the - Run Options screen is loaded. For SDT/SDT2 matrices, the - Viewer screen is loaded. -
+ + + ++ Sequence Demarcation Tool 2 (SDT 2) is a standalone application for Windows, Mac, and Linux, created as the modern + successor to the original SDT. Like its predecessor, SDT2 is designed to explore, visualize, and demarcate + biological sequences through lightning-fast global pairwise sequence alignments. + SDT2 employs the highly optimized Parasail library for Needleman-Wunsch global sequence alignments. Combined with + seamless multiprocessing + support, SDT2 is capable of handling large datasets and longer sequence lengths. + Alignments can be organized into easy-to-interpret clusters using Neighbor + Joining or UPGMA phylogenies. Additionally, SDT2 utilizes the powerful D3 graphing library to visualize pairwise + sequence identities in a highly customizable, interactive lower triangle heatmap, which can be exported as + high-resolution vector images. The tool also provides distribution statistics for pairwise sequence identity values, + sequence lengths, and GC content, which can be displayed in either histogram or violin plot formats. +
+ ++ When SDT2 is first launched, the loading interface provides two primary options for loading data: +
+For FASTA files, the Runner Interface screen is loaded to prepare for sequence alignment and analysis.
+For SDT/SDT2 matrices, the Viewer screen is loaded, and the pre-run data is displayed as a heatmap.
+Relocated files will still show in the Recent Files directory if you have moved them since the last time they were used.
++ Once a file is selected, the application interface displays several key components and controls. +
+ + +The core selection feature in SDT2 significantly increases both compute speed and memory usage during analysis. The + application leverages Python's Multiprocessing capabilities to dramatically increase processing speed through + parallelization. By distributing the computational workload across multiple CPU cores, analysis times can be reduced + by orders of magnitude. + However, users should be aware that increased parallelization comes with higher memory requirements. Each additional + core allocated to the analysis requires its own memory space, causing total memory usage to scale with the number of + cores selected. The application provides best-estimate memory usage indicators to help users understand these + increasing memory requirements and make appropriate selections based on their available system resources.
+Note: Users on systems that can use hard disk space as a page file to increase available memory may be + able to align very long sequences (100-200kb) by setting cores to a minimal value. However, this approach may + cause system instability and should be used at your own risk. Even with additional virtual memory, the system + performance may degrade significantly when physical memory limits are exceeded.
+Run: Start the sequence comparison process.
+Select Folder: Choose an output directory for alignments.
+Select...: Choose the folder for alignment output files.
+Note: Displaying percentage values in large datasets may significantly impact rendering performance.
++ SDT2 provides visualization tools for examining the distribution of your sequence data: + Histogram and Violin plots. +
-+ Histograms divide your data into bins and display the frequency of values within each bin as + bars. This helps identify patterns, peaks, and outliers in your sequence data. +
++ Violin plots combine box plots with kernel density plots to show the distribution shape, + central tendency, and variability of your data. +
+Note: For large sequence sets, consider hiding individual data points to improve performance.
+If "Start Analysis" is disabled, check:
+If performance is slow:
+SDT2 Documentation © 2023-2025 | Version 2.0.0 Beta 4
+ + + +