From 4a205ac815374b31bdc4141f98f2479f95f78ffd Mon Sep 17 00:00:00 2001
From: Leon Oostrum <l.oostrum@esciencecenter.nl>
Date: Fri, 26 Jul 2024 17:45:40 +0200
Subject: [PATCH] Add .clang-format

---
 .clang-format           | 280 ++++++++++++++++++++++++++++++++++++++++
 .pre-commit-config.yaml |   4 +-
 include/tcbf.h          |  17 ++-
 src/main.cu             |  23 ++--
 src/prepare_a_matrix.cu |  92 +++++--------
 src/tcbf.cu             |  50 +++----
 6 files changed, 349 insertions(+), 117 deletions(-)
 create mode 100644 .clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..1390c85
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,280 @@
+---
+Language:        Cpp
+# BasedOnStyle:  Google
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: None
+AlignConsecutiveAssignments:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionPointers: false
+  PadOperators:    true
+AlignConsecutiveBitFields:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionPointers: false
+  PadOperators:    false
+AlignConsecutiveDeclarations:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionPointers: false
+  PadOperators:    false
+AlignConsecutiveMacros:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  AlignFunctionPointers: false
+  PadOperators:    false
+AlignConsecutiveShortCaseStatements:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCaseColons: false
+AlignEscapedNewlines: Left
+AlignOperands:   Align
+AlignTrailingComments:
+  Kind:            Always
+  OverEmptyLines:  0
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowBreakBeforeNoexceptSpecifier: Never
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortCompoundRequirementOnASingleLine: true
+AllowShortEnumsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: WithoutElse
+AllowShortLambdasOnASingleLine: All
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: Yes
+AttributeMacros:
+  - __capability
+BinPackArguments: true
+BinPackParameters: true
+BitFieldColonSpacing: Both
+BraceWrapping:
+  AfterCaseLabel:  false
+  AfterClass:      false
+  AfterControlStatement: Never
+  AfterEnum:       false
+  AfterExternBlock: false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  BeforeLambdaBody: false
+  BeforeWhile:     false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakAdjacentStringLiterals: true
+BreakAfterAttributes: Leave
+BreakAfterJavaFieldAnnotations: false
+BreakArrays:     true
+BreakBeforeBinaryOperators: None
+BreakBeforeConceptDeclarations: Always
+BreakBeforeBraces: Attach
+BreakBeforeInlineASMColon: OnlyMultiline
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: BeforeColon
+BreakInheritanceList: BeforeColon
+BreakStringLiterals: true
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: true
+DisableFormat:   false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: LogicalBlock
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IfMacros:
+  - KJ_IF_MAYBE
+IncludeBlocks:   Regroup
+IncludeCategories:
+  - Regex:           '^<ext/.*\.h>'
+    Priority:        2
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '^<.*'
+    Priority:        2
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '.*'
+    Priority:        3
+    SortPriority:    0
+    CaseSensitive:   false
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseBlocks: false
+IndentCaseLabels: true
+IndentExternBlock: AfterExternBlock
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentRequiresClause: true
+IndentWidth:     2
+IndentWrappedFunctionNames: false
+InsertBraces:    false
+InsertNewlineAtEOF: false
+InsertTrailingCommas: None
+IntegerLiteralSeparator:
+  Binary:          0
+  BinaryMinDigits: 0
+  Decimal:         0
+  DecimalMinDigits: 0
+  Hex:             0
+  HexMinDigits:    0
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+KeepEmptyLinesAtEOF: false
+LambdaBodyIndentation: Signature
+LineEnding:      DeriveLF
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Never
+ObjCBlockIndentWidth: 2
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PackConstructorInitializers: NextLine
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakScopeResolution: 500
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyIndentedWhitespace: 0
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+PPIndentWidth:   -1
+QualifierAlignment: Leave
+RawStringFormats:
+  - Language:        Cpp
+    Delimiters:
+      - cc
+      - CC
+      - cpp
+      - Cpp
+      - CPP
+      - 'c++'
+      - 'C++'
+    CanonicalDelimiter: ''
+    BasedOnStyle:    google
+  - Language:        TextProto
+    Delimiters:
+      - pb
+      - PB
+      - proto
+      - PROTO
+    EnclosingFunctions:
+      - EqualsProto
+      - EquivToProto
+      - PARSE_PARTIAL_TEXT_PROTO
+      - PARSE_TEST_PROTO
+      - PARSE_TEXT_PROTO
+      - ParseTextOrDie
+      - ParseTextProtoOrDie
+      - ParseTestProto
+      - ParsePartialTestProto
+    CanonicalDelimiter: pb
+    BasedOnStyle:    google
+ReferenceAlignment: Pointer
+ReflowComments:  true
+RemoveBracesLLVM: false
+RemoveParentheses: Leave
+RemoveSemicolon: false
+RequiresClausePosition: OwnLine
+RequiresExpressionIndentation: OuterScope
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 1
+SkipMacroDefinitionBody: false
+SortIncludes:    CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: LexicographicNumeric
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceAroundPointerQualifiers: Default
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeJsonColon: false
+SpaceBeforeParens: ControlStatements
+SpaceBeforeParensOptions:
+  AfterControlStatements: true
+  AfterForeachMacros: true
+  AfterFunctionDefinitionName: false
+  AfterFunctionDeclarationName: false
+  AfterIfMacros:   true
+  AfterOverloadedOperator: false
+  AfterPlacementOperator: true
+  AfterRequiresInClause: false
+  AfterRequiresInExpression: false
+  BeforeNonEmptyParentheses: false
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles:  Never
+SpacesInContainerLiterals: true
+SpacesInLineCommentPrefix:
+  Minimum:         1
+  Maximum:         -1
+SpacesInParens:  Never
+SpacesInParensOptions:
+  InCStyleCasts:   false
+  InConditionalStatements: false
+  InEmptyParentheses: false
+  Other:           false
+SpacesInSquareBrackets: false
+Standard:        Auto
+StatementAttributeLikeMacros:
+  - Q_EMIT
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth:        8
+UseTab:          Never
+VerilogBreakBetweenInstancePorts: true
+WhitespaceSensitiveMacros:
+  - BOOST_PP_STRINGIZE
+  - CF_SWIFT_NAME
+  - NS_SWIFT_NAME
+  - PP_STRINGIZE
+  - STRINGIZE
+...
+
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7eceb16..1f03ee0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,10 +1,10 @@
 repos:
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v17.0.6
+    rev: v18.1.8
     hooks:
       - id: clang-format
   - repo: https://github.com/cheshirekow/cmake-format-precommit
     rev: v0.6.13
     hooks:
       - id: cmake-format
-      - id: cmake-lint
\ No newline at end of file
+      - id: cmake-lint
diff --git a/include/tcbf.h b/include/tcbf.h
index 790b763..fce00d9 100644
--- a/include/tcbf.h
+++ b/include/tcbf.h
@@ -1,16 +1,16 @@
 #ifndef TCBF_H_
 #define TCBF_H_
 
+#include <limits.h>
+
 #include <ccglib/ccglib.hpp>
 #include <cudawrappers/cu.hpp>
-#include <limits.h>
 
 namespace tcbf {
 
 class Beamformer {
-public:
-  Beamformer(const size_t pixels, const size_t frames, const size_t samples,
-             cu::Device &device, cu::Stream &stream);
+ public:
+  Beamformer(const size_t pixels, const size_t frames, const size_t samples, cu::Device &device, cu::Stream &stream);
 
   // static void prepare_A_matrix(const std::string input_path, const
   // std::string output_path, const size_t pixels, const size_t samples,
@@ -20,7 +20,7 @@ class Beamformer {
   void write_BF(cu::HostMemory &BF, const std::string path);
   void process(cu::HostMemory &RF, cu::HostMemory &BF);
 
-private:
+ private:
   void RF_to_device(cu::HostMemory &RF);
   void BF_to_host(cu::HostMemory &BF);
 
@@ -28,8 +28,7 @@ class Beamformer {
   static const size_t kBatchSize{1};
   static const ccglib::mma::Precision kGEMMPrecision{ccglib::mma::int1};
   static const ccglib::mma::Variant kGEMMVariant{ccglib::mma::opt};
-  const dim3 kGEMMTileSize{
-      ccglib::mma::GEMM::GetDimensions(kGEMMPrecision, kGEMMVariant)};
+  const dim3 kGEMMTileSize{ccglib::mma::GEMM::GetDimensions(kGEMMPrecision, kGEMMVariant)};
 
   std::unique_ptr<cu::DeviceMemory> d_A;
   std::unique_ptr<cu::DeviceMemory> d_RF;
@@ -58,6 +57,6 @@ class Beamformer {
   cu::Stream &stream_;
 };
 
-} // namespace tcbf
+}  // namespace tcbf
 
-#endif // TCBF_H_
\ No newline at end of file
+#endif  // TCBF_H_
\ No newline at end of file
diff --git a/src/main.cu b/src/main.cu
index bd8b1ba..d84bbd7 100644
--- a/src/main.cu
+++ b/src/main.cu
@@ -1,22 +1,18 @@
+#include <tcbf.h>
+
 #include <cxxopts.hpp>
 #include <fstream>
 #include <iostream>
 
-#include <tcbf.h>
-
 cxxopts::Options create_commandline_parser(const char *argv[]) {
   cxxopts::Options options(argv[0], "Echoframe standalone beamformer");
 
-  options.add_options()("a_matrix", "Full path to input prepared A matrix",
-                        cxxopts::value<std::string>())(
-      "rf", "Full path to input RF", cxxopts::value<std::string>())(
-      "bf", "Full path to output BF", cxxopts::value<std::string>())(
-      "pixels", "Number of pixels", cxxopts::value<size_t>())(
-      "frames", "Number of frames", cxxopts::value<size_t>())(
+  options.add_options()("a_matrix", "Full path to input prepared A matrix", cxxopts::value<std::string>())(
+      "rf", "Full path to input RF", cxxopts::value<std::string>())("bf", "Full path to output BF",
+                                                                    cxxopts::value<std::string>())(
+      "pixels", "Number of pixels", cxxopts::value<size_t>())("frames", "Number of frames", cxxopts::value<size_t>())(
       "samples", "Number of samples", cxxopts::value<size_t>())(
-      "device", "GPU device ID",
-      cxxopts::value<unsigned>()->default_value(std::to_string(0)))(
-      "h,help", "Print help");
+      "device", "GPU device ID", cxxopts::value<unsigned>()->default_value(std::to_string(0)))("h,help", "Print help");
 
   return options;
 }
@@ -32,8 +28,7 @@ cxxopts::ParseResult parse_commandline(int argc, const char *argv[]) {
       exit(EXIT_SUCCESS);
     }
 
-    std::vector<std::string> required_options{"a_matrix", "rf",     "bf",
-                                              "pixels",   "frames", "samples"};
+    std::vector<std::string> required_options{"a_matrix", "rf", "bf", "pixels", "frames", "samples"};
     for (auto &opt : required_options) {
       if (!result.count(opt)) {
         std::cerr << "Required argument missing: " << opt << std::endl;
@@ -72,4 +67,4 @@ int main(int argc, const char *argv[]) {
   beamformer.read_RF(RF, path_rf);
   beamformer.process(RF, BF);
   beamformer.write_BF(BF, path_bf);
-}
\ No newline at end of file
+}
diff --git a/src/prepare_a_matrix.cu b/src/prepare_a_matrix.cu
index fa808af..c2e4ecf 100644
--- a/src/prepare_a_matrix.cu
+++ b/src/prepare_a_matrix.cu
@@ -1,12 +1,12 @@
+#include <ccglib/helper.h>
 #include <cuda_runtime.h>
-#include <cxxopts.hpp>
-#include <fstream>
-#include <iostream>
 #include <limits.h>
+#include <tcbf.h>
 
 #include <ccglib/ccglib.hpp>
-#include <ccglib/helper.h>
-#include <tcbf.h>
+#include <cxxopts.hpp>
+#include <fstream>
+#include <iostream>
 
 __global__ void conjugate_1bit(unsigned *data, const size_t n_bytes) {
   size_t tid = threadIdx.x + blockDim.x * static_cast<size_t>(blockIdx.x);
@@ -17,23 +17,15 @@ __global__ void conjugate_1bit(unsigned *data, const size_t n_bytes) {
   data[tid] = ~data[tid];
 }
 
-inline size_t align(size_t a, size_t b) {
-  return b * ccglib::helper::ceildiv(a, b);
-}
+inline size_t align(size_t a, size_t b) { return b * ccglib::helper::ceildiv(a, b); }
 
 cxxopts::Options create_commandline_parser(const char *argv[]) {
-  cxxopts::Options options(argv[0],
-                           "Echoframe beamformer A matrix preparation");
-
-  options.add_options()("a_matrix_in", "Full path to input A matrix",
-                        cxxopts::value<std::string>())(
-      "a_matrix_out", "Full path to output A matrix",
-      cxxopts::value<std::string>())("pixels", "Number of pixels",
-                                     cxxopts::value<size_t>())(
-      "samples", "Number of samples", cxxopts::value<size_t>())(
-      "device", "GPU device ID",
-      cxxopts::value<unsigned>()->default_value(std::to_string(0)))(
-      "h,help", "Print help");
+  cxxopts::Options options(argv[0], "Echoframe beamformer A matrix preparation");
+
+  options.add_options()("a_matrix_in", "Full path to input A matrix", cxxopts::value<std::string>())(
+      "a_matrix_out", "Full path to output A matrix", cxxopts::value<std::string>())(
+      "pixels", "Number of pixels", cxxopts::value<size_t>())("samples", "Number of samples", cxxopts::value<size_t>())(
+      "device", "GPU device ID", cxxopts::value<unsigned>()->default_value(std::to_string(0)))("h,help", "Print help");
 
   return options;
 }
@@ -49,8 +41,7 @@ cxxopts::ParseResult parse_commandline(int argc, const char *argv[]) {
       exit(EXIT_SUCCESS);
     }
 
-    std::vector<std::string> required_options{"a_matrix_in", "a_matrix_out",
-                                              "pixels", "samples"};
+    std::vector<std::string> required_options{"a_matrix_in", "a_matrix_out", "pixels", "samples"};
     for (auto &opt : required_options) {
       if (!result.count(opt)) {
         std::cerr << "Required argument missing: " << opt << std::endl;
@@ -69,8 +60,7 @@ cxxopts::ParseResult parse_commandline(int argc, const char *argv[]) {
 int main(int argc, const char *argv[]) {
   cxxopts::ParseResult cmdline = parse_commandline(argc, argv);
   const std::string path_a_matrix_in = cmdline["a_matrix_in"].as<std::string>();
-  const std::string path_a_matrix_out =
-      cmdline["a_matrix_out"].as<std::string>();
+  const std::string path_a_matrix_out = cmdline["a_matrix_out"].as<std::string>();
   const size_t pixels = cmdline["pixels"].as<size_t>();
   const size_t samples = cmdline["samples"].as<size_t>();
   const unsigned device_id = cmdline["device"].as<unsigned>();
@@ -81,8 +71,7 @@ int main(int argc, const char *argv[]) {
   cu::Stream stream;
 
   // tile size in beams, frames, samples axes
-  dim3 tile_sizes =
-      ccglib::mma::GEMM::GetDimensions(ccglib::mma::int1, ccglib::mma::opt);
+  dim3 tile_sizes = ccglib::mma::GEMM::GetDimensions(ccglib::mma::int1, ccglib::mma::opt);
 
   const size_t pixels_padded = align(pixels, tile_sizes.x);
   const size_t samples_padded = align(samples, tile_sizes.z);
@@ -90,8 +79,7 @@ int main(int argc, const char *argv[]) {
   // factor 2 for complex
   // host is unpadded, device is always padded
   const size_t bytes_a_matrix = 2UL * pixels * samples;
-  const size_t bytes_a_matrix_packed =
-      2UL * pixels_padded * samples_padded / CHAR_BIT;
+  const size_t bytes_a_matrix_packed = 2UL * pixels_padded * samples_padded / CHAR_BIT;
 
   // Read data from disk
   cu::HostMemory a_matrix_host(bytes_a_matrix);
@@ -126,11 +114,9 @@ int main(int argc, const char *argv[]) {
   std::cout << "Packing" << std::endl;
   for (size_t c = 0; c < 2; c++) {
     const size_t complex_offset_host = c * pixels * samples;
-    const size_t complex_offset_device_packed =
-        c * pixels_padded * samples_padded / CHAR_BIT;
+    const size_t complex_offset_device_packed = c * pixels_padded * samples_padded / CHAR_BIT;
     // process chunks
-    for (size_t pixel_start = 0; pixel_start < pixels;
-         pixel_start += pixels_per_chunk) {
+    for (size_t pixel_start = 0; pixel_start < pixels; pixel_start += pixels_per_chunk) {
       size_t local_npixels = pixels_per_chunk;
       // correct npixels in last chunk
       if (pixel_start + local_npixels > pixels) {
@@ -141,27 +127,19 @@ int main(int argc, const char *argv[]) {
       // copy chunk to device, row-by-row to handle padding
       for (size_t pixel = 0; pixel < local_npixels; pixel++) {
         const size_t d_offset = pixel * samples_padded;
-        const size_t offset =
-            (pixel_start + pixel) * samples + complex_offset_host;
+        const size_t offset = (pixel_start + pixel) * samples + complex_offset_host;
         const size_t bytes_to_transfer = samples;
 
-        cu::DeviceMemory d_a_chunk_slice(d_a_chunk, d_offset,
-                                         bytes_to_transfer);
-        stream.memcpyHtoDAsync(d_a_chunk_slice,
-                               static_cast<char *>(a_matrix_host) + offset,
-                               bytes_to_transfer);
+        cu::DeviceMemory d_a_chunk_slice(d_a_chunk, d_offset, bytes_to_transfer);
+        stream.memcpyHtoDAsync(d_a_chunk_slice, static_cast<char *>(a_matrix_host) + offset, bytes_to_transfer);
       }
       // get offset for this chunk in a_packed
-      cu::DeviceMemory d_a_packed_chunk(
-          d_a_matrix_packed,
-          pixel_start * samples_padded / CHAR_BIT +
-              complex_offset_device_packed,
-          local_npixels * samples_padded / CHAR_BIT);
+      cu::DeviceMemory d_a_packed_chunk(d_a_matrix_packed,
+                                        pixel_start * samples_padded / CHAR_BIT + complex_offset_device_packed,
+                                        local_npixels * samples_padded / CHAR_BIT);
       // run packing kernel
-      ccglib::packing::Packing packing(local_npixels * samples_padded, device,
-                                       stream);
-      packing.Run(d_a_chunk, d_a_packed_chunk, ccglib::packing::pack,
-                  ccglib::packing::complex_first);
+      ccglib::packing::Packing packing(local_npixels * samples_padded, device, stream);
+      packing.Run(d_a_chunk, d_a_packed_chunk, ccglib::packing::pack, ccglib::packing::complex_first);
     }
   }
 
@@ -169,33 +147,27 @@ int main(int argc, const char *argv[]) {
   std::cout << "Conjugate" << std::endl;
   dim3 threads(256);
   dim3 grid(ccglib::helper::ceildiv(bytes_a_matrix_packed / 2, threads.x));
-  cu::DeviceMemory d_a_matrix_packed_imag(
-      d_a_matrix_packed, bytes_a_matrix_packed / 2, bytes_a_matrix_packed / 2);
+  cu::DeviceMemory d_a_matrix_packed_imag(d_a_matrix_packed, bytes_a_matrix_packed / 2, bytes_a_matrix_packed / 2);
   conjugate_1bit<<<grid, threads, 0, stream>>>(
-      reinterpret_cast<unsigned *>(
-          static_cast<CUdeviceptr>(d_a_matrix_packed_imag)),
-      bytes_a_matrix_packed / 2);
+      reinterpret_cast<unsigned *>(static_cast<CUdeviceptr>(d_a_matrix_packed_imag)), bytes_a_matrix_packed / 2);
 
   // transpose
   std::cout << "Transpose" << std::endl;
-  ccglib::transpose::Transpose transpose(1, pixels_padded, samples_padded,
-                                         tile_sizes.x, tile_sizes.z, 1, device,
+  ccglib::transpose::Transpose transpose(1, pixels_padded, samples_padded, tile_sizes.x, tile_sizes.z, 1, device,
                                          stream);
   transpose.Run(d_a_matrix_packed, d_a_transposed);
 
   // copy output to host
   std::cout << "Copy to host" << std::endl;
   cu::HostMemory a_matrix_output(bytes_a_matrix_packed);
-  stream.memcpyDtoHAsync(a_matrix_output, d_a_transposed,
-                         bytes_a_matrix_packed);
+  stream.memcpyDtoHAsync(a_matrix_output, d_a_transposed, bytes_a_matrix_packed);
   stream.synchronize();
 
   // write to disk
   std::cout << "Write to disk" << std::endl;
   std::ofstream out(path_a_matrix_out, std::ios::binary | std::ios::out);
   if (!out) {
-    throw std::runtime_error("Failed to open output file: " +
-                             path_a_matrix_out);
+    throw std::runtime_error("Failed to open output file: " + path_a_matrix_out);
   }
   out.write(static_cast<char *>(a_matrix_output), bytes_a_matrix_packed);
-}
\ No newline at end of file
+}
diff --git a/src/tcbf.cu b/src/tcbf.cu
index 59bf522..41104b3 100644
--- a/src/tcbf.cu
+++ b/src/tcbf.cu
@@ -1,26 +1,22 @@
+#include <ccglib/helper.h>
+
 #include <fstream>
 #include <iostream>
 
 #include "tcbf.h"
-#include <ccglib/helper.h>
 
 #ifndef COMPLEX
 #define COMPLEX 2
 #endif
 
-static inline size_t align(size_t a, size_t b) {
-  return b * ccglib::helper::ceildiv(a, b);
-}
+static inline size_t align(size_t a, size_t b) { return b * ccglib::helper::ceildiv(a, b); }
 
 namespace tcbf {
-Beamformer::Beamformer(const size_t pixels, const size_t frames,
-                       const size_t samples, cu::Device &device,
+Beamformer::Beamformer(const size_t pixels, const size_t frames, const size_t samples, cu::Device &device,
                        cu::Stream &stream)
-    : pixels_(pixels), frames_(frames), samples_(samples), device_(device),
-      stream_(stream) {
+    : pixels_(pixels), frames_(frames), samples_(samples), device_(device), stream_(stream) {
   // padded sizes
-  dim3 tile_sizes =
-      ccglib::mma::GEMM::GetDimensions(kGEMMPrecision, kGEMMVariant);
+  dim3 tile_sizes = ccglib::mma::GEMM::GetDimensions(kGEMMPrecision, kGEMMVariant);
   pixels_padded_ = align(pixels, tile_sizes.x);
   frames_padded_ = align(frames, tile_sizes.y);
   samples_padded_ = align(samples, tile_sizes.z);
@@ -36,15 +32,12 @@ Beamformer::Beamformer(const size_t pixels, const size_t frames,
   d_RF_transposed = std::make_unique<cu::DeviceMemory>(bytesRFPacked_);
   d_BF = std::make_unique<cu::DeviceMemory>(bytesBF_);
   // create objects to run kernels
-  pack_rf_ = std::make_unique<ccglib::packing::Packing>(
-      COMPLEX * frames_padded_ * samples_padded_, device_, stream_);
+  pack_rf_ = std::make_unique<ccglib::packing::Packing>(COMPLEX * frames_padded_ * samples_padded_, device_, stream_);
   transpose_rf_ = std::make_unique<ccglib::transpose::Transpose>(
-      kBatchSize, frames_padded_, samples_padded_, kGEMMTileSize.y,
-      kGEMMTileSize.z, kBitsPerSample, device_, stream_);
-  gemm_ = std::make_unique<ccglib::mma::GEMM>(
-      kBatchSize, pixels_padded_, frames_padded_, samples_padded_,
-      kBitsPerSample, device_, stream_, kGEMMPrecision, kGEMMVariant,
-      ccglib::mma::col_major);
+      kBatchSize, frames_padded_, samples_padded_, kGEMMTileSize.y, kGEMMTileSize.z, kBitsPerSample, device_, stream_);
+  gemm_ =
+      std::make_unique<ccglib::mma::GEMM>(kBatchSize, pixels_padded_, frames_padded_, samples_padded_, kBitsPerSample,
+                                          device_, stream_, kGEMMPrecision, kGEMMVariant, ccglib::mma::col_major);
 }
 
 void Beamformer::read_A_matrix(const std::string path) {
@@ -72,8 +65,7 @@ void Beamformer::write_BF(cu::HostMemory &BF, const std::string path) {
   if (!out) {
     throw std::runtime_error("Failed to open output file: " + path);
   }
-  out.write(static_cast<char *>(BF),
-            COMPLEX * frames_ * pixels_ * sizeof(unsigned));
+  out.write(static_cast<char *>(BF), COMPLEX * frames_ * pixels_ * sizeof(unsigned));
 }
 
 void Beamformer::RF_to_device(cu::HostMemory &RF) {
@@ -86,8 +78,7 @@ void Beamformer::RF_to_device(cu::HostMemory &RF) {
     const size_t offset = f * samples_ * 2;
     const size_t bytes_to_transfer = samples_ * 2;
     cu::DeviceMemory d_RF_chunk(*d_RF, d_offset, bytes_to_transfer);
-    stream_.memcpyHtoDAsync(d_RF_chunk, static_cast<char *>(RF) + offset,
-                            bytes_to_transfer);
+    stream_.memcpyHtoDAsync(d_RF_chunk, static_cast<char *>(RF) + offset, bytes_to_transfer);
   }
 }
 
@@ -97,15 +88,11 @@ void Beamformer::BF_to_host(cu::HostMemory &BF) {
   for (size_t c = 0; c < COMPLEX; c++) {
     for (size_t f = 0; f < frames_; f++) {
       // get objects pointing to start of chunk to transfer
-      const size_t d_offset =
-          (c * frames_padded_ * pixels_padded_ + f * pixels_padded_) *
-          sizeof(unsigned);
-      const size_t offset =
-          (c * frames_ * pixels_ + f * pixels_) * sizeof(unsigned);
+      const size_t d_offset = (c * frames_padded_ * pixels_padded_ + f * pixels_padded_) * sizeof(unsigned);
+      const size_t offset = (c * frames_ * pixels_ + f * pixels_) * sizeof(unsigned);
       const size_t bytes_to_transfer = pixels_ * sizeof(unsigned);
       cu::DeviceMemory d_BF_chunk(*d_BF, d_offset, bytes_to_transfer);
-      stream_.memcpyDtoHAsync(static_cast<char *>(BF) + offset, d_BF_chunk,
-                              bytes_to_transfer);
+      stream_.memcpyDtoHAsync(static_cast<char *>(BF) + offset, d_BF_chunk, bytes_to_transfer);
     }
   }
 }
@@ -114,8 +101,7 @@ void Beamformer::process(cu::HostMemory &RF, cu::HostMemory &BF) {
   // transfer RF to GPU
   RF_to_device(RF);
   // pack bits
-  pack_rf_->Run(*d_RF, *d_RF_packed, ccglib::packing::pack,
-                ccglib::packing::complex_last);
+  pack_rf_->Run(*d_RF, *d_RF_packed, ccglib::packing::pack, ccglib::packing::complex_last);
   // transpose to format required by GEMM
   transpose_rf_->Run(*d_RF_packed, *d_RF_transposed);
   // do GEMM
@@ -125,4 +111,4 @@ void Beamformer::process(cu::HostMemory &RF, cu::HostMemory &BF) {
   stream_.synchronize();
 }
 
-} // namespace tcbf
+}  // namespace tcbf