From 181df0c36f497271bff52b71f4ac9377ceaaecfd Mon Sep 17 00:00:00 2001
From: Tom Hvitved <hvitved@github.com>
Date: Tue, 7 May 2024 09:26:47 +0200
Subject: [PATCH] Python: Also use tree-sitter 0.22.6

---
 python/extractor/tsg-python/Cargo.Bazel.lock  | 300 +++++++++++++-----
 python/extractor/tsg-python/Cargo.lock        |  48 ++-
 python/extractor/tsg-python/Cargo.toml        |   4 +-
 python/extractor/tsg-python/Makefile          | 112 +++++++
 python/extractor/tsg-python/Package.swift     |  47 +++
 python/extractor/tsg-python/binding.gyp       |  30 ++
 .../bindings/c/tree-sitter-tsg_python.h       |  16 +
 .../bindings/c/tree-sitter-tsg_python.pc.in   |  11 +
 .../tsg-python/bindings/go/binding.go         |  13 +
 .../tsg-python/bindings/go/binding_test.go    |  15 +
 .../extractor/tsg-python/bindings/go/go.mod   |   5 +
 .../tsg-python/bindings/node/binding.cc       |  20 ++
 .../tsg-python/bindings/node/index.d.ts       |  28 ++
 .../tsg-python/bindings/node/index.js         |   7 +
 .../python/tree_sitter_tsg_python/__init__.py |   5 +
 .../tree_sitter_tsg_python/__init__.pyi       |   1 +
 .../python/tree_sitter_tsg_python/binding.c   |  27 ++
 .../python/tree_sitter_tsg_python/py.typed    |   0
 .../tsg-python/bindings/rust/build.rs         |  22 ++
 .../extractor/tsg-python/bindings/rust/lib.rs |  54 ++++
 .../swift/TreeSitterTsgPython/tsg_python.h    |  16 +
 python/extractor/tsg-python/grammar.js        |  11 +
 python/extractor/tsg-python/package-lock.json |   6 +
 python/extractor/tsg-python/package.json      |  53 ++++
 python/extractor/tsg-python/pyproject.toml    |  29 ++
 .../extractor/tsg-python/rust-toolchain.toml  |   2 +-
 python/extractor/tsg-python/setup.py          |  60 ++++
 python/extractor/tsg-python/src/grammar.json  |  20 ++
 .../extractor/tsg-python/src/node-types.json  |  11 +
 python/extractor/tsg-python/src/parser.c      | 178 +++++++++++
 .../tsg-python/src/tree_sitter/alloc.h        |  54 ++++
 .../tsg-python/src/tree_sitter/array.h        | 290 +++++++++++++++++
 .../tsg-python/src/tree_sitter/parser.h       | 265 ++++++++++++++++
 python/extractor/tsg-python/tsp/Cargo.toml    |   2 +-
 ruby/extractor/Cargo.lock                     |   2 +-
 ruby/extractor/cargo-bazel-lock.json          |   4 +-
 36 files changed, 1669 insertions(+), 99 deletions(-)
 create mode 100644 python/extractor/tsg-python/Makefile
 create mode 100644 python/extractor/tsg-python/Package.swift
 create mode 100644 python/extractor/tsg-python/binding.gyp
 create mode 100644 python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.h
 create mode 100644 python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.pc.in
 create mode 100644 python/extractor/tsg-python/bindings/go/binding.go
 create mode 100644 python/extractor/tsg-python/bindings/go/binding_test.go
 create mode 100644 python/extractor/tsg-python/bindings/go/go.mod
 create mode 100644 python/extractor/tsg-python/bindings/node/binding.cc
 create mode 100644 python/extractor/tsg-python/bindings/node/index.d.ts
 create mode 100644 python/extractor/tsg-python/bindings/node/index.js
 create mode 100644 python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.py
 create mode 100644 python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.pyi
 create mode 100644 python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/binding.c
 create mode 100644 python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/py.typed
 create mode 100644 python/extractor/tsg-python/bindings/rust/build.rs
 create mode 100644 python/extractor/tsg-python/bindings/rust/lib.rs
 create mode 100644 python/extractor/tsg-python/bindings/swift/TreeSitterTsgPython/tsg_python.h
 create mode 100644 python/extractor/tsg-python/grammar.js
 create mode 100644 python/extractor/tsg-python/package-lock.json
 create mode 100644 python/extractor/tsg-python/package.json
 create mode 100644 python/extractor/tsg-python/pyproject.toml
 create mode 100644 python/extractor/tsg-python/setup.py
 create mode 100644 python/extractor/tsg-python/src/grammar.json
 create mode 100644 python/extractor/tsg-python/src/node-types.json
 create mode 100644 python/extractor/tsg-python/src/parser.c
 create mode 100644 python/extractor/tsg-python/src/tree_sitter/alloc.h
 create mode 100644 python/extractor/tsg-python/src/tree_sitter/array.h
 create mode 100644 python/extractor/tsg-python/src/tree_sitter/parser.h

diff --git a/python/extractor/tsg-python/Cargo.Bazel.lock b/python/extractor/tsg-python/Cargo.Bazel.lock
index e3b5a249e4fc9..d62b9b45a19a8 100644
--- a/python/extractor/tsg-python/Cargo.Bazel.lock
+++ b/python/extractor/tsg-python/Cargo.Bazel.lock
@@ -1,5 +1,5 @@
 {
-  "checksum": "35a1ce4b6c4f997c496c11d3a8fcfaadc5833dfd41bebb022941687d73dde159",
+  "checksum": "5cc291d6260540e9e0364edc5927f547083e68247e5a694266279544ea1cf31c",
   "crates": {
     "ahash 0.4.7": {
       "name": "ahash",
@@ -37,14 +37,14 @@
       ],
       "license_file": null
     },
-    "aho-corasick 0.7.18": {
+    "aho-corasick 1.1.3": {
       "name": "aho-corasick",
-      "version": "0.7.18",
+      "version": "1.1.3",
       "package_url": "https://github.com/BurntSushi/aho-corasick",
       "repository": {
         "Http": {
-          "url": "https://static.crates.io/crates/aho-corasick/0.7.18/download",
-          "sha256": "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
+          "url": "https://static.crates.io/crates/aho-corasick/1.1.3/download",
+          "sha256": "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
         }
       },
       "targets": [
@@ -65,7 +65,7 @@
         ],
         "crate_features": {
           "common": [
-            "default",
+            "perf-literal",
             "std"
           ],
           "selects": {}
@@ -73,16 +73,16 @@
         "deps": {
           "common": [
             {
-              "id": "memchr 2.4.1",
+              "id": "memchr 2.7.2",
               "target": "memchr"
             }
           ],
           "selects": {}
         },
-        "edition": "2018",
-        "version": "0.7.18"
+        "edition": "2021",
+        "version": "1.1.3"
       },
-      "license": "Unlicense/MIT",
+      "license": "Unlicense OR MIT",
       "license_ids": [
         "MIT",
         "Unlicense"
@@ -301,14 +301,14 @@
       ],
       "license_file": null
     },
-    "cc 1.0.70": {
+    "cc 1.0.97": {
       "name": "cc",
-      "version": "1.0.70",
-      "package_url": "https://github.com/alexcrichton/cc-rs",
+      "version": "1.0.97",
+      "package_url": "https://github.com/rust-lang/cc-rs",
       "repository": {
         "Http": {
-          "url": "https://static.crates.io/crates/cc/1.0.70/download",
-          "sha256": "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0"
+          "url": "https://static.crates.io/crates/cc/1.0.97/download",
+          "sha256": "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4"
         }
       },
       "targets": [
@@ -328,9 +328,9 @@
           "**"
         ],
         "edition": "2018",
-        "version": "1.0.70"
+        "version": "1.0.97"
       },
-      "license": "MIT/Apache-2.0",
+      "license": "MIT OR Apache-2.0",
       "license_ids": [
         "Apache-2.0",
         "MIT"
@@ -711,14 +711,14 @@
       ],
       "license_file": null
     },
-    "memchr 2.4.1": {
+    "memchr 2.7.2": {
       "name": "memchr",
-      "version": "2.4.1",
+      "version": "2.7.2",
       "package_url": "https://github.com/BurntSushi/memchr",
       "repository": {
         "Http": {
-          "url": "https://static.crates.io/crates/memchr/2.4.1/download",
-          "sha256": "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+          "url": "https://static.crates.io/crates/memchr/2.7.2/download",
+          "sha256": "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
         }
       },
       "targets": [
@@ -730,15 +730,6 @@
               "**/*.rs"
             ]
           }
-        },
-        {
-          "BuildScript": {
-            "crate_name": "build_script_build",
-            "crate_root": "build.rs",
-            "srcs": [
-              "**/*.rs"
-            ]
-          }
         }
       ],
       "library_target_name": "memchr",
@@ -748,29 +739,15 @@
         ],
         "crate_features": {
           "common": [
-            "default",
+            "alloc",
             "std"
           ],
           "selects": {}
         },
-        "deps": {
-          "common": [
-            {
-              "id": "memchr 2.4.1",
-              "target": "build_script_build"
-            }
-          ],
-          "selects": {}
-        },
-        "edition": "2018",
-        "version": "2.4.1"
-      },
-      "build_script_attrs": {
-        "data_glob": [
-          "**"
-        ]
+        "edition": "2021",
+        "version": "2.7.2"
       },
-      "license": "Unlicense/MIT",
+      "license": "Unlicense OR MIT",
       "license_ids": [
         "MIT",
         "Unlicense"
@@ -899,14 +876,14 @@
       ],
       "license_file": null
     },
-    "regex 1.5.5": {
+    "regex 1.10.4": {
       "name": "regex",
-      "version": "1.5.5",
+      "version": "1.10.4",
       "package_url": "https://github.com/rust-lang/regex",
       "repository": {
         "Http": {
-          "url": "https://static.crates.io/crates/regex/1.5.5/download",
-          "sha256": "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
+          "url": "https://static.crates.io/crates/regex/1.10.4/download",
+          "sha256": "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
         }
       },
       "targets": [
@@ -927,14 +904,14 @@
         ],
         "crate_features": {
           "common": [
-            "aho-corasick",
             "default",
-            "memchr",
             "perf",
+            "perf-backtrack",
             "perf-cache",
             "perf-dfa",
             "perf-inline",
             "perf-literal",
+            "perf-onepass",
             "std",
             "unicode",
             "unicode-age",
@@ -950,22 +927,106 @@
         "deps": {
           "common": [
             {
-              "id": "aho-corasick 0.7.18",
+              "id": "aho-corasick 1.1.3",
               "target": "aho_corasick"
             },
             {
-              "id": "memchr 2.4.1",
+              "id": "memchr 2.7.2",
               "target": "memchr"
             },
             {
-              "id": "regex-syntax 0.6.25",
+              "id": "regex-automata 0.4.6",
+              "target": "regex_automata"
+            },
+            {
+              "id": "regex-syntax 0.8.3",
               "target": "regex_syntax"
             }
           ],
           "selects": {}
         },
-        "edition": "2018",
-        "version": "1.5.5"
+        "edition": "2021",
+        "version": "1.10.4"
+      },
+      "license": "MIT OR Apache-2.0",
+      "license_ids": [
+        "Apache-2.0",
+        "MIT"
+      ],
+      "license_file": null
+    },
+    "regex-automata 0.4.6": {
+      "name": "regex-automata",
+      "version": "0.4.6",
+      "package_url": "https://github.com/rust-lang/regex/tree/master/regex-automata",
+      "repository": {
+        "Http": {
+          "url": "https://static.crates.io/crates/regex-automata/0.4.6/download",
+          "sha256": "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
+        }
+      },
+      "targets": [
+        {
+          "Library": {
+            "crate_name": "regex_automata",
+            "crate_root": "src/lib.rs",
+            "srcs": [
+              "**/*.rs"
+            ]
+          }
+        }
+      ],
+      "library_target_name": "regex_automata",
+      "common_attrs": {
+        "compile_data_glob": [
+          "**"
+        ],
+        "crate_features": {
+          "common": [
+            "alloc",
+            "dfa-onepass",
+            "hybrid",
+            "meta",
+            "nfa-backtrack",
+            "nfa-pikevm",
+            "nfa-thompson",
+            "perf-inline",
+            "perf-literal",
+            "perf-literal-multisubstring",
+            "perf-literal-substring",
+            "std",
+            "syntax",
+            "unicode",
+            "unicode-age",
+            "unicode-bool",
+            "unicode-case",
+            "unicode-gencat",
+            "unicode-perl",
+            "unicode-script",
+            "unicode-segment",
+            "unicode-word-boundary"
+          ],
+          "selects": {}
+        },
+        "deps": {
+          "common": [
+            {
+              "id": "aho-corasick 1.1.3",
+              "target": "aho_corasick"
+            },
+            {
+              "id": "memchr 2.7.2",
+              "target": "memchr"
+            },
+            {
+              "id": "regex-syntax 0.8.3",
+              "target": "regex_syntax"
+            }
+          ],
+          "selects": {}
+        },
+        "edition": "2021",
+        "version": "0.4.6"
       },
       "license": "MIT OR Apache-2.0",
       "license_ids": [
@@ -974,14 +1035,14 @@
       ],
       "license_file": null
     },
-    "regex-syntax 0.6.25": {
+    "regex-syntax 0.8.3": {
       "name": "regex-syntax",
-      "version": "0.6.25",
-      "package_url": "https://github.com/rust-lang/regex",
+      "version": "0.8.3",
+      "package_url": "https://github.com/rust-lang/regex/tree/master/regex-syntax",
       "repository": {
         "Http": {
-          "url": "https://static.crates.io/crates/regex-syntax/0.6.25/download",
-          "sha256": "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+          "url": "https://static.crates.io/crates/regex-syntax/0.8.3/download",
+          "sha256": "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
         }
       },
       "targets": [
@@ -1003,6 +1064,7 @@
         "crate_features": {
           "common": [
             "default",
+            "std",
             "unicode",
             "unicode-age",
             "unicode-bool",
@@ -1014,10 +1076,10 @@
           ],
           "selects": {}
         },
-        "edition": "2018",
-        "version": "0.6.25"
+        "edition": "2021",
+        "version": "0.8.3"
       },
-      "license": "MIT/Apache-2.0",
+      "license": "MIT OR Apache-2.0",
       "license_ids": [
         "Apache-2.0",
         "MIT"
@@ -1601,7 +1663,7 @@
         "deps": {
           "common": [
             {
-              "id": "regex 1.5.5",
+              "id": "regex 1.10.4",
               "target": "regex"
             },
             {
@@ -1621,7 +1683,7 @@
         "deps": {
           "common": [
             {
-              "id": "cc 1.0.70",
+              "id": "cc 1.0.97",
               "target": "cc"
             }
           ],
@@ -1634,6 +1696,78 @@
       ],
       "license_file": null
     },
+    "tree-sitter 0.22.6": {
+      "name": "tree-sitter",
+      "version": "0.22.6",
+      "package_url": "https://github.com/tree-sitter/tree-sitter",
+      "repository": {
+        "Http": {
+          "url": "https://static.crates.io/crates/tree-sitter/0.22.6/download",
+          "sha256": "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca"
+        }
+      },
+      "targets": [
+        {
+          "Library": {
+            "crate_name": "tree_sitter",
+            "crate_root": "binding_rust/lib.rs",
+            "srcs": [
+              "**/*.rs"
+            ]
+          }
+        },
+        {
+          "BuildScript": {
+            "crate_name": "build_script_build",
+            "crate_root": "binding_rust/build.rs",
+            "srcs": [
+              "**/*.rs"
+            ]
+          }
+        }
+      ],
+      "library_target_name": "tree_sitter",
+      "common_attrs": {
+        "compile_data_glob": [
+          "**"
+        ],
+        "deps": {
+          "common": [
+            {
+              "id": "regex 1.10.4",
+              "target": "regex"
+            },
+            {
+              "id": "tree-sitter 0.22.6",
+              "target": "build_script_build"
+            }
+          ],
+          "selects": {}
+        },
+        "edition": "2021",
+        "version": "0.22.6"
+      },
+      "build_script_attrs": {
+        "data_glob": [
+          "**"
+        ],
+        "deps": {
+          "common": [
+            {
+              "id": "cc 1.0.97",
+              "target": "cc"
+            }
+          ],
+          "selects": {}
+        },
+        "links": "tree-sitter"
+      },
+      "license": "MIT",
+      "license_ids": [
+        "MIT"
+      ],
+      "license_file": null
+    },
     "tree-sitter-graph 0.7.0": {
       "name": "tree-sitter-graph",
       "version": "0.7.0",
@@ -1667,7 +1801,7 @@
               "target": "log"
             },
             {
-              "id": "regex 1.5.5",
+              "id": "regex 1.10.4",
               "target": "regex"
             },
             {
@@ -1729,7 +1863,7 @@
               "target": "clap"
             },
             {
-              "id": "regex 1.5.5",
+              "id": "regex 1.10.4",
               "target": "regex"
             },
             {
@@ -1745,7 +1879,7 @@
               "target": "thiserror"
             },
             {
-              "id": "tree-sitter 0.20.4",
+              "id": "tree-sitter 0.22.6",
               "target": "tree_sitter"
             },
             {
@@ -1755,7 +1889,7 @@
           ],
           "selects": {}
         },
-        "edition": "2018",
+        "edition": "2021",
         "version": "0.1.0"
       },
       "license": null,
@@ -1795,7 +1929,7 @@
         "deps": {
           "common": [
             {
-              "id": "tree-sitter 0.20.4",
+              "id": "tree-sitter 0.22.6",
               "target": "tree_sitter"
             },
             {
@@ -1815,11 +1949,20 @@
         "deps": {
           "common": [
             {
-              "id": "cc 1.0.70",
+              "id": "cc 1.0.97",
               "target": "cc"
             }
           ],
           "selects": {}
+        },
+        "link_deps": {
+          "common": [
+            {
+              "id": "tree-sitter 0.22.6",
+              "target": "tree_sitter"
+            }
+          ],
+          "selects": {}
         }
       },
       "license": "MIT",
@@ -1986,7 +2129,6 @@
         "crate_features": {
           "common": [
             "consoleapi",
-            "errhandlingapi",
             "minwinbase",
             "minwindef",
             "processenv",
@@ -2333,13 +2475,13 @@
   },
   "direct_deps": [
     "anyhow 1.0.44",
-    "cc 1.0.70",
+    "cc 1.0.97",
     "clap 2.33.3",
-    "regex 1.5.5",
+    "regex 1.10.4",
     "smallvec 1.6.1",
     "string-interner 0.12.2",
     "thiserror 1.0.29",
-    "tree-sitter 0.20.4",
+    "tree-sitter 0.22.6",
     "tree-sitter-graph 0.7.0"
   ],
   "direct_dev_deps": []
diff --git a/python/extractor/tsg-python/Cargo.lock b/python/extractor/tsg-python/Cargo.lock
index c179dd3036f91..e2841f3875cbe 100644
--- a/python/extractor/tsg-python/Cargo.lock
+++ b/python/extractor/tsg-python/Cargo.lock
@@ -10,9 +10,9 @@ checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e"
 
 [[package]]
 name = "aho-corasick"
-version = "0.7.18"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
 dependencies = [
  "memchr",
 ]
@@ -51,9 +51,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
 [[package]]
 name = "cc"
-version = "1.0.70"
+version = "1.0.97"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0"
+checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4"
 
 [[package]]
 name = "cfg-if"
@@ -117,9 +117,9 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.4.1"
+version = "2.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
 
 [[package]]
 name = "proc-macro2"
@@ -141,9 +141,21 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.5.5"
+version = "1.10.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
+checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -152,9 +164,9 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.6.25"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
 
 [[package]]
 name = "ryu"
@@ -251,6 +263,16 @@ dependencies = [
  "regex",
 ]
 
+[[package]]
+name = "tree-sitter"
+version = "0.22.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca"
+dependencies = [
+ "cc",
+ "regex",
+]
+
 [[package]]
 name = "tree-sitter-graph"
 version = "0.7.0"
@@ -264,7 +286,7 @@ dependencies = [
  "smallvec",
  "string-interner",
  "thiserror",
- "tree-sitter",
+ "tree-sitter 0.20.4",
 ]
 
 [[package]]
@@ -277,7 +299,7 @@ dependencies = [
  "smallvec",
  "string-interner",
  "thiserror",
- "tree-sitter",
+ "tree-sitter 0.22.6",
  "tree-sitter-graph",
  "tsp",
 ]
@@ -287,7 +309,7 @@ name = "tsp"
 version = "0.19.0"
 dependencies = [
  "cc",
- "tree-sitter",
+ "tree-sitter 0.22.6",
 ]
 
 [[package]]
diff --git a/python/extractor/tsg-python/Cargo.toml b/python/extractor/tsg-python/Cargo.toml
index 1266f94f2b204..919235d115d82 100644
--- a/python/extractor/tsg-python/Cargo.toml
+++ b/python/extractor/tsg-python/Cargo.toml
@@ -4,7 +4,7 @@
 name = "tsg-python"
 version = "0.1.0"
 authors = ["Taus Brock-Nannestad <tausbn@github.com>"]
-edition = "2018"
+edition = "2021"
 
 # When changing/updating these, the `Cargo.Bazel.lock` file has to be regenerated.
 # Run `CARGO_BAZEL_REPIN=true CARGO_BAZEL_REPIN_ONLY=py_deps ./tools/bazel sync --only=py_deps`
@@ -20,7 +20,7 @@ anyhow = "1.0"
 regex = "1"
 smallvec = { version="1.6", features=["union"] }
 thiserror = "1.0"
-tree-sitter = "0.20.4"
+tree-sitter = ">= 0.22.6"
 tree-sitter-graph = "0.7.0"
 tsp = {path = "tsp"}
 clap = "2.32"
diff --git a/python/extractor/tsg-python/Makefile b/python/extractor/tsg-python/Makefile
new file mode 100644
index 0000000000000..1876dea8e6017
--- /dev/null
+++ b/python/extractor/tsg-python/Makefile
@@ -0,0 +1,112 @@
+VERSION := 0.0.1
+
+LANGUAGE_NAME := tree-sitter-tsg_python
+
+# repository
+SRC_DIR := src
+
+PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null)
+
+ifeq ($(PARSER_URL),)
+	PARSER_URL := $(patsubst %.git,%,$(PARSER_REPO_URL))
+ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),)
+	PARSER_URL := $(subst :,/,$(PARSER_URL))
+	PARSER_URL := $(subst git@,https://,$(PARSER_URL))
+endif
+endif
+
+TS ?= tree-sitter
+
+# ABI versioning
+SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION)))
+SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION)))
+
+# install directory layout
+PREFIX ?= /usr/local
+INCLUDEDIR ?= $(PREFIX)/include
+LIBDIR ?= $(PREFIX)/lib
+PCLIBDIR ?= $(LIBDIR)/pkgconfig
+
+# source/object files
+PARSER := $(SRC_DIR)/parser.c
+EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c))
+OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS))
+
+# flags
+ARFLAGS ?= rcs
+override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
+
+# OS-specific bits
+ifeq ($(OS),Windows_NT)
+	$(error "Windows is not supported")
+else ifeq ($(shell uname),Darwin)
+	SOEXT = dylib
+	SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
+	SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
+	LINKSHARED := $(LINKSHARED)-dynamiclib -Wl,
+	ifneq ($(ADDITIONAL_LIBS),)
+	LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS),
+	endif
+	LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks
+else
+	SOEXT = so
+	SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
+	SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
+	LINKSHARED := $(LINKSHARED)-shared -Wl,
+	ifneq ($(ADDITIONAL_LIBS),)
+	LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS),
+	endif
+	LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR)
+endif
+ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
+	PCLIBDIR := $(PREFIX)/libdata/pkgconfig
+endif
+
+all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc
+
+lib$(LANGUAGE_NAME).a: $(OBJS)
+	$(AR) $(ARFLAGS) $@ $^
+
+lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS)
+	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
+ifneq ($(STRIP),)
+	$(STRIP) $@
+endif
+
+$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
+	sed  -e 's|@URL@|$(PARSER_URL)|' \
+		-e 's|@VERSION@|$(VERSION)|' \
+		-e 's|@LIBDIR@|$(LIBDIR)|' \
+		-e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+		-e 's|@REQUIRES@|$(REQUIRES)|' \
+		-e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' \
+		-e 's|=$(PREFIX)|=$${prefix}|' \
+		-e 's|@PREFIX@|$(PREFIX)|' $< > $@
+
+$(PARSER): $(SRC_DIR)/grammar.json
+	$(TS) generate --no-bindings $^
+
+install: all
+	install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
+	install -m644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h
+	install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
+	install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a
+	install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER)
+	ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR)
+	ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT)
+
+uninstall:
+	$(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \
+		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \
+		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \
+		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \
+		'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \
+		'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
+
+clean:
+	$(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT)
+
+test:
+	$(TS) test
+
+.PHONY: all install uninstall clean test
diff --git a/python/extractor/tsg-python/Package.swift b/python/extractor/tsg-python/Package.swift
new file mode 100644
index 0000000000000..ca68797873fd4
--- /dev/null
+++ b/python/extractor/tsg-python/Package.swift
@@ -0,0 +1,47 @@
+// swift-tools-version:5.3
+import PackageDescription
+
+let package = Package(
+    name: "TreeSitterTsgPython",
+    products: [
+        .library(name: "TreeSitterTsgPython", targets: ["TreeSitterTsgPython"]),
+    ],
+    dependencies: [],
+    targets: [
+        .target(name: "TreeSitterTsgPython",
+                path: ".",
+                exclude: [
+                    "Cargo.toml",
+                    "Makefile",
+                    "binding.gyp",
+                    "bindings/c",
+                    "bindings/go",
+                    "bindings/node",
+                    "bindings/python",
+                    "bindings/rust",
+                    "prebuilds",
+                    "grammar.js",
+                    "package.json",
+                    "package-lock.json",
+                    "pyproject.toml",
+                    "setup.py",
+                    "test",
+                    "examples",
+                    ".editorconfig",
+                    ".github",
+                    ".gitignore",
+                    ".gitattributes",
+                    ".gitmodules",
+                ],
+                sources: [
+                    "src/parser.c",
+                    // NOTE: if your language has an external scanner, add it here.
+                ],
+                resources: [
+                    .copy("queries")
+                ],
+                publicHeadersPath: "bindings/swift",
+                cSettings: [.headerSearchPath("src")])
+    ],
+    cLanguageStandard: .c11
+)
diff --git a/python/extractor/tsg-python/binding.gyp b/python/extractor/tsg-python/binding.gyp
new file mode 100644
index 0000000000000..f6d0120ec5d48
--- /dev/null
+++ b/python/extractor/tsg-python/binding.gyp
@@ -0,0 +1,30 @@
+{
+  "targets": [
+    {
+      "target_name": "tree_sitter_tsg_python_binding",
+      "dependencies": [
+        "<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
+      ],
+      "include_dirs": [
+        "src",
+      ],
+      "sources": [
+        "bindings/node/binding.cc",
+        "src/parser.c",
+        # NOTE: if your language has an external scanner, add it here.
+      ],
+      "conditions": [
+        ["OS!='win'", {
+          "cflags_c": [
+            "-std=c11",
+          ],
+        }, { # OS == "win"
+          "cflags_c": [
+            "/std:c11",
+            "/utf-8",
+          ],
+        }],
+      ],
+    }
+  ]
+}
diff --git a/python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.h b/python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.h
new file mode 100644
index 0000000000000..74dc05bd3b310
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.h
@@ -0,0 +1,16 @@
+#ifndef TREE_SITTER_TSG_PYTHON_H_
+#define TREE_SITTER_TSG_PYTHON_H_
+
+typedef struct TSLanguage TSLanguage;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const TSLanguage *tree_sitter_tsg_python(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_TSG_PYTHON_H_
diff --git a/python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.pc.in b/python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.pc.in
new file mode 100644
index 0000000000000..a865541b8b4f8
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/c/tree-sitter-tsg_python.pc.in
@@ -0,0 +1,11 @@
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=@INCLUDEDIR@
+
+Name: tree-sitter-tsg_python
+Description: TsgPython grammar for tree-sitter
+URL: @URL@
+Version: @VERSION@
+Requires: @REQUIRES@
+Libs: -L${libdir} @ADDITIONAL_LIBS@ -ltree-sitter-tsg_python
+Cflags: -I${includedir}
diff --git a/python/extractor/tsg-python/bindings/go/binding.go b/python/extractor/tsg-python/bindings/go/binding.go
new file mode 100644
index 0000000000000..c13047f3da29f
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/go/binding.go
@@ -0,0 +1,13 @@
+package tree_sitter_tsg_python
+
+// #cgo CFLAGS: -std=c11 -fPIC
+// #include "../../src/parser.c"
+// // NOTE: if your language has an external scanner, add it here.
+import "C"
+
+import "unsafe"
+
+// Get the tree-sitter Language for this grammar.
+func Language() unsafe.Pointer {
+	return unsafe.Pointer(C.tree_sitter_tsg_python())
+}
diff --git a/python/extractor/tsg-python/bindings/go/binding_test.go b/python/extractor/tsg-python/bindings/go/binding_test.go
new file mode 100644
index 0000000000000..04861c942fcbf
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/go/binding_test.go
@@ -0,0 +1,15 @@
+package tree_sitter_tsg_python_test
+
+import (
+	"testing"
+
+	tree_sitter "github.com/smacker/go-tree-sitter"
+	"github.com/tree-sitter/tree-sitter-tsg_python"
+)
+
+func TestCanLoadGrammar(t *testing.T) { // smoke test: the grammar pointer can be wrapped by go-tree-sitter
+	language := tree_sitter.NewLanguage(tree_sitter_tsg_python.Language())
+	if language == nil { // a nil wrapper is reported as a test failure
+		t.Errorf("Error loading TsgPython grammar")
+	}
+}
diff --git a/python/extractor/tsg-python/bindings/go/go.mod b/python/extractor/tsg-python/bindings/go/go.mod
new file mode 100644
index 0000000000000..10510cfb427c0
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/go/go.mod
@@ -0,0 +1,5 @@
+module github.com/tree-sitter/tree-sitter-tsg_python
+
+go 1.22
+
+require github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8
diff --git a/python/extractor/tsg-python/bindings/node/binding.cc b/python/extractor/tsg-python/bindings/node/binding.cc
new file mode 100644
index 0000000000000..2c9ea04813c52
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/node/binding.cc
@@ -0,0 +1,20 @@
+#include <napi.h>
+
+typedef struct TSLanguage TSLanguage;
+
+extern "C" TSLanguage *tree_sitter_tsg_python();
+
+// "tree-sitter", "language" hashed with BLAKE2
+const napi_type_tag LANGUAGE_TYPE_TAG = {
+  0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
+};
+
+Napi::Object Init(Napi::Env env, Napi::Object exports) { // module initializer, registered by NODE_API_MODULE below
+    exports["name"] = Napi::String::New(env, "tsg_python");
+    auto language = Napi::External<TSLanguage>::New(env, tree_sitter_tsg_python()); // wrap the raw TSLanguage pointer
+    language.TypeTag(&LANGUAGE_TYPE_TAG); // attach the tag; presumably checked by whoever consumes this external - confirm
+    exports["language"] = language;
+    return exports;
+}
+
+NODE_API_MODULE(tree_sitter_tsg_python_binding, Init)
diff --git a/python/extractor/tsg-python/bindings/node/index.d.ts b/python/extractor/tsg-python/bindings/node/index.d.ts
new file mode 100644
index 0000000000000..efe259eed03f0
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/node/index.d.ts
@@ -0,0 +1,28 @@
+type BaseNode = {
+  type: string;
+  named: boolean;
+};
+
+type ChildNode = {
+  multiple: boolean;
+  required: boolean;
+  types: BaseNode[];
+};
+
+type NodeInfo =
+  | (BaseNode & {
+      subtypes: BaseNode[];
+    })
+  | (BaseNode & {
+      fields: { [name: string]: ChildNode };
+      children: ChildNode[];
+    });
+
+type Language = {
+  name: string;
+  language: unknown;
+  nodeTypeInfo: NodeInfo[];
+};
+
+declare const language: Language;
+export = language;
diff --git a/python/extractor/tsg-python/bindings/node/index.js b/python/extractor/tsg-python/bindings/node/index.js
new file mode 100644
index 0000000000000..6657bcf42decc
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/node/index.js
@@ -0,0 +1,7 @@
+const root = require("path").join(__dirname, "..", ".."); // two directories above bindings/node
+
+module.exports = require("node-gyp-build")(root); // export whatever node-gyp-build loads for that directory
+
+try {
+  module.exports.nodeTypeInfo = require("../../src/node-types.json");
+} catch (_) {} // best effort: if the JSON cannot be loaded, nodeTypeInfo is simply left unset
diff --git a/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.py b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.py
new file mode 100644
index 0000000000000..67aac2959f9c8
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.py
@@ -0,0 +1,5 @@
+"TsgPython grammar for tree-sitter"
+
+from ._binding import language
+
+__all__ = ["language"]
diff --git a/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.pyi b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.pyi
new file mode 100644
index 0000000000000..5416666fc300c
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/__init__.pyi
@@ -0,0 +1 @@
+def language() -> int: ...
diff --git a/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/binding.c b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/binding.c
new file mode 100644
index 0000000000000..bffec9d92fbe6
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/binding.c
@@ -0,0 +1,27 @@
+#include <Python.h>
+
+typedef struct TSLanguage TSLanguage;
+
+TSLanguage *tree_sitter_tsg_python(void);
+
+static PyObject* _binding_language(PyObject *self, PyObject *args) { // implements the module's language(); neither argument is read
+    return PyLong_FromVoidPtr(tree_sitter_tsg_python()); // expose the TSLanguage pointer to Python as an int
+}
+
+static PyMethodDef methods[] = {
+    {"language", _binding_language, METH_NOARGS,
+     "Get the tree-sitter language for this grammar."},
+    {NULL, NULL, 0, NULL}
+};
+
+static struct PyModuleDef module = {
+    .m_base = PyModuleDef_HEAD_INIT,
+    .m_name = "_binding",
+    .m_doc = NULL,
+    .m_size = -1,
+    .m_methods = methods
+};
+
+PyMODINIT_FUNC PyInit__binding(void) { // init function for the `_binding` extension module
+    return PyModule_Create(&module); // the result (NULL on failure, per the CPython API) is returned unchanged
+}
diff --git a/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/py.typed b/python/extractor/tsg-python/bindings/python/tree_sitter_tsg_python/py.typed
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/python/extractor/tsg-python/bindings/rust/build.rs b/python/extractor/tsg-python/bindings/rust/build.rs
new file mode 100644
index 0000000000000..cf8024ee7cbf5
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/rust/build.rs
@@ -0,0 +1,22 @@
+fn main() { // build script: compiles the generated src/parser.c with the `cc` crate
+    let src_dir = std::path::Path::new("src");
+
+    let mut c_config = cc::Build::new();
+    c_config.std("c11").include(src_dir); // C11, with `src` on the include path
+
+    #[cfg(target_env = "msvc")]
+    c_config.flag("-utf-8"); // MSVC targets only; presumably selects UTF-8 source encoding - confirm
+
+    let parser_path = src_dir.join("parser.c");
+    c_config.file(&parser_path);
+    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); // re-run this script when parser.c changes
+
+    // NOTE: if your language uses an external scanner, uncomment this block:
+    /*
+    let scanner_path = src_dir.join("scanner.c");
+    c_config.file(&scanner_path);
+    println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
+    */
+
+    c_config.compile("tree-sitter-tsg_python"); // run the C compiler; the argument names the output library
+}
diff --git a/python/extractor/tsg-python/bindings/rust/lib.rs b/python/extractor/tsg-python/bindings/rust/lib.rs
new file mode 100644
index 0000000000000..4ac467a69ea27
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/rust/lib.rs
@@ -0,0 +1,54 @@
+//! This crate provides TsgPython language support for the [tree-sitter][] parsing library.
+//!
+//! Typically, you will use the [language][language func] function to add this language to a
+//! tree-sitter [Parser][], and then use the parser to parse some code:
+//!
+//! ```
+//! let code = r#"hello
+//! "#;
+//! let mut parser = tree_sitter::Parser::new();
+//! parser.set_language(&tree_sitter_tsg_python::language()).expect("Error loading TsgPython grammar");
+//! let tree = parser.parse(code, None).unwrap();
+//! assert!(!tree.root_node().has_error());
+//! ```
+//!
+//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
+//! [language func]: fn.language.html
+//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
+//! [tree-sitter]: https://tree-sitter.github.io/
+
+use tree_sitter::Language;
+
+extern "C" {
+    fn tree_sitter_tsg_python() -> Language;
+}
+
+/// Returns the tree-sitter [Language][] for the tsg_python grammar by calling the generated C entry point.
+///
+/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
+pub fn language() -> Language {
+    unsafe { tree_sitter_tsg_python() } // FFI call to the symbol declared in the `extern "C"` block above
+}
+
+/// The content of the [`node-types.json`][] file for this grammar.
+///
+/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
+pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
+
+// Uncomment these to include any queries that this grammar contains
+
+// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
+// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
+// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
+// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn test_can_load_grammar() { // smoke test: a fresh Parser accepts this crate's Language
+        let mut parser = tree_sitter::Parser::new();
+        parser
+            .set_language(&super::language())
+            .expect("Error loading TsgPython grammar"); // panics, and so fails the test, if set_language returns Err
+    }
+}
diff --git a/python/extractor/tsg-python/bindings/swift/TreeSitterTsgPython/tsg_python.h b/python/extractor/tsg-python/bindings/swift/TreeSitterTsgPython/tsg_python.h
new file mode 100644
index 0000000000000..74dc05bd3b310
--- /dev/null
+++ b/python/extractor/tsg-python/bindings/swift/TreeSitterTsgPython/tsg_python.h
@@ -0,0 +1,16 @@
+#ifndef TREE_SITTER_TSG_PYTHON_H_
+#define TREE_SITTER_TSG_PYTHON_H_
+
+typedef struct TSLanguage TSLanguage;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const TSLanguage *tree_sitter_tsg_python(void); // same declaration as bindings/c/tree-sitter-tsg_python.h; defined in src/parser.c
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_TSG_PYTHON_H_
diff --git a/python/extractor/tsg-python/grammar.js b/python/extractor/tsg-python/grammar.js
new file mode 100644
index 0000000000000..87da9c2310ace
--- /dev/null
+++ b/python/extractor/tsg-python/grammar.js
@@ -0,0 +1,11 @@
+/// <reference types="tree-sitter-cli/dsl" />
+// @ts-check
+
+module.exports = grammar({
+  name: "tsg_python",
+
+  rules: {
+    // TODO: add the actual grammar rules
+    source_file: $ => "hello"
+  }
+});
diff --git a/python/extractor/tsg-python/package-lock.json b/python/extractor/tsg-python/package-lock.json
new file mode 100644
index 0000000000000..31eed2f7bb12a
--- /dev/null
+++ b/python/extractor/tsg-python/package-lock.json
@@ -0,0 +1,6 @@
+{
+  "name": "tsg-python",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {}
+}
diff --git a/python/extractor/tsg-python/package.json b/python/extractor/tsg-python/package.json
new file mode 100644
index 0000000000000..390c22ac097bf
--- /dev/null
+++ b/python/extractor/tsg-python/package.json
@@ -0,0 +1,53 @@
+{
+  "name": "tree-sitter-tsg-python",
+  "version": "0.0.1",
+  "description": "TsgPython grammar for tree-sitter",
+  "repository": "github:tree-sitter/tree-sitter-tsg-python",
+  "license": "MIT",
+  "main": "bindings/node",
+  "types": "bindings/node",
+  "keywords": [
+    "incremental",
+    "parsing",
+    "tree-sitter",
+    "tsg_python"
+  ],
+  "files": [
+    "grammar.js",
+    "binding.gyp",
+    "prebuilds/**",
+    "bindings/node/*",
+    "queries/*",
+    "src/**"
+  ],
+  "dependencies": {
+    "node-addon-api": "^7.1.0",
+    "node-gyp-build": "^4.8.0"
+  },
+  "devDependencies": {
+    "prebuildify": "^6.0.0",
+    "tree-sitter-cli": "^0.22.6"
+  },
+  "peerDependencies": {
+    "tree-sitter": "^0.21.0"
+  },
+  "peerDependenciesMeta": {
+    "tree-sitter": {
+      "optional": true
+    }
+  },
+  "scripts": {
+    "install": "node-gyp-build",
+    "prebuildify": "prebuildify --napi --strip",
+    "build": "tree-sitter generate --no-bindings",
+    "build-wasm": "tree-sitter build --wasm",
+    "test": "tree-sitter test",
+    "parse": "tree-sitter parse"
+  },
+  "tree-sitter": [
+    {
+      "scope": "source.tsg_python",
+      "injection-regex": "^tsg_python$"
+    }
+  ]
+}
diff --git a/python/extractor/tsg-python/pyproject.toml b/python/extractor/tsg-python/pyproject.toml
new file mode 100644
index 0000000000000..12ccf8442f9ec
--- /dev/null
+++ b/python/extractor/tsg-python/pyproject.toml
@@ -0,0 +1,29 @@
+[build-system]
+requires = ["setuptools>=42", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "tree-sitter-tsg-python"
+description = "TsgPython grammar for tree-sitter"
+version = "0.0.1"
+keywords = ["incremental", "parsing", "tree-sitter", "tsg-python"]
+classifiers = [
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: MIT License",
+  "Topic :: Software Development :: Compilers",
+  "Topic :: Text Processing :: Linguistic",
+  "Typing :: Typed"
+]
+requires-python = ">=3.8"
+license.text = "MIT"
+readme = "README.md"
+
+[project.urls]
+Homepage = "https://github.com/tree-sitter/tree-sitter-tsg-python"
+
+[project.optional-dependencies]
+core = ["tree-sitter~=0.21"]
+
+[tool.cibuildwheel]
+build = "cp38-*"
+build-frontend = "build"
diff --git a/python/extractor/tsg-python/rust-toolchain.toml b/python/extractor/tsg-python/rust-toolchain.toml
index fe5c5df29ffcd..92361581a978a 100644
--- a/python/extractor/tsg-python/rust-toolchain.toml
+++ b/python/extractor/tsg-python/rust-toolchain.toml
@@ -2,6 +2,6 @@
 # extractor. It is set to the lowest version of Rust we want to support.
 
 [toolchain]
-channel = "1.68"
+channel = "1.70"
 profile = "minimal"
 components = [ "rustfmt" ]
diff --git a/python/extractor/tsg-python/setup.py b/python/extractor/tsg-python/setup.py
new file mode 100644
index 0000000000000..9f395d46ab642
--- /dev/null
+++ b/python/extractor/tsg-python/setup.py
@@ -0,0 +1,60 @@
+from os.path import isdir, join
+from platform import system
+
+from setuptools import Extension, find_packages, setup
+from setuptools.command.build import build
+from wheel.bdist_wheel import bdist_wheel
+
+
+class Build(build):  # build command that also ships an optional top-level `queries` directory
+    def run(self):
+        if isdir("queries"):  # nothing is copied when the directory is absent
+            dest = join(self.build_lib, "tree_sitter_tsg_python", "queries")
+            self.copy_tree("queries", dest)  # place the queries inside the built package
+        super().run()
+
+
+class BdistWheel(bdist_wheel):  # wheel command that retags CPython wheels as cp38-abi3
+    def get_tag(self):
+        python, abi, platform = super().get_tag()
+        if python.startswith("cp"):  # only CPython tags are rewritten; others pass through unchanged
+            python, abi = "cp38", "abi3"  # lines up with the Py_LIMITED_API 0x03080000 define further down this file
+        return python, abi, platform
+
+
+setup(
+    packages=find_packages("bindings/python"),
+    package_dir={"": "bindings/python"},
+    package_data={
+        "tree_sitter_tsg_python": ["*.pyi", "py.typed"],
+        "tree_sitter_tsg_python.queries": ["*.scm"],
+    },
+    ext_package="tree_sitter_tsg_python",
+    ext_modules=[
+        Extension(
+            name="_binding",
+            sources=[
+                "bindings/python/tree_sitter_tsg_python/binding.c",
+                "src/parser.c",
+                # NOTE: if your language uses an external scanner, add it here.
+            ],
+            extra_compile_args=[
+                "-std=c11",
+            ] if system() != "Windows" else [
+                "/std:c11",
+                "/utf-8",
+            ],
+            define_macros=[
+                ("Py_LIMITED_API", "0x03080000"),
+                ("PY_SSIZE_T_CLEAN", None)
+            ],
+            include_dirs=["src"],
+            py_limited_api=True,
+        )
+    ],
+    cmdclass={
+        "build": Build,
+        "bdist_wheel": BdistWheel
+    },
+    zip_safe=False
+)
diff --git a/python/extractor/tsg-python/src/grammar.json b/python/extractor/tsg-python/src/grammar.json
new file mode 100644
index 0000000000000..b607032352a9d
--- /dev/null
+++ b/python/extractor/tsg-python/src/grammar.json
@@ -0,0 +1,20 @@
+{
+  "name": "tsg_python",
+  "rules": {
+    "source_file": {
+      "type": "STRING",
+      "value": "hello"
+    }
+  },
+  "extras": [
+    {
+      "type": "PATTERN",
+      "value": "\\s"
+    }
+  ],
+  "conflicts": [],
+  "precedences": [],
+  "externals": [],
+  "inline": [],
+  "supertypes": []
+}
diff --git a/python/extractor/tsg-python/src/node-types.json b/python/extractor/tsg-python/src/node-types.json
new file mode 100644
index 0000000000000..43a64428b9118
--- /dev/null
+++ b/python/extractor/tsg-python/src/node-types.json
@@ -0,0 +1,11 @@
+[
+  {
+    "type": "source_file",
+    "named": true,
+    "fields": {}
+  },
+  {
+    "type": "hello",
+    "named": false
+  }
+]
\ No newline at end of file
diff --git a/python/extractor/tsg-python/src/parser.c b/python/extractor/tsg-python/src/parser.c
new file mode 100644
index 0000000000000..19f3840dffda0
--- /dev/null
+++ b/python/extractor/tsg-python/src/parser.c
@@ -0,0 +1,178 @@
+#include "tree_sitter/parser.h"
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
+
+#define LANGUAGE_VERSION 14
+#define STATE_COUNT 4
+#define LARGE_STATE_COUNT 2
+#define SYMBOL_COUNT 3
+#define ALIAS_COUNT 0
+#define TOKEN_COUNT 2
+#define EXTERNAL_TOKEN_COUNT 0
+#define FIELD_COUNT 0
+#define MAX_ALIAS_SEQUENCE_LENGTH 1
+#define PRODUCTION_ID_COUNT 1
+
+enum ts_symbol_identifiers {
+  anon_sym_hello = 1,
+  sym_source_file = 2,
+};
+
+static const char * const ts_symbol_names[] = {
+  [ts_builtin_sym_end] = "end",
+  [anon_sym_hello] = "hello",
+  [sym_source_file] = "source_file",
+};
+
+static const TSSymbol ts_symbol_map[] = {
+  [ts_builtin_sym_end] = ts_builtin_sym_end,
+  [anon_sym_hello] = anon_sym_hello,
+  [sym_source_file] = sym_source_file,
+};
+
+static const TSSymbolMetadata ts_symbol_metadata[] = {
+  [ts_builtin_sym_end] = {
+    .visible = false,
+    .named = true,
+  },
+  [anon_sym_hello] = {
+    .visible = true,
+    .named = false,
+  },
+  [sym_source_file] = {
+    .visible = true,
+    .named = true,
+  },
+};
+
+static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {
+  [0] = {0},
+};
+
+static const uint16_t ts_non_terminal_alias_map[] = {
+  0,
+};
+
+static const TSStateId ts_primary_state_ids[STATE_COUNT] = {
+  [0] = 0,
+  [1] = 1,
+  [2] = 2,
+  [3] = 3,
+};
+
+static bool ts_lex(TSLexer *lexer, TSStateId state) { // generated lexer for end-of-input and the literal "hello"; NOTE(review): state notes assume ADVANCE(n) consumes the lookahead and continues in state n (macros live in tree_sitter/parser.h)
+  START_LEXER();
+  eof = lexer->eof(lexer);
+  switch (state) {
+    case 0: // start state
+      if (eof) ADVANCE(5);
+      if (lookahead == 'h') ADVANCE(1);
+      if (('\t' <= lookahead && lookahead <= '\r') ||
+          lookahead == ' ') SKIP(0); // tab..carriage-return and space: presumably dropped, lexing continues in state 0
+      END_STATE();
+    case 1: // seen "h"
+      if (lookahead == 'e') ADVANCE(3);
+      END_STATE();
+    case 2: // seen "hel" (reached from state 3)
+      if (lookahead == 'l') ADVANCE(4);
+      END_STATE();
+    case 3: // seen "he" (reached from state 1)
+      if (lookahead == 'l') ADVANCE(2);
+      END_STATE();
+    case 4: // seen "hell"
+      if (lookahead == 'o') ADVANCE(6);
+      END_STATE();
+    case 5: // reached from state 0 at end of input
+      ACCEPT_TOKEN(ts_builtin_sym_end);
+      END_STATE();
+    case 6: // seen the full "hello"
+      ACCEPT_TOKEN(anon_sym_hello);
+      END_STATE();
+    default:
+      return false;
+  }
+}
+
+static const TSLexMode ts_lex_modes[STATE_COUNT] = {
+  [0] = {.lex_state = 0},
+  [1] = {.lex_state = 0},
+  [2] = {.lex_state = 0},
+  [3] = {.lex_state = 0},
+};
+
+static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
+  [0] = {
+    [ts_builtin_sym_end] = ACTIONS(1),
+    [anon_sym_hello] = ACTIONS(1),
+  },
+  [1] = {
+    [sym_source_file] = STATE(3),
+    [anon_sym_hello] = ACTIONS(3),
+  },
+};
+
+static const uint16_t ts_small_parse_table[] = {
+  [0] = 1,
+    ACTIONS(5), 1,
+      ts_builtin_sym_end,
+  [4] = 1,
+    ACTIONS(7), 1,
+      ts_builtin_sym_end,
+};
+
+static const uint32_t ts_small_parse_table_map[] = {
+  [SMALL_STATE(2)] = 0,
+  [SMALL_STATE(3)] = 4,
+};
+
+static const TSParseActionEntry ts_parse_actions[] = {
+  [0] = {.entry = {.count = 0, .reusable = false}},
+  [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
+  [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
+  [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1, 0, 0),
+  [7] = {.entry = {.count = 1, .reusable = true}},  ACCEPT_INPUT(),
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef TREE_SITTER_HIDE_SYMBOLS
+#define TS_PUBLIC
+#elif defined(_WIN32)
+#define TS_PUBLIC __declspec(dllexport)
+#else
+#define TS_PUBLIC __attribute__((visibility("default")))
+#endif
+
+TS_PUBLIC const TSLanguage *tree_sitter_tsg_python(void) { // exported entry point that the language bindings call
+  static const TSLanguage language = { // function-local static: wires the generated tables above into one descriptor
+    .version = LANGUAGE_VERSION,
+    .symbol_count = SYMBOL_COUNT,
+    .alias_count = ALIAS_COUNT,
+    .token_count = TOKEN_COUNT,
+    .external_token_count = EXTERNAL_TOKEN_COUNT,
+    .state_count = STATE_COUNT,
+    .large_state_count = LARGE_STATE_COUNT,
+    .production_id_count = PRODUCTION_ID_COUNT,
+    .field_count = FIELD_COUNT,
+    .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,
+    .parse_table = &ts_parse_table[0][0],
+    .small_parse_table = ts_small_parse_table,
+    .small_parse_table_map = ts_small_parse_table_map,
+    .parse_actions = ts_parse_actions,
+    .symbol_names = ts_symbol_names,
+    .symbol_metadata = ts_symbol_metadata,
+    .public_symbol_map = ts_symbol_map,
+    .alias_map = ts_non_terminal_alias_map,
+    .alias_sequences = &ts_alias_sequences[0][0],
+    .lex_modes = ts_lex_modes,
+    .lex_fn = ts_lex,
+    .primary_state_ids = ts_primary_state_ids,
+  };
+  return &language; // address of static storage, so it stays valid after this call returns
+}
+#ifdef __cplusplus
+}
+#endif
diff --git a/python/extractor/tsg-python/src/tree_sitter/alloc.h b/python/extractor/tsg-python/src/tree_sitter/alloc.h
new file mode 100644
index 0000000000000..1f4466d75c40b
--- /dev/null
+++ b/python/extractor/tsg-python/src/tree_sitter/alloc.h
@@ -0,0 +1,54 @@
+#ifndef TREE_SITTER_ALLOC_H_
+#define TREE_SITTER_ALLOC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Allow clients to override allocation functions
+#ifdef TREE_SITTER_REUSE_ALLOCATOR
+
+extern void *(*ts_current_malloc)(size_t);
+extern void *(*ts_current_calloc)(size_t, size_t);
+extern void *(*ts_current_realloc)(void *, size_t);
+extern void (*ts_current_free)(void *);
+
+#ifndef ts_malloc
+#define ts_malloc  ts_current_malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc  ts_current_calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc ts_current_realloc
+#endif
+#ifndef ts_free
+#define ts_free    ts_current_free
+#endif
+
+#else
+
+#ifndef ts_malloc
+#define ts_malloc  malloc
+#endif
+#ifndef ts_calloc
+#define ts_calloc  calloc
+#endif
+#ifndef ts_realloc
+#define ts_realloc realloc
+#endif
+#ifndef ts_free
+#define ts_free    free
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_ALLOC_H_
diff --git a/python/extractor/tsg-python/src/tree_sitter/array.h b/python/extractor/tsg-python/src/tree_sitter/array.h
new file mode 100644
index 0000000000000..15a3b233bbb87
--- /dev/null
+++ b/python/extractor/tsg-python/src/tree_sitter/array.h
@@ -0,0 +1,290 @@
+#ifndef TREE_SITTER_ARRAY_H_
+#define TREE_SITTER_ARRAY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./alloc.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4101)
+#elif defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#define Array(T)       \
+  struct {             \
+    T *contents;       \
+    uint32_t size;     \
+    uint32_t capacity; \
+  }
+
+/// Initialize an array.
+#define array_init(self) \
+  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
+
+/// Create an empty array.
+#define array_new() \
+  { NULL, 0, 0 }
+
+/// Get a pointer to the element at a given `index` in the array.
+#define array_get(self, _index) \
+  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
+
+/// Get a pointer to the first element in the array.
+#define array_front(self) array_get(self, 0)
+
+/// Get a pointer to the last element in the array.
+#define array_back(self) array_get(self, (self)->size - 1)
+
+/// Clear the array, setting its size to zero. Note that this does not free any
+/// memory allocated for the array's contents.
+#define array_clear(self) ((self)->size = 0)
+
+/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
+/// not greater than the array's current capacity, this function has no effect.
+#define array_reserve(self, new_capacity) \
+  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)
+
+/// Free the memory allocated for this array's contents. Note that this does
+/// not free any memory that the individual elements themselves point to.
+#define array_delete(self) _array__delete((Array *)(self))
+
+/// Push a new `element` onto the end of the array.
+#define array_push(self, element)                            \
+  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
+   (self)->contents[(self)->size++] = (element))
+
+/// Increase the array's size by `count` elements.
+/// New elements are zero-initialized.
+#define array_grow_by(self, count) \
+  do { \
+    if ((count) == 0) break; \
+    _array__grow((Array *)(self), count, array_elem_size(self)); \
+    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
+    (self)->size += (count); \
+  } while (0)
+
+/// Append all elements from one array to the end of another.
+#define array_push_all(self, other)                                       \
+  array_extend((self), (other)->size, (other)->contents)
+
+/// Append `count` elements to the end of the array, reading their values from the
+/// `contents` pointer.
+#define array_extend(self, count, contents)                    \
+  _array__splice(                                               \
+    (Array *)(self), array_elem_size(self), (self)->size, \
+    0, count,  contents                                        \
+  )
+
+/// Remove `old_count` elements from the array starting at the given `index`. At
+/// the same index, insert `new_count` new elements, reading their values from the
+/// `new_contents` pointer.
+#define array_splice(self, _index, old_count, new_count, new_contents)  \
+  _array__splice(                                                       \
+    (Array *)(self), array_elem_size(self), _index,                \
+    old_count, new_count, new_contents                                 \
+  )
+
+/// Insert one `element` into the array at the given `index`.
+#define array_insert(self, _index, element) \
+  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
+
+/// Remove one element from the array at the given `index`.
+#define array_erase(self, _index) \
+  _array__erase((Array *)(self), array_elem_size(self), _index)
+
+/// Pop the last element off the array, returning the element by value.
+#define array_pop(self) ((self)->contents[--(self)->size])
+
+/// Assign the contents of one array to another, reallocating if necessary.
+#define array_assign(self, other) \
+  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
+
+/// Swap one array with another
+#define array_swap(self, other) \
+  _array__swap((Array *)(self), (Array *)(other))
+
+/// Get the size in bytes of a single element of the array.
+#define array_elem_size(self) (sizeof *(self)->contents)
+
+/// Search a sorted array for a given `needle` value, using the given `compare`
+/// callback to determine the order.
+///
+/// If an existing element is found to be equal to `needle`, then the `index`
+/// out-parameter is set to the existing value's index, and the `exists`
+/// out-parameter is set to true. Otherwise, `index` is set to an index where
+/// `needle` should be inserted in order to preserve the sorting, and `exists`
+/// is set to false.
+#define array_search_sorted_with(self, compare, needle, _index, _exists) \
+  _array__search_sorted(self, 0, compare, , needle, _index, _exists)
+
+/// Search a sorted array for a given `needle` value, using integer comparisons
+/// of a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_with`.
+#define array_search_sorted_by(self, field, needle, _index, _exists) \
+  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
+
+/// Insert a given `value` into a sorted array, using the given `compare`
+/// callback to determine the order.
+#define array_insert_sorted_with(self, compare, value) \
+  do { \
+    unsigned _index, _exists; \
+    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
+    if (!_exists) array_insert(self, _index, value); \
+  } while (0)
+
+/// Insert a given `value` into a sorted array, using integer comparisons of
+/// a given struct field (specified with a leading dot) to determine the order.
+///
+/// See also `array_search_sorted_by`.
+#define array_insert_sorted_by(self, field, value) \
+  do { \
+    unsigned _index, _exists; \
+    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
+    if (!_exists) array_insert(self, _index, value); \
+  } while (0)
+
+// Private
+
+typedef Array(void) Array;
+
+/// Private helper behind `array_delete`: frees the contents buffer, if any, and resets the array to empty.
+static inline void _array__delete(Array *self) {
+  if (self->contents) { // an array with no buffer is left untouched
+    ts_free(self->contents);
+    self->contents = NULL;
+    self->size = 0;
+    self->capacity = 0;
+  }
+}
+
+/// Private helper behind `array_erase`: removes the element at `index` by shifting the tail down one slot.
+static inline void _array__erase(Array *self, size_t element_size,
+                                uint32_t index) {
+  assert(index < self->size);
+  char *contents = (char *)self->contents; // byte pointer so offsets can be scaled by element_size
+  memmove(contents + index * element_size, contents + (index + 1) * element_size,
+          (self->size - index - 1) * element_size); // byte length of the elements after `index`
+  self->size--;
+}
+
+/// Private helper behind `array_reserve`: grows the buffer to hold `new_capacity` elements; never shrinks it.
+static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
+  if (new_capacity > self->capacity) { // equal or smaller requests are a no-op
+    if (self->contents) {
+      self->contents = ts_realloc(self->contents, new_capacity * element_size); // NOTE(review): result is not checked for NULL here
+    } else {
+      self->contents = ts_malloc(new_capacity * element_size); // first allocation; likewise unchecked here
+    }
+    self->capacity = new_capacity;
+  }
+}
+
+/// Private helper behind `array_assign`: copies `other`'s elements into `self`, reserving space first.
+static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
+  _array__reserve(self, element_size, other->size); // ensure capacity >= other->size (existing buffer is kept if large enough)
+  self->size = other->size;
+  memcpy(self->contents, other->contents, self->size * element_size); // NOTE(review): called even when size is 0 - confirm pointers are non-NULL then
+}
+
+/// Private helper behind `array_swap`: exchanges the two arrays' contents pointer, size and capacity.
+static inline void _array__swap(Array *self, Array *other) {
+  Array swap = *other; // struct copy of the header only; element storage is not copied
+  *other = *self;
+  *self = swap;
+}
+
+/// Private helper behind `array_push` and `array_grow_by`: ensures capacity for `count` more elements; size is unchanged.
+static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
+  uint32_t new_size = self->size + count;
+  if (new_size > self->capacity) {
+    uint32_t new_capacity = self->capacity * 2; // start from double the current capacity
+    if (new_capacity < 8) new_capacity = 8; // but never less than 8 elements
+    if (new_capacity < new_size) new_capacity = new_size; // and always enough for the requested size
+    _array__reserve(self, element_size, new_capacity);
+  }
+}
+
+/// Private helper behind `array_splice`: replaces `old_count` elements at `index` with `new_count` new ones.
+static inline void _array__splice(Array *self, size_t element_size,
+                                 uint32_t index, uint32_t old_count,
+                                 uint32_t new_count, const void *elements) {
+  uint32_t new_size = self->size + new_count - old_count;
+  uint32_t old_end = index + old_count; // one past the last element being removed
+  uint32_t new_end = index + new_count; // one past the last element being inserted
+  assert(old_end <= self->size);
+
+  _array__reserve(self, element_size, new_size);
+
+  char *contents = (char *)self->contents; // read after the reserve call, which may replace the buffer
+  if (self->size > old_end) { // there is a tail after the removed range: move it to its new position
+    memmove(
+      contents + new_end * element_size,
+      contents + old_end * element_size,
+      (self->size - old_end) * element_size
+    );
+  }
+  if (new_count > 0) {
+    if (elements) { // caller supplied values: copy them into the gap
+      memcpy(
+        (contents + index * element_size),
+        elements,
+        new_count * element_size
+      );
+    } else { // NULL `elements`: the inserted slots are zero-filled instead
+      memset(
+        (contents + index * element_size),
+        0,
+        new_count * element_size
+      );
+    }
+  }
+  self->size += new_count - old_count;
+}
+
+/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
+/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
+#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
+  do { \
+    *(_index) = start; \
+    *(_exists) = false; \
+    uint32_t size = (self)->size - *(_index); \
+    if (size == 0) break; \
+    int comparison; \
+    while (size > 1) { \
+      uint32_t half_size = size / 2; \
+      uint32_t mid_index = *(_index) + half_size; \
+      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
+      if (comparison <= 0) *(_index) = mid_index; \
+      size -= half_size; \
+    } \
+    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
+    if (comparison == 0) *(_exists) = true; \
+    else if (comparison < 0) *(_index) += 1; \
+  } while (0)
+
+/// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
+/// parameter by reference in order to work with the generic `_array__search_sorted` macro above.
+#define _compare_int(a, b) ((int)*(a) - (int)(b))
+
+#ifdef _MSC_VER
+#pragma warning(default : 4101)
+#elif defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_ARRAY_H_
diff --git a/python/extractor/tsg-python/src/tree_sitter/parser.h b/python/extractor/tsg-python/src/tree_sitter/parser.h
new file mode 100644
index 0000000000000..17f0e94bfcf4d
--- /dev/null
+++ b/python/extractor/tsg-python/src/tree_sitter/parser.h
@@ -0,0 +1,265 @@
+#ifndef TREE_SITTER_PARSER_H_
+#define TREE_SITTER_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define ts_builtin_sym_error ((TSSymbol)-1)
+#define ts_builtin_sym_end 0
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
+
+#ifndef TREE_SITTER_API_H_
+typedef uint16_t TSStateId;
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+#endif
+
+typedef struct {
+  TSFieldId field_id;
+  uint8_t child_index;
+  bool inherited;
+} TSFieldMapEntry;
+
+typedef struct {
+  uint16_t index;
+  uint16_t length;
+} TSFieldMapSlice;
+
+typedef struct {
+  bool visible;
+  bool named;
+  bool supertype;
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+struct TSLexer {  // Lexer state and callbacks that the lexer macros in this header reach through `lexer->...`
+  int32_t lookahead;  // copied into START_LEXER's local `lookahead` each time control passes its `start:` label
+  TSSymbol result_symbol;  // written by ACCEPT_TOKEN
+  void (*advance)(TSLexer *, bool);  // START_LEXER calls this as advance(lexer, skip)
+  void (*mark_end)(TSLexer *);  // called by ACCEPT_TOKEN after it stores result_symbol
+  uint32_t (*get_column)(TSLexer *);
+  bool (*is_at_included_range_start)(const TSLexer *);
+  bool (*eof)(const TSLexer *);
+};
+
+typedef enum {
+  TSParseActionTypeShift,
+  TSParseActionTypeReduce,
+  TSParseActionTypeAccept,
+  TSParseActionTypeRecover,
+} TSParseActionType;
+
+typedef union {  // One parse-table action; every variant begins with a uint8_t `type` tag
+  struct {
+    uint8_t type;  // TSParseActionTypeShift when built by SHIFT, SHIFT_REPEAT or SHIFT_EXTRA
+    TSStateId state;  // set by SHIFT and SHIFT_REPEAT; left zero-initialized by SHIFT_EXTRA
+    bool extra;  // set to true only by SHIFT_EXTRA
+    bool repetition;  // set to true only by SHIFT_REPEAT
+  } shift;
+  struct {
+    uint8_t type;  // TSParseActionTypeReduce when built by REDUCE
+    uint8_t child_count;
+    TSSymbol symbol;
+    int16_t dynamic_precedence;
+    uint16_t production_id;
+  } reduce;
+  uint8_t type;  // bare tag; RECOVER and ACCEPT_INPUT initialize only this member
+} TSParseAction;
+
+typedef struct {
+  uint16_t lex_state;
+  uint16_t external_lex_state;
+} TSLexMode;
+
+typedef union {
+  TSParseAction action;
+  struct {
+    uint8_t count;
+    bool reusable;
+  } entry;
+} TSParseActionEntry;
+
+typedef struct {
+  int32_t start;
+  int32_t end;
+} TSCharacterRange;
+
+struct TSLanguage {
+  uint32_t version;
+  uint32_t symbol_count;
+  uint32_t alias_count;
+  uint32_t token_count;
+  uint32_t external_token_count;
+  uint32_t state_count;
+  uint32_t large_state_count;
+  uint32_t production_id_count;
+  uint32_t field_count;
+  uint16_t max_alias_sequence_length;
+  const uint16_t *parse_table;
+  const uint16_t *small_parse_table;
+  const uint32_t *small_parse_table_map;
+  const TSParseActionEntry *parse_actions;
+  const char * const *symbol_names;
+  const char * const *field_names;
+  const TSFieldMapSlice *field_map_slices;
+  const TSFieldMapEntry *field_map_entries;
+  const TSSymbolMetadata *symbol_metadata;
+  const TSSymbol *public_symbol_map;
+  const uint16_t *alias_map;
+  const TSSymbol *alias_sequences;
+  const TSLexMode *lex_modes;
+  bool (*lex_fn)(TSLexer *, TSStateId);
+  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
+  TSSymbol keyword_capture_token;
+  struct {
+    const bool *states;
+    const TSSymbol *symbol_map;
+    void *(*create)(void);
+    void (*destroy)(void *);
+    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+    unsigned (*serialize)(void *, char *);
+    void (*deserialize)(void *, const char *, unsigned);
+  } external_scanner;
+  const TSStateId *primary_state_ids;
+};
+
+static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {  // true if `lookahead` lies in one of the `len` inclusive [start, end] ranges; binary search, so assumes `ranges` is sorted ascending
+  uint32_t index = 0;  // lower bound of the window still being searched
+  uint32_t size = len - index;
+  while (size > 1) {
+    uint32_t half_size = size / 2;
+    uint32_t mid_index = index + half_size;
+    const TSCharacterRange *range = &ranges[mid_index];
+    if (lookahead >= range->start && lookahead <= range->end) {
+      return true;
+    } else if (lookahead > range->end) {
+      index = mid_index;  // lookahead is past this range: discard everything before it
+    }
+    size -= half_size;
+  }
+  const TSCharacterRange *range = &ranges[index];  // address only; dereferenced below only when len > 0
+  return len > 0 && lookahead >= range->start && lookahead <= range->end;  // an empty set contains nothing
+}
+
+/*
+ *  Lexer Macros
+ */
+
+#ifdef _MSC_VER
+#define UNUSED __pragma(warning(suppress : 4101))
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#define START_LEXER() /* lexer prologue: declares the locals and labels used by ADVANCE, ADVANCE_MAP, SKIP, ACCEPT_TOKEN and END_STATE; expects `lexer` in scope */ \
+  bool result = false; /* set by ACCEPT_TOKEN, returned by END_STATE */ \
+  bool skip = false; /* set by SKIP, passed to lexer->advance */ \
+  UNUSED                        \
+  bool eof = false;             \
+  int32_t lookahead;            \
+  goto start; /* first entry: do not advance before reading the first lookahead */ \
+  next_state: /* jump target of ADVANCE, ADVANCE_MAP and SKIP */ \
+  lexer->advance(lexer, skip);  \
+  start:                        \
+  skip = false; /* a SKIP request applies to a single advance only */ \
+  lookahead = lexer->lookahead;
+
+#define ADVANCE(state_value) /* switch to `state_value` and advance the lexer with skip == false; expects `state` in scope */ \
+  {                          \
+    state = state_value;     \
+    goto next_state; /* label declared by START_LEXER; it calls lexer->advance(lexer, skip) */ \
+  }
+
+#define ADVANCE_MAP(...) /* arguments: a flat list of (character, next state) pairs; acts like ADVANCE for the first pair whose character equals `lookahead` */ \
+  {                                                                   \
+    static const uint16_t map[] = { __VA_ARGS__ }; /* NOTE(review): keys are stored as uint16_t, so a lookahead above 0xFFFF can never match - confirm the generator never emits such keys */ \
+    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
+      if (map[i] == lookahead) { /* even slots: character to match */ \
+        state = map[i + 1]; /* odd slots: state to enter */ \
+        goto next_state;                                              \
+      }                                                               \
+    }                                                                 \
+  } /* no pair matched: execution continues after the macro */
+
+#define SKIP(state_value) /* like ADVANCE, but the following lexer->advance call receives skip == true */ \
+  {                       \
+    skip = true; /* START_LEXER resets this to false right after the advance */ \
+    state = state_value;  \
+    goto next_state;      \
+  }
+
+#define ACCEPT_TOKEN(symbol_value) /* records `symbol_value` as the match; expands to three unbraced statements, so never use it as the sole body of an if/else/loop */ \
+  result = true; /* the value END_STATE() returns */ \
+  lexer->result_symbol = symbol_value; \
+  lexer->mark_end(lexer);
+
+#define END_STATE() return result;
+
+/*
+ *  Parse Table Macros
+ */
+
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
+
+#define STATE(id) id
+
+#define ACTIONS(id) id
+
+#define SHIFT(state_value)            \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = (state_value)          \
+    }                                 \
+  }}
+
+#define SHIFT_REPEAT(state_value)     \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = (state_value),         \
+      .repetition = true              \
+    }                                 \
+  }}
+
+#define SHIFT_EXTRA()                 \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .extra = true                   \
+    }                                 \
+  }}
+
+#define REDUCE(symbol_name, children, precedence, prod_id) /* brace initializer that fills the `reduce` variant of a TSParseAction; presumably used as a TSParseActionEntry element */ \
+  {{                                                       \
+    .reduce = {                                            \
+      .type = TSParseActionTypeReduce,                     \
+      .symbol = symbol_name, /* NOTE(review): designators are not in declaration order (child_count precedes symbol in the struct); fine in C99, rejected by C++20 */ \
+      .child_count = children,                             \
+      .dynamic_precedence = precedence,                    \
+      .production_id = prod_id                             \
+    },                                                     \
+  }}
+
+#define RECOVER()                    \
+  {{                                 \
+    .type = TSParseActionTypeRecover \
+  }}
+
+#define ACCEPT_INPUT()              \
+  {{                                \
+    .type = TSParseActionTypeAccept \
+  }}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_PARSER_H_
diff --git a/python/extractor/tsg-python/tsp/Cargo.toml b/python/extractor/tsg-python/tsp/Cargo.toml
index 995f39a3bd996..a4b738657dee4 100644
--- a/python/extractor/tsg-python/tsp/Cargo.toml
+++ b/python/extractor/tsg-python/tsp/Cargo.toml
@@ -25,7 +25,7 @@ include = [
 path = "bindings/rust/lib.rs"
 
 [dependencies]
-tree-sitter = ">= 0.20, < 0.21"
+tree-sitter = ">= 0.22.6"
 
 [build-dependencies]
 cc = "1.0"
diff --git a/ruby/extractor/Cargo.lock b/ruby/extractor/Cargo.lock
index 5de84efb6b50a..eb212202f62fd 100644
--- a/ruby/extractor/Cargo.lock
+++ b/ruby/extractor/Cargo.lock
@@ -167,7 +167,7 @@ checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1"
 [[package]]
 name = "codeql-extractor"
 version = "0.2.0"
-source = "git+https://github.com/github/codeql.git?rev=d0688b0645e59e291531612d41b1af71afcae46d#d0688b0645e59e291531612d41b1af71afcae46d"
+source = "git+https://github.com/github/codeql.git?rev=5be699becc8ba8ac20f3258fbaa734381e26a9d9#5be699becc8ba8ac20f3258fbaa734381e26a9d9"
 dependencies = [
  "chrono",
  "encoding",
diff --git a/ruby/extractor/cargo-bazel-lock.json b/ruby/extractor/cargo-bazel-lock.json
index 14c93370e07f8..356a870837b33 100644
--- a/ruby/extractor/cargo-bazel-lock.json
+++ b/ruby/extractor/cargo-bazel-lock.json
@@ -1,5 +1,5 @@
 {
-  "checksum": "23633ca3169d15ab61de79d8d6a5f5b1b0a2043388e73b2fcd9e631939fd304b",
+  "checksum": "1ad87a553fb556a523e9dd8001738caf00cdb3f566a9a0b99a4d12b7659bbfd3",
   "crates": {
     "adler 1.0.2": {
       "name": "adler",
@@ -953,7 +953,7 @@
         "Git": {
           "remote": "https://github.com/github/codeql.git",
           "commitish": {
-            "Rev": "d0688b0645e59e291531612d41b1af71afcae46d"
+            "Rev": "5be699becc8ba8ac20f3258fbaa734381e26a9d9"
           },
           "strip_prefix": "shared/tree-sitter-extractor"
         }