diff --git a/docs/development/contributing/ide.md b/docs/development/contributing/ide.md index a77c83837d64..12bd94cab229 100644 --- a/docs/development/contributing/ide.md +++ b/docs/development/contributing/ide.md @@ -7,9 +7,11 @@ This page contains some recommendations for configuring popular IDEs. Make sure to configure VSCode to use the virtual environment created by the Makefile. -In addition, the extensions below are recommended. +### Extensions -### rust-analyzer +The extensions below are recommended. + +#### rust-analyzer If you work on the Rust code at all, you will need the [rust-analyzer](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer) extension. This extension provides code completion for the Rust code. @@ -21,9 +23,9 @@ For it to work well for the Polars code base, add the following settings to your } ``` -### Ruff +#### Ruff -The Ruff extension will help you conform to the formatting requirements of the Python code. +The [Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension will help you conform to the formatting requirements of the Python code. We use both the Ruff linter and formatter. It is recommended to configure the extension to use the Ruff installed in your environment. This will make it use the correct Ruff version and configuration. @@ -34,6 +36,92 @@ This will make it use the correct Ruff version and configuration. } ``` +#### CodeLLDB + +The [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb) extension is useful for debugging Rust code. +You can also debug Rust code called from Python (see section below). + +### Debugging + +Due to the way that Python and Rust interoperate, debugging the Rust side of development from Python calls can be difficult. +This guide shows how to set up a debugging environment that makes debugging Rust code called from a Python script painless. 
+ +#### Preparation + +Start by installing the CodeLLDB extension (see above). +Then add the following two configurations to your `launch.json` file. +This file is usually found in the `.vscode` folder of your project root. +See the [official VSCode documentation](https://code.visualstudio.com/docs/editor/debugging#_launch-configurations) for more information about the `launch.json` file. + +<details><summary><code>launch.json</code></summary> + +```json +{ + "configurations": [ + { + "name": "Debug Rust/Python", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/py-polars/debug/launch.py", + "args": [ + "${file}" + ], + "console": "internalConsole", + "justMyCode": true, + "serverReadyAction": { + "pattern": "pID = ([0-9]+)", + "action": "startDebugging", + "name": "Rust LLDB" + } + }, + { + "name": "Rust LLDB", + "pid": "0", + "type": "lldb", + "request": "attach", + "program": "${workspaceFolder}/py-polars/.venv/bin/python", + "stopOnEntry": false, + "sourceLanguages": [ + "rust" + ], + "presentation": { + "hidden": true + } + } + ] +} +``` + +</details>
+ +!!! info + + On some systems, the LLDB debugger will not attach unless [ptrace protection](https://linux-audit.com/protect-ptrace-processes-kernel-yama-ptrace_scope) is disabled. + To disable, run the following command: + + ```shell + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope + ``` + +#### Running the debugger + +1. Create a Python script containing Polars code. Ensure that your virtual environment is activated. + +2. Set breakpoints in any `.rs` or `.py` file. + +3. In the `Run and Debug` panel on the left, select `Debug Rust/Python` from the drop-down menu on top and click the `Start Debugging` button. + +At this point, your debugger should stop on breakpoints in any `.rs` file located within the codebase. + +#### Details + +The debugging feature runs via the specially-designed VSCode launch configuration shown above. +The initial Python debugger is launched using a special launch script located at `py-polars/debug/launch.py` and passes the name of the script to be debugged (the target script) as an input argument. +The launch script determines the process ID, writes this value into the `launch.json` configuration file, compiles the target script and runs it in the current environment. +At this point, a second (Rust) debugger is attached to the Python debugger. +The result is two simultaneous debuggers operating on the same running instance. +Breakpoints in the Python code will stop on the Python debugger and breakpoints in the Rust code will stop on the Rust debugger. + ## PyCharm / RustRover / CLion !!! info diff --git a/py-polars/debug/launch.py b/py-polars/debug/launch.py new file mode 100644 index 000000000000..95352e4eafa3 --- /dev/null +++ b/py-polars/debug/launch.py @@ -0,0 +1,81 @@ +import os +import re +import sys +import time +from pathlib import Path + +""" +The following parameter determines the sleep time of the Python process after a signal +is sent that attaches the Rust LLDB debugger. 
# Seconds the Python process sleeps after printing the signal that attaches the Rust
# LLDB debugger. If the Rust LLDB debugger attaches to the current session too late,
# it might miss any set breakpoints. If this happens consistently, it is recommended
# to increase this value.
LLDB_DEBUG_WAIT_TIME_SECONDS = 1

# Matches the "Rust LLDB" configuration name in launch.json together with the pid
# defined immediately after it. Only the digits between the two groups are replaced.
_RUST_LLDB_PID_PATTERN = re.compile(r'("Rust LLDB",\s*"pid":\s*")\d+(")')


def _replace_rust_lldb_pid(launch_info: str, pid: int) -> str:
    """
    Return the launch.json text with the Rust LLDB pid replaced by `pid`.

    Raises
    ------
    RuntimeError
        If no `"Rust LLDB"` name followed directly by a `"pid"` entry is found.
    """
    updated, n_replaced = _RUST_LLDB_PID_PATTERN.subn(
        rf"\g<1>{pid}\g<2>", launch_info
    )
    if n_replaced == 0:
        msg = (
            "Cannot locate pid definition in launch.json for Rust LLDB configuration. "
            "Please follow the instructions in CONTRIBUTING.md for creating the "
            "launch configuration."
        )
        raise RuntimeError(msg)
    return updated


def launch_debugging() -> None:
    """
    Debug Rust files via Python.

    Determine the pID for the current debugging session, write it into the
    `Rust LLDB` configuration of `.vscode/launch.json`, print the signal that makes
    VSCode attach the Rust LLDB debugger, and execute the originally-requested script
    (passed as the first command line argument) as `__main__`.

    Raises
    ------
    RuntimeError
        If no target script was passed, if `.vscode/launch.json` cannot be found, or
        if it does not contain a pid definition for the `Rust LLDB` configuration.
    """
    # `< 2` rather than `== 1` so that an empty argv is rejected as well.
    if len(sys.argv) < 2:
        msg = (
            "launch.py is not meant to be executed directly; please use the "
            "`Debug Rust/Python` debugging configuration to run a python script that "
            "uses the polars library."
        )
        raise RuntimeError(msg)

    # Get the current process ID.
    pid = os.getpid()

    # Locate the launch configuration relative to this file:
    # <root>/py-polars/debug/launch.py -> <root>/.vscode/launch.json
    launch_file = Path(__file__).parents[2] / ".vscode/launch.json"
    if not launch_file.exists():
        msg = f"Cannot locate {launch_file}"
        raise RuntimeError(msg)

    # Overwrite the pid found in launch.json with the pid for the current process.
    # The replacement is computed (and validated) before the file is reopened for
    # writing, so a missing pid definition never truncates the file.
    launch_info = launch_file.read_text(encoding="utf-8")
    launch_info_with_new_pid = _replace_rust_lldb_pid(launch_info, pid)
    launch_file.write_text(launch_info_with_new_pid, encoding="utf-8")

    # Print pID to the debug console. This auto-triggers the Rust LLDB configuration.
    # Flush explicitly so the signal is not held back in a buffer while we sleep.
    print(f"pID = {pid}", flush=True)

    # Give the LLDB time to connect. Depending on how long it takes for your LLDB
    # debugging session to initialize, you may have to adjust this setting.
    time.sleep(LLDB_DEBUG_WAIT_TIME_SECONDS)

    # Update sys.argv so that when exec() is called, the first argument is the script
    # name itself, and the remaining are the input arguments.
    sys.argv.pop(0)
    file_to_execute = Path(sys.argv[0])

    # Read the script as bytes so that compile() applies the source encoding rules
    # (UTF-8 default, coding declarations, BOM) instead of the locale encoding.
    script_contents = file_to_execute.read_bytes()

    # Run the originally requested file by compiling and executing the code. Compiling
    # with the real file name keeps breakpoints and tracebacks pointing at the script;
    # `__file__` is provided because scripts run directly expect it to be defined.
    script_globals = {"__name__": "__main__", "__file__": str(file_to_execute)}
    exec(compile(script_contents, file_to_execute, mode="exec"), script_globals)


if __name__ == "__main__":
    launch_debugging()