|
1 | 1 | //! Tools for managing ClickHouse during development
|
2 | 2 |
|
| 3 | +use std::net::SocketAddr; |
3 | 4 | use std::path::{Path, PathBuf};
|
4 | 5 | use std::process::Stdio;
|
5 | 6 | use std::time::Duration;
|
6 | 7 |
|
7 |
| -use anyhow::{bail, ensure, Context}; |
| 8 | +use anyhow::{bail, Context}; |
8 | 9 | use tempfile::TempDir;
|
| 10 | +use tokio::{ |
| 11 | + fs::File, |
| 12 | + io::{AsyncBufReadExt, BufReader}, |
| 13 | +}; |
9 | 14 |
|
10 | 15 | use crate::dev::poll;
|
11 | 16 |
|
@@ -83,18 +88,10 @@ impl ClickHouseInstance {
|
83 | 88 |
|
84 | 89 | // Discover the HTTP port on which we're listening, if it's not provided explicitly by the
|
85 | 90 | // caller.
|
86 |
| - // |
87 |
| - // Note: This is annoying. For tests, or any situation in which we'd run multiple servers, |
88 |
| - // we'd like to let the OS choose a port for us, by listening on port 0. ClickHouse |
89 |
| - // supports this, but doesn't do anything to discover the port on which it's actually |
90 |
| - // listening (i.e., the log file just says "listening on port 0"). In contrast, CockroachDB |
91 |
| - // dumps the full URL on which it's listening to a file for users to discover. |
92 |
| - // |
93 |
| - // This is a workaround which shells out to `lsof` or `pfiles` to discover the port. |
94 | 91 | let port = if port != 0 {
|
95 | 92 | port
|
96 | 93 | } else {
|
97 |
| - discover_local_listening_port(child.id().unwrap()).await? |
| 94 | + discover_local_listening_port(&log_path).await? |
98 | 95 | };
|
99 | 96 |
|
100 | 97 | Ok(Self {
|
@@ -169,284 +166,29 @@ impl Drop for ClickHouseInstance {
|
169 | 166 | }
|
170 | 167 | }
|
171 | 168 |
|
172 |
| -// Parse the output of the command used to find the HTTP port ClickHouse listens on, in the event |
173 |
| -// we start it with a port of 0. |
174 |
| -#[cfg(not(target_os = "illumos"))] |
175 |
| -async fn discover_local_listening_port(pid: u32) -> Result<u16, anyhow::Error> { |
176 |
| - // `lsof` doesn't do the right thing with the PID. It seems to list _all_ files for the |
177 |
| - // process, on both macOS and Linux, rather than the intersection of the PID we care about and |
178 |
| - // the other filters. So we ignore it. |
179 |
| - let output = tokio::process::Command::new("lsof") |
180 |
| - .arg("-i") |
181 |
| - .arg("6TCP@localhost") // TCP over IPv6, on the local address |
182 |
| - .arg("-F") |
183 |
| - .arg("n") // Only print the file name, \n-terminated |
184 |
| - .output() // Spawn and collect the output |
185 |
| - .await |
186 |
| - .context("Could not determine ClickHouse port number: Failed to spawn process.")?; |
187 |
| - ensure!( |
188 |
| - output.status.success(), |
189 |
| - "Could not determine ClickHouse port number: Process failed" |
190 |
| - ); |
191 |
| - extract_port_from_lsof_output(pid, &output.stdout) |
192 |
| -} |
193 |
| - |
194 |
| -// Parse the output of the command used to find the HTTP port ClickHouse listens on, in the event |
195 |
| -// we start it with a port of 0. |
196 |
| -#[cfg(target_os = "illumos")] |
197 |
| -async fn discover_local_listening_port(pid: u32) -> Result<u16, anyhow::Error> { |
198 |
| - let output = tokio::process::Command::new("pfiles") |
199 |
| - .arg(format!("{}", pid)) |
200 |
| - .output() // Spawn and collect the output |
201 |
| - .await |
202 |
| - .context("Could not determine ClickHouse port number: Failed to spawn pfiles process.")?; |
203 |
| - ensure!( |
204 |
| - output.status.success(), |
205 |
| - "Could not determine ClickHouse port number: Pfiles process failed" |
206 |
| - ); |
207 |
| - extract_port_from_pfiles_output(&output.stdout) |
208 |
| -} |
209 |
| - |
210 |
| -// Ports that ClickHouse opens, but we'd like to ignore when discovering. |
211 |
| -const IGNORED_CLICKHOUSE_PORTS: &[u16] = &[9000, 9004]; |
212 |
| - |
213 |
| -// Extract the port from `pfiles` output. |
214 |
| -// |
215 |
| -// This output is much simpler, since it already restricts things to the PID we're interested int. |
216 |
| -// Just look for AF_INET lines and pull out the port. |
217 |
| -#[cfg_attr(target_os = "illumos", allow(dead_code))] |
218 |
| -fn extract_port_from_pfiles_output( |
219 |
| - output: &[u8], |
220 |
| -) -> Result<u16, anyhow::Error> { |
221 |
| - let text = std::str::from_utf8(output) |
222 |
| - .context("Could not determine ClickHouse port number: Non-UTF8 output from command")?; |
223 |
| - for port_str in text.lines().filter_map(|line| { |
224 |
| - if line.trim().starts_with("sockname: AF_INET") { |
225 |
| - line.split_whitespace().last() |
226 |
| - } else { |
227 |
| - None |
228 |
| - } |
229 |
| - }) { |
230 |
| - let port = port_str |
231 |
| - .parse() |
232 |
| - .context("Could not determine ClickHouse port number: Invalid port found in output")?; |
233 |
| - if !IGNORED_CLICKHOUSE_PORTS.contains(&port) { |
234 |
| - return Ok(port); |
235 |
| - } |
236 |
| - } |
237 |
| - bail!("Could not determine ClickHouse port number: No valid ports found in output"); |
238 |
| -} |
239 |
| - |
240 |
| -// Parse the output from the `lsof` command on non-illumos systems |
241 |
| -// |
242 |
| -// The exact command run is: `lsof -i 6TCP@localhost -F n`. |
243 |
| -// |
244 |
| -// The output has groups of files like this: |
245 |
| -// p<PID> |
246 |
| -// f<FD> |
247 |
| -// n<NAME> |
248 |
| -// f<FD> |
249 |
| -// n<NAME> |
250 |
| -// ... |
251 |
| -// p<PID> |
252 |
| -// ... |
253 |
| -// |
254 |
| -// Parsing proceeds by: |
255 |
| -// - Splitting into lines |
256 |
| -// - Ignoring output until a PID line `p<PID>` is found, with the expected PID |
257 |
| -// - Ignores `n<FD>` lines |
258 |
| -// - Parses lines that look like `flocalhost:<NUMERIC_PORT>` |
259 |
| -// - Returns the first match, that's _not_ one of the other ports ClickHouse opens. |
260 |
| -// |
261 |
| -// If any of these conditions fails, an error is returned. |
262 |
| -#[cfg_attr(not(target_os = "illumos"), allow(dead_code))] |
263 |
| -fn extract_port_from_lsof_output( |
264 |
| - expected_pid: u32, |
265 |
| - output: &[u8], |
| 169 | +// Parse the ClickHouse log file at the given path, looking for a line reporting the port number of |
| 170 | +// the HTTP server. This is only used if the port is chosen by the OS, not the caller. |
| 171 | +async fn discover_local_listening_port( |
| 172 | + path: &Path, |
266 | 173 | ) -> Result<u16, anyhow::Error> {
|
267 |
| - ensure!( |
268 |
| - !output.is_empty(), |
269 |
| - "Could not determine ClickHouse port number: Process output empty" |
| 174 | + let reader = BufReader::new( |
| 175 | + File::open(path).await.context("Failed to open ClickHouse log file")?, |
270 | 176 | );
|
271 |
| - |
272 |
| - // Break into newline-terminated chunks. |
273 |
| - let mut chunks = output.split(|&x| x == b'\n'); |
274 |
| - |
275 |
| - // Small helpers to parse chunks. |
276 |
| - let is_process_start = |chunk: &[u8]| matches!(chunk.first(), Some(b'p')); |
277 |
| - let is_file_descriptor = |chunk: &[u8]| matches!(chunk.first(), Some(b'f')); |
278 |
| - let is_file_name = |chunk: &[u8]| matches!(chunk.first(), Some(b'n')); |
279 |
| - |
280 |
| - while let Some(chunk) = chunks.next() { |
281 |
| - if is_process_start(&chunk) { |
282 |
| - // Start of a process group. |
283 |
| - // |
284 |
| - // Parse the PID, check if it matches our expected PID. |
285 |
| - let pid: u32 = match String::from_utf8(chunk[1..].to_vec()) |
286 |
| - .context("Could not determine ClickHouse port number: Non-UTF8 output")? |
287 |
| - .parse() { |
288 |
| - Ok(pid) => pid, |
289 |
| - _ => continue, |
290 |
| - }; |
291 |
| - if pid == expected_pid { |
292 |
| - // PID matches |
293 |
| - // |
294 |
| - // The first chunk should be the numeric file descriptor |
295 |
| - if let Some(should_be_fd) = chunks.next() { |
296 |
| - ensure!( |
297 |
| - is_file_descriptor(should_be_fd), |
298 |
| - "Could not determine ClickHouse port number: Expected numeric file descriptor in output" |
299 |
| - ); |
300 |
| - } else { |
301 |
| - bail!("Could not determine ClickHouse port number: Expected numeric file descriptor in output"); |
302 |
| - } |
303 |
| - |
304 |
| - // Process chunks until we find one that has a valid port, or we get one that's |
305 |
| - // _not_ a filename. |
306 |
| - while let Some(chunk) = chunks.next() { |
307 |
| - ensure!( |
308 |
| - is_file_name(chunk), |
309 |
| - "Could not determine ClickHouse port number: Expected file name in output" |
310 |
| - ); |
311 |
| - |
312 |
| - // Ignore leading `n`, which is part of the formatting from lsof |
313 |
| - let chunk = &chunk[1..]; |
314 |
| - |
315 |
| - // Check if this looks like `localhost:<PORT>` |
316 |
| - const LOCALHOST: &[u8] = b"localhost:"; |
317 |
| - if chunk.starts_with(LOCALHOST) { |
318 |
| - let port: u16 = std::str::from_utf8(&chunk[LOCALHOST.len()..]) |
319 |
| - .context("Could not determine ClickHouse port number: Invalid PID in output")? |
320 |
| - .parse() |
321 |
| - .context("Could not determine ClickHouse port number: Invalid PID in output")?; |
322 |
| - |
323 |
| - // Check that it's not one of the default other TCP ports ClickHouse opens |
324 |
| - // by default |
325 |
| - if !IGNORED_CLICKHOUSE_PORTS.contains(&port) { |
326 |
| - return Ok(port); |
327 |
| - } |
328 |
| - } |
329 |
| - } |
330 |
| - |
331 |
| - // Early exit, the PID matched, but we couldn't find a valid port |
332 |
| - break; |
333 |
| - } |
| 177 | + const NEEDLE: &str = "<Information> Application: Listening for http://"; |
| 178 | + let mut lines = reader.lines(); |
| 179 | + while let Some(line) = lines |
| 180 | + .next_line() |
| 181 | + .await |
| 182 | + .context("Failed to read line from ClickHouse log file")? |
| 183 | + { |
| 184 | + if let Some(needle_start) = line.find(&NEEDLE) { |
| 185 | + let address_start = needle_start + NEEDLE.len(); |
| 186 | + let address: SocketAddr = |
| 187 | + line[address_start..].trim().parse().context( |
| 188 | + "Failed to parse ClickHouse socket address from log", |
| 189 | + )?; |
| 190 | + return Ok(address.port()); |
334 | 191 | }
|
335 | 192 | }
|
336 |
| - bail!("Could not determine ClickHouse port number: No valid ports found in output"); |
337 |
| -} |
338 |
| - |
339 |
| -#[cfg(test)] |
340 |
| -mod pfiles_tests { |
341 |
| - use super::extract_port_from_pfiles_output; |
342 |
| - |
343 |
| - // A known-good test output. |
344 |
| - const GOOD_INPUT: &[u8] = br#" |
345 |
| - 25: S_IFSOCK mode:0666 dev:547,0 ino:24056 uid:0 gid:0 rdev:0,0 |
346 |
| - O_RDWR FD_CLOEXEC |
347 |
| - SOCK_STREAM |
348 |
| - SO_REUSEADDR,SO_SNDBUF(49152),SO_RCVBUF(128000) |
349 |
| - sockname: AF_INET6 ::1 port: 9004 |
350 |
| - 26: S_IFSOCK mode:0666 dev:547,0 ino:24056 uid:0 gid:0 rdev:0,0 |
351 |
| - O_RDWR FD_CLOEXEC |
352 |
| - SOCK_STREAM |
353 |
| - SO_REUSEADDR,SO_SNDBUF(49152),SO_RCVBUF(128000) |
354 |
| - sockname: AF_INET 127.0.0.1 port: 8123 |
355 |
| - 27: S_IFSOCK mode:0666 dev:547,0 ino:42019 uid:0 gid:0 rdev:0,0 |
356 |
| - O_RDWR FD_CLOEXEC |
357 |
| - SOCK_STREAM |
358 |
| - SO_REUSEADDR,SO_SNDBUF(49152),SO_RCVBUF(128000) |
359 |
| - sockname: AF_INET 127.0.0.1 port: 9000 |
360 |
| - "#; |
361 |
| - |
362 |
| - // Only contains the ignored ClickHouse ports |
363 |
| - const ONLY_IGNORED_CLICKHOUSE_PORTS: &[u8] = br#" |
364 |
| - 25: S_IFSOCK mode:0666 dev:547,0 ino:24056 uid:0 gid:0 rdev:0,0 |
365 |
| - O_RDWR FD_CLOEXEC |
366 |
| - SOCK_STREAM |
367 |
| - SO_REUSEADDR,SO_SNDBUF(49152),SO_RCVBUF(128000) |
368 |
| - sockname: AF_INET6 ::1 port: 9004 |
369 |
| - "#; |
370 |
| - |
371 |
| - #[test] |
372 |
| - fn test_extract_port_from_pfiles_output() { |
373 |
| - assert_eq!(extract_port_from_pfiles_output(&GOOD_INPUT).unwrap(), 8123); |
374 |
| - } |
375 |
| - |
376 |
| - #[test] |
377 |
| - fn test_extract_port_from_lsof_output_no_valid_port() { |
378 |
| - assert!(extract_port_from_pfiles_output( |
379 |
| - &ONLY_IGNORED_CLICKHOUSE_PORTS |
380 |
| - ) |
381 |
| - .is_err()); |
382 |
| - } |
383 |
| -} |
384 |
| - |
385 |
| -#[cfg(test)] |
386 |
| -mod lsof_tests { |
387 |
| - use super::extract_port_from_lsof_output; |
388 |
| - |
389 |
| - // A known-good test output. This was generated by running the actual command while a |
390 |
| - // ClickHouse process is running. |
391 |
| - const GOOD_INPUT: &[u8] = b"p462\n\ |
392 |
| - f4\n\ |
393 |
| - nlocalhost:19536\n\ |
394 |
| - p7741\n\ |
395 |
| - f8\n\ |
396 |
| - nlocalhost:53091\n\ |
397 |
| - f9\n\ |
398 |
| - nlocalhost:cslistener\n\ |
399 |
| - f12\n\ |
400 |
| - nlocalhost:9004\n"; |
401 |
| - |
402 |
| - // This command has some valid `localhost:PORT` lines, but those ports are known to be other |
403 |
| - // ports that ClickHouse opens that aren't HTTP. These are the native client and the mySQL |
404 |
| - // client ports. |
405 |
| - const ONLY_IGNORED_CLICKHOUSE_PORTS: &[u8] = b"p462\n\ |
406 |
| - f4\n\ |
407 |
| - nlocalhost:19536\n\ |
408 |
| - p7741\n\ |
409 |
| - f8\n\ |
410 |
| - nlocalhost:9000\n\ |
411 |
| - f9\n\ |
412 |
| - nlocalhost:cslistener\n\ |
413 |
| - f12\n\ |
414 |
| - nlocalhost:9004\n"; |
415 |
| - |
416 |
| - // A bad output that has no lines like `flocalhost:<PORT>\n` at all |
417 |
| - const NO_FILE_NAMES: &[u8] = b"p462\n\ |
418 |
| - f4\n\ |
419 |
| - nlocalhost:19536\n\ |
420 |
| - p7741\n\ |
421 |
| - f8\n\ |
422 |
| - f9\n\ |
423 |
| - f12\n"; |
424 |
| - |
425 |
| - #[test] |
426 |
| - fn test_extract_port_from_lsof_output() { |
427 |
| - assert_eq!( |
428 |
| - extract_port_from_lsof_output(7741, &GOOD_INPUT).unwrap(), |
429 |
| - 53091 |
430 |
| - ); |
431 |
| - } |
432 |
| - |
433 |
| - #[test] |
434 |
| - fn test_extract_port_from_lsof_output_no_valid_port() { |
435 |
| - assert!(extract_port_from_lsof_output( |
436 |
| - 7741, |
437 |
| - &ONLY_IGNORED_CLICKHOUSE_PORTS |
438 |
| - ) |
439 |
| - .is_err()); |
440 |
| - } |
441 |
| - |
442 |
| - // A test that uses the good input, but assumes we're looking for another PID. |
443 |
| - #[test] |
444 |
| - fn test_extract_port_from_lsof_output_incorrect_pid() { |
445 |
| - assert!(extract_port_from_lsof_output(0, &GOOD_INPUT).is_err()); |
446 |
| - } |
447 |
| - |
448 |
| - #[test] |
449 |
| - fn test_extract_port_from_lsof_output_no_file_names() { |
450 |
| - assert!(extract_port_from_lsof_output(0, &NO_FILE_NAMES).is_err()); |
451 |
| - } |
| 193 | + bail!("Failed to discover port from ClickHouse log file"); |
452 | 194 | }
|
0 commit comments