Skip to content

Parses clickhouse log file for discovering port number #131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,14 @@ jobs:
# actions/[email protected]
uses: actions/cache@26968a09c0ea4f3e233fdddbafd1166051a095f6
with:
key: ${{ runner.os }}-cockroach-binary-v2
key: ${{ runner.os }}-cockroach-binary-${{ hashFiles('tools/cockroachdb_checksums') }}
path: "cockroachdb"
- name: Configure GitHub cache for ClickHouse binaries
id: cache-clickhouse
# actions/[email protected]
uses: actions/cache@26968a09c0ea4f3e233fdddbafd1166051a095f6
with:
key: ${{ runner.os }}-clickhouse-binary
key: ${{ runner.os }}-clickhouse-binary-${{ hashFiles('tools/clickhouse_checksums') }}
path: "clickhouse"
- name: Build
run: cargo +${{ matrix.toolchain }} build --locked --all-targets --verbose
Expand Down
314 changes: 28 additions & 286 deletions omicron-common/src/dev/clickhouse.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
//! Tools for managing ClickHouse during development

use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::time::Duration;

use anyhow::{bail, ensure, Context};
use anyhow::{bail, Context};
use tempfile::TempDir;
use tokio::{
fs::File,
io::{AsyncBufReadExt, BufReader},
};

use crate::dev::poll;

Expand Down Expand Up @@ -83,18 +88,10 @@ impl ClickHouseInstance {

// Discover the HTTP port on which we're listening, if it's not provided explicitly by the
// caller.
//
// Note: This is annoying. For tests, or any situation in which we'd run multiple servers,
// we'd like to let the OS choose a port for us, by listening on port 0. ClickHouse
// supports this, but doesn't do anything to discover the port on which it's actually
// listening (i.e., the log file just says "listening on port 0"). In contrast, CockroachDB
// dumps the full URL on which it's listening to a file for users to discover.
//
// This is a workaround which shells out to `lsof` or `pfiles` to discover the port.
let port = if port != 0 {
port
} else {
discover_local_listening_port(child.id().unwrap()).await?
discover_local_listening_port(&log_path).await?
};

Ok(Self {
Expand Down Expand Up @@ -169,284 +166,29 @@ impl Drop for ClickHouseInstance {
}
}

// Parse the output of the command used to find the HTTP port ClickHouse listens on, in the event
// we start it with a port of 0.
#[cfg(not(target_os = "illumos"))]
async fn discover_local_listening_port(pid: u32) -> Result<u16, anyhow::Error> {
// `lsof` doesn't do the right thing with the PID. It seems to list _all_ files for the
// process, on both macOS and Linux, rather than the intersection of the PID we care about and
// the other filters. So we ignore it.
let output = tokio::process::Command::new("lsof")
.arg("-i")
.arg("6TCP@localhost") // TCP over IPv6, on the local address
.arg("-F")
.arg("n") // Only print the file name, \n-terminated
.output() // Spawn and collect the output
.await
.context("Could not determine ClickHouse port number: Failed to spawn process.")?;
ensure!(
output.status.success(),
"Could not determine ClickHouse port number: Process failed"
);
extract_port_from_lsof_output(pid, &output.stdout)
}

// Parse the output of the command used to find the HTTP port ClickHouse listens on, in the event
// we start it with a port of 0.
#[cfg(target_os = "illumos")]
async fn discover_local_listening_port(pid: u32) -> Result<u16, anyhow::Error> {
let output = tokio::process::Command::new("pfiles")
.arg(format!("{}", pid))
.output() // Spawn and collect the output
.await
.context("Could not determine ClickHouse port number: Failed to spawn pfiles process.")?;
ensure!(
output.status.success(),
"Could not determine ClickHouse port number: Pfiles process failed"
);
extract_port_from_pfiles_output(&output.stdout)
}

// Ports that ClickHouse opens, but we'd like to ignore when discovering.
const IGNORED_CLICKHOUSE_PORTS: &[u16] = &[9000, 9004];

// Extract the port from `pfiles` output.
//
// This output is much simpler, since it already restricts things to the PID we're interested int.
// Just look for AF_INET lines and pull out the port.
#[cfg_attr(target_os = "illumos", allow(dead_code))]
fn extract_port_from_pfiles_output(
output: &[u8],
) -> Result<u16, anyhow::Error> {
let text = std::str::from_utf8(output)
.context("Could not determine ClickHouse port number: Non-UTF8 output from command")?;
for port_str in text.lines().filter_map(|line| {
if line.trim().starts_with("sockname: AF_INET") {
line.split_whitespace().last()
} else {
None
}
}) {
let port = port_str
.parse()
.context("Could not determine ClickHouse port number: Invalid port found in output")?;
if !IGNORED_CLICKHOUSE_PORTS.contains(&port) {
return Ok(port);
}
}
bail!("Could not determine ClickHouse port number: No valid ports found in output");
}

// Parse the output from the `lsof` command on non-illumos systems
//
// The exact command run is: `lsof -i 6TCP@localhost -F n`.
//
// The output has groups of files like this:
// p<PID>
// f<FD>
// n<NAME>
// f<FD>
// n<NAME>
// ...
// p<PID>
// ...
//
// Parsing proceeds by:
// - Splitting into lines
// - Ignoring output until a PID line `p<PID>` is found, with the expected PID
// - Ignores `n<FD>` lines
// - Parses lines that look like `flocalhost:<NUMERIC_PORT>`
// - Returns the first match, that's _not_ one of the other ports ClickHouse opens.
//
// If any of these conditions fails, an error is returned.
#[cfg_attr(not(target_os = "illumos"), allow(dead_code))]
fn extract_port_from_lsof_output(
expected_pid: u32,
output: &[u8],
// Parse the ClickHouse log file at the given path, looking for a line reporting the port number of
// the HTTP server. This is only used if the port is chosen by the OS, not the caller.
async fn discover_local_listening_port(
path: &Path,
) -> Result<u16, anyhow::Error> {
ensure!(
!output.is_empty(),
"Could not determine ClickHouse port number: Process output empty"
let reader = BufReader::new(
File::open(path).await.context("Failed to open ClickHouse log file")?,
);

// Break into newline-terminated chunks.
let mut chunks = output.split(|&x| x == b'\n');

// Small helpers to parse chunks.
let is_process_start = |chunk: &[u8]| matches!(chunk.first(), Some(b'p'));
let is_file_descriptor = |chunk: &[u8]| matches!(chunk.first(), Some(b'f'));
let is_file_name = |chunk: &[u8]| matches!(chunk.first(), Some(b'n'));

while let Some(chunk) = chunks.next() {
if is_process_start(&chunk) {
// Start of a process group.
//
// Parse the PID, check if it matches our expected PID.
let pid: u32 = match String::from_utf8(chunk[1..].to_vec())
.context("Could not determine ClickHouse port number: Non-UTF8 output")?
.parse() {
Ok(pid) => pid,
_ => continue,
};
if pid == expected_pid {
// PID matches
//
// The first chunk should be the numeric file descriptor
if let Some(should_be_fd) = chunks.next() {
ensure!(
is_file_descriptor(should_be_fd),
"Could not determine ClickHouse port number: Expected numeric file descriptor in output"
);
} else {
bail!("Could not determine ClickHouse port number: Expected numeric file descriptor in output");
}

// Process chunks until we find one that has a valid port, or we get one that's
// _not_ a filename.
while let Some(chunk) = chunks.next() {
ensure!(
is_file_name(chunk),
"Could not determine ClickHouse port number: Expected file name in output"
);

// Ignore leading `n`, which is part of the formatting from lsof
let chunk = &chunk[1..];

// Check if this looks like `localhost:<PORT>`
const LOCALHOST: &[u8] = b"localhost:";
if chunk.starts_with(LOCALHOST) {
let port: u16 = std::str::from_utf8(&chunk[LOCALHOST.len()..])
.context("Could not determine ClickHouse port number: Invalid PID in output")?
.parse()
.context("Could not determine ClickHouse port number: Invalid PID in output")?;

// Check that it's not one of the default other TCP ports ClickHouse opens
// by default
if !IGNORED_CLICKHOUSE_PORTS.contains(&port) {
return Ok(port);
}
}
}

// Early exit, the PID matched, but we couldn't find a valid port
break;
}
const NEEDLE: &str = "<Information> Application: Listening for http://";
let mut lines = reader.lines();
while let Some(line) = lines
.next_line()
.await
.context("Failed to read line from ClickHouse log file")?
{
if let Some(needle_start) = line.find(&NEEDLE) {
let address_start = needle_start + NEEDLE.len();
let address: SocketAddr =
line[address_start..].trim().parse().context(
"Failed to parse ClickHouse socket address from log",
)?;
return Ok(address.port());
}
}
bail!("Could not determine ClickHouse port number: No valid ports found in output");
}

#[cfg(test)]
mod pfiles_tests {
use super::extract_port_from_pfiles_output;

// A known-good test output.
const GOOD_INPUT: &[u8] = br#"
25: S_IFSOCK mode:0666 dev:547,0 ino:24056 uid:0 gid:0 rdev:0,0
O_RDWR FD_CLOEXEC
SOCK_STREAM
SO_REUSEADDR,SO_SNDBUF(49152),SO_RCVBUF(128000)
sockname: AF_INET6 ::1 port: 9004
26: S_IFSOCK mode:0666 dev:547,0 ino:24056 uid:0 gid:0 rdev:0,0
O_RDWR FD_CLOEXEC
SOCK_STREAM
SO_REUSEADDR,SO_SNDBUF(49152),SO_RCVBUF(128000)
sockname: AF_INET 127.0.0.1 port: 8123
27: S_IFSOCK mode:0666 dev:547,0 ino:42019 uid:0 gid:0 rdev:0,0
O_RDWR FD_CLOEXEC
SOCK_STREAM
SO_REUSEADDR,SO_SNDBUF(49152),SO_RCVBUF(128000)
sockname: AF_INET 127.0.0.1 port: 9000
"#;

// Only contains the ignored ClickHouse ports
const ONLY_IGNORED_CLICKHOUSE_PORTS: &[u8] = br#"
25: S_IFSOCK mode:0666 dev:547,0 ino:24056 uid:0 gid:0 rdev:0,0
O_RDWR FD_CLOEXEC
SOCK_STREAM
SO_REUSEADDR,SO_SNDBUF(49152),SO_RCVBUF(128000)
sockname: AF_INET6 ::1 port: 9004
"#;

#[test]
fn test_extract_port_from_pfiles_output() {
assert_eq!(extract_port_from_pfiles_output(&GOOD_INPUT).unwrap(), 8123);
}

#[test]
fn test_extract_port_from_lsof_output_no_valid_port() {
assert!(extract_port_from_pfiles_output(
&ONLY_IGNORED_CLICKHOUSE_PORTS
)
.is_err());
}
}

#[cfg(test)]
mod lsof_tests {
use super::extract_port_from_lsof_output;

// A known-good test output. This was generated by running the actual command while a
// ClickHouse process is running.
const GOOD_INPUT: &[u8] = b"p462\n\
f4\n\
nlocalhost:19536\n\
p7741\n\
f8\n\
nlocalhost:53091\n\
f9\n\
nlocalhost:cslistener\n\
f12\n\
nlocalhost:9004\n";

// This command has some valid `localhost:PORT` lines, but those ports are known to be other
// ports that ClickHouse opens that aren't HTTP. These are the native client and the mySQL
// client ports.
const ONLY_IGNORED_CLICKHOUSE_PORTS: &[u8] = b"p462\n\
f4\n\
nlocalhost:19536\n\
p7741\n\
f8\n\
nlocalhost:9000\n\
f9\n\
nlocalhost:cslistener\n\
f12\n\
nlocalhost:9004\n";

// A bad output that has no lines like `flocalhost:<PORT>\n` at all
const NO_FILE_NAMES: &[u8] = b"p462\n\
f4\n\
nlocalhost:19536\n\
p7741\n\
f8\n\
f9\n\
f12\n";

#[test]
fn test_extract_port_from_lsof_output() {
assert_eq!(
extract_port_from_lsof_output(7741, &GOOD_INPUT).unwrap(),
53091
);
}

#[test]
fn test_extract_port_from_lsof_output_no_valid_port() {
assert!(extract_port_from_lsof_output(
7741,
&ONLY_IGNORED_CLICKHOUSE_PORTS
)
.is_err());
}

// A test that uses the good input, but assumes we're looking for another PID.
#[test]
fn test_extract_port_from_lsof_output_incorrect_pid() {
assert!(extract_port_from_lsof_output(0, &GOOD_INPUT).is_err());
}

#[test]
fn test_extract_port_from_lsof_output_no_file_names() {
assert!(extract_port_from_lsof_output(0, &NO_FILE_NAMES).is_err());
}
bail!("Failed to discover port from ClickHouse log file");
}
5 changes: 2 additions & 3 deletions tools/ci_download_clickhouse
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ set -o pipefail
set -o xtrace
set -o errexit

SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
ARG0="$(basename ${BASH_SOURCE[0]})"

# If you change this, you must also update the md5sums below
CIDL_VERSION="v21.7"
CIDL_MD5_DARWIN="501dafb3cf0b134832d0719dfefdd851"
CIDL_MD5_LINUX=""95e3d042cec0a1c03edea01c5ee009ee
CIDL_MD5_ILLUMOS="53c9417619cb84c8ed1e1a1bc0510a89"
source "$SOURCE_DIR/clickhouse_checksums"
CIDL_ASSEMBLE_DIR="./clickhouse"

# Download from manually-populated S3 bucket for now
Expand Down
5 changes: 2 additions & 3 deletions tools/ci_download_cockroachdb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ set -o pipefail
set -o xtrace
set -o errexit

SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
ARG0="$(basename ${BASH_SOURCE[0]})"

# If you change this, you must also update the md5sums below
CIDL_VERSION="v20.2.5"
CIDL_MD5_DARWIN="4c4b84861c313884a4ef5fbbe57cb543"
CIDL_MD5_LINUX="8fe4f47413e2c6570da0e661af716f9a"
CIDL_MD5_ILLUMOS="2ee900af3bef860f627e4f8946dc6ba3"
source "$SOURCE_DIR/cockroachdb_checksums"

CIDL_ASSEMBLE_DIR="./cockroachdb"

Expand Down
3 changes: 3 additions & 0 deletions tools/clickhouse_checksums
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
CIDL_MD5_DARWIN="d4e46f2fa545a24f5b668ea9f67c9a72"
CIDL_MD5_LINUX="2536950320df5b7b80f536ca8fd9f8c7"
CIDL_MD5_ILLUMOS="dc1e2f5fb17d5ace0b97c8b54a093d48"
Loading