From 71b2584ecbdce4bd3aa9328d8d562d5a7028e5c8 Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Fri, 24 Jan 2025 00:28:21 -0800 Subject: [PATCH 1/7] Add systemd configurations to strengthen OS core security Signed-off-by: Rajat Gupta --- .../src/common/systemd/opensearch.service | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/distribution/packages/src/common/systemd/opensearch.service b/distribution/packages/src/common/systemd/opensearch.service index 962dc5d2aae72..ddc0df46d2556 100644 --- a/distribution/packages/src/common/systemd/opensearch.service +++ b/distribution/packages/src/common/systemd/opensearch.service @@ -60,6 +60,101 @@ SuccessExitStatus=143 # Allow a slow startup before the systemd notifier module kicks in to extend the timeout TimeoutStartSec=75 +# Prevent modifications to the control group filesystem +ProtectControlGroups=true + +# Prevent loading or reading kernel modules +ProtectKernelModules=true + +# Prevent altering kernel tunables (sysctl parameters) +ProtectKernelTunables=true + +# Set device access policy to 'closed', allowing access only to specific devices +DevicePolicy=closed + +# Make /proc invisible to the service, enhancing isolation +ProtectProc=invisible + +# Make /usr, /boot, and /etc read-only (less restrictive than 'strict') +ProtectSystem=full + +# Prevent changes to control groups (redundant with earlier setting, can be removed) +ProtectControlGroups=yes + +# Prevent changing the execution domain +LockPersonality=yes + + +# System call filtering +# System call filterings which restricts which system calls a process can make +# @ means allowed +# ~ means not allowed +SystemCallFilter=@system-service +SystemCallFilter=~@reboot +SystemCallFilter=~@swap + +SystemCallErrorNumber=EPERM + +# Capability restrictions +# Remove the ability to block system suspends +CapabilityBoundingSet=~CAP_BLOCK_SUSPEND + +# Remove the ability to establish leases on files +CapabilityBoundingSet=~CAP_LEASE + +# Remove the ability 
to use system resource accounting +CapabilityBoundingSet=~CAP_SYS_PACCT + +# Remove the ability to configure TTY devices +CapabilityBoundingSet=~CAP_SYS_TTY_CONFIG + +# Remov below capabilities: +# - CAP_SYS_ADMIN: Various system administration operations +# - CAP_SYS_PTRACE: Ability to trace processes +# - CAP_NET_ADMIN: Various network-related operations +CapabilityBoundingSet=~CAP_SYS_ADMIN ~CAP_SYS_PTRACE ~CAP_NET_ADMIN + + +# Address family restrictions +RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX + +# Filesystem Access + +ReadWritePaths=/var/log/opensearch +ReadWritePaths=/var/lib/opensearch +ReadWritePaths=/mnt/snapshots + +## Allow read access to system files +ReadOnlyPaths=/etc/os-release /usr/lib/os-release /etc/system-release + +## Allow read access to Linux IO stats +ReadOnlyPaths=/proc/self/mountinfo /proc/diskstats + +## Allow read access to control group stats +ReadOnlyPaths=/proc/self/cgroup /sys/fs/cgroup/cpu /sys/fs/cgroup/cpu/- +ReadOnlyPaths=/sys/fs/cgroup/cpuacct /sys/fs/cgroup/cpuacct/- /sys/fs/cgroup/memory /sys/fs/cgroup/memory/- + + +RestrictNamespaces=true + +NoNewPrivileges=true + +# Memory and execution protection +MemoryDenyWriteExecute=true # Prevent creating writable executable memory mappings +SystemCallArchitectures=native # Allow only native system calls +KeyringMode=private # Service does not share key material with other services +LockPersonality=true # Prevent changing ABI personality +RestrictSUIDSGID=true # Prevent creating SUID/SGID files +RestrictRealtime=true # Prevent acquiring realtime scheduling +ProtectHostname=true # Prevent changes to system hostname +ProtectKernelLogs=true # Prevent reading/writing kernel logs +ProtectClock=true # Prevent tampering with the system clock + +# Socket restrictions +SocketBindAllow=tcp:9200 +SocketBindAllow=tcp:9300 +SocketBindDeny=any # Deny all other socket bindings + [Install] WantedBy=multi-user.target From 40a31783d162143cd8e40db56b8e86897a173d3f Mon Sep 17 00:00:00 2001 From: 
Rajat Gupta Date: Tue, 28 Jan 2025 04:40:08 -0800 Subject: [PATCH 2/7] Add systemd template unit file Signed-off-by: Rajat Gupta --- .../opensearch-systemd-template@.service | 161 ++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 distribution/packages/src/common/systemd/opensearch-systemd-template@.service diff --git a/distribution/packages/src/common/systemd/opensearch-systemd-template@.service b/distribution/packages/src/common/systemd/opensearch-systemd-template@.service new file mode 100644 index 0000000000000..910c1603252cf --- /dev/null +++ b/distribution/packages/src/common/systemd/opensearch-systemd-template@.service @@ -0,0 +1,161 @@ +[Unit] +Description=OpenSearch +Documentation=https://www.elastic.co +Wants=network-online.target +After=network-online.target + +[Service] +Type=notify +RuntimeDirectory=opensearch +PrivateTmp=true +Environment=OPENSEARCH_HOME=/usr/share/opensearch +Environment=OPENSEARCH_PATH_CONF=${path.conf} +Environment=PID_DIR=/var/run/opensearch +Environment=OPENSEARCH_SD_NOTIFY=true +EnvironmentFile=-${path.env} + +WorkingDirectory=/usr/share/opensearch + +User=opensearch +Group=opensearch + +ExecStart=/usr/share/opensearch/bin/systemd-entrypoint -p ${PID_DIR}/opensearch.pid --quiet + +# StandardOutput is configured to redirect to journalctl since +# some error messages may be logged in standard output before +# opensearch logging system is initialized. OpenSearch +# stores its logs in /var/log/opensearch and does not use +# journalctl by default. If you also want to enable journalctl +# logging, you can simply remove the "quiet" option from ExecStart. 
+StandardOutput=journal +StandardError=inherit +SyslogIdentifier=opensearch + +# Specifies the maximum file descriptor number that can be opened by this process +LimitNOFILE=65535 + +# Specifies the maximum number of processes +LimitNPROC=4096 + +# Specifies the maximum size of virtual memory +LimitAS=infinity + +# Specifies the maximum file size +LimitFSIZE=infinity + +# Disable timeout logic and wait until process is stopped +TimeoutStopSec=0 + +# SIGTERM signal is used to stop the Java process +KillSignal=SIGTERM + +# Send the signal only to the JVM rather than its control group +KillMode=process + +# Java process is never killed +SendSIGKILL=no + +# When a JVM receives a SIGTERM signal it exits with code 143 +SuccessExitStatus=143 + +# Allow a slow startup before the systemd notifier module kicks in to extend the timeout +TimeoutStartSec=75 + +# Prevent modifications to the control group filesystem +ProtectControlGroups=true + +# Prevent loading or reading kernel modules +ProtectKernelModules=true + +# Prevent altering kernel tunables (sysctl parameters) +ProtectKernelTunables=true + +# Set device access policy to 'closed', allowing access only to specific devices +DevicePolicy=closed + +# Make /proc invisible to the service, enhancing isolation +ProtectProc=invisible + +# Make /usr, /boot, and /etc read-only (less restrictive than 'strict') +ProtectSystem=full + +# Prevent changes to control groups (redundant with earlier setting, can be removed) +ProtectControlGroups=yes + +# Prevent changing the execution domain +LockPersonality=yes + + +# System call filtering +# System call filterings which restricts which system calls a process can make +# @ means allowed +# ~ means not allowed +SystemCallFilter=@system-service +SystemCallFilter=~@reboot +SystemCallFilter=~@swap + +SystemCallErrorNumber=EPERM + +# Capability restrictions +# Remove the ability to block system suspends +CapabilityBoundingSet=~CAP_BLOCK_SUSPEND + +# Remove the ability to establish leases on 
files +CapabilityBoundingSet=~CAP_LEASE + +# Remove the ability to use system resource accounting +CapabilityBoundingSet=~CAP_SYS_PACCT + +# Remove the ability to configure TTY devices +CapabilityBoundingSet=~CAP_SYS_TTY_CONFIG + +# Remov below capabilities: +# - CAP_SYS_ADMIN: Various system administration operations +# - CAP_SYS_PTRACE: Ability to trace processes +# - CAP_NET_ADMIN: Various network-related operations +CapabilityBoundingSet=~CAP_SYS_ADMIN ~CAP_SYS_PTRACE ~CAP_NET_ADMIN + + +# Address family restrictions +RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX + +# Filesystem Access + +ReadWritePaths=/var/log/opensearch +ReadWritePaths=/var/lib/opensearch +ReadWritePaths=/mnt/snapshots + +## Allow read access to system files +ReadOnlyPaths=/etc/os-release /usr/lib/os-release /etc/system-release + +## Allow read access to Linux IO stats +ReadOnlyPaths=/proc/self/mountinfo /proc/diskstats + +## Allow read access to control group stats +ReadOnlyPaths=/proc/self/cgroup /sys/fs/cgroup/cpu /sys/fs/cgroup/cpu/- +ReadOnlyPaths=/sys/fs/cgroup/cpuacct /sys/fs/cgroup/cpuacct/- /sys/fs/cgroup/memory /sys/fs/cgroup/memory/- + + +RestrictNamespaces=true + +NoNewPrivileges=true + +# Memory and execution protection +MemoryDenyWriteExecute=true # Prevent creating writable executable memory mappings +SystemCallArchitectures=native # Allow only native system calls +KeyringMode=private # Service does not share key material with other services +LockPersonality=true # Prevent changing ABI personality +RestrictSUIDSGID=true # Prevent creating SUID/SGID files +RestrictRealtime=true # Prevent acquiring realtime scheduling +ProtectHostname=true # Prevent changes to system hostname +ProtectKernelLogs=true # Prevent reading/writing kernel logs +ProtectClock=true # Prevent tampering with the system clock + +# Socket restrictions +SocketBindAllow=tcp:%1-%2 +SocketBindDeny=any # Deny all other socket bindings + +[Install] +WantedBy=multi-user.target + +# Built for 
${project.name}-${project.version} (${project.name}) From 010445662a00b90d535f3ff63a2fe06d07b9794c Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Tue, 28 Jan 2025 04:55:42 -0800 Subject: [PATCH 3/7] Update CHANGELOG-3.0.md Signed-off-by: Rajat Gupta --- CHANGELOG-3.0.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG-3.0.md b/CHANGELOG-3.0.md index 6ca1543d85c7d..6f308ee84ee56 100644 --- a/CHANGELOG-3.0.md +++ b/CHANGELOG-3.0.md @@ -13,6 +13,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - GHA to verify checklist items completion in PR descriptions ([#10800](https://github.com/opensearch-project/OpenSearch/pull/10800)) - Allow to pass the list settings through environment variables (like [], ["a", "b", "c"], ...) ([#10625](https://github.com/opensearch-project/OpenSearch/pull/10625)) - Views, simplify data access and manipulation by providing a virtual layer over one or more indices ([#11957](https://github.com/opensearch-project/OpenSearch/pull/11957)) +- Add systemd configurations to strengthen OS core security ([#17107](https://github.com/opensearch-project/OpenSearch/pull/17107)) ### Dependencies - Update Apache Lucene to 10.1.0 ([#16366](https://github.com/opensearch-project/OpenSearch/pull/16366)) From c694f75d6c5f9784f4ecc0a31cced9582a5b6dba Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Tue, 28 Jan 2025 08:23:15 -0800 Subject: [PATCH 4/7] Revert "Add systemd configurations to strengthen OS core security" This reverts commit 71b2584ecbdce4bd3aa9328d8d562d5a7028e5c8. 
Signed-off-by: Rajat Gupta --- .../src/common/systemd/opensearch.service | 95 ------------------- 1 file changed, 95 deletions(-) diff --git a/distribution/packages/src/common/systemd/opensearch.service b/distribution/packages/src/common/systemd/opensearch.service index ddc0df46d2556..962dc5d2aae72 100644 --- a/distribution/packages/src/common/systemd/opensearch.service +++ b/distribution/packages/src/common/systemd/opensearch.service @@ -60,101 +60,6 @@ SuccessExitStatus=143 # Allow a slow startup before the systemd notifier module kicks in to extend the timeout TimeoutStartSec=75 -# Prevent modifications to the control group filesystem -ProtectControlGroups=true - -# Prevent loading or reading kernel modules -ProtectKernelModules=true - -# Prevent altering kernel tunables (sysctl parameters) -ProtectKernelTunables=true - -# Set device access policy to 'closed', allowing access only to specific devices -DevicePolicy=closed - -# Make /proc invisible to the service, enhancing isolation -ProtectProc=invisible - -# Make /usr, /boot, and /etc read-only (less restrictive than 'strict') -ProtectSystem=full - -# Prevent changes to control groups (redundant with earlier setting, can be removed) -ProtectControlGroups=yes - -# Prevent changing the execution domain -LockPersonality=yes - - -# System call filtering -# System call filterings which restricts which system calls a process can make -# @ means allowed -# ~ means not allowed -SystemCallFilter=@system-service -SystemCallFilter=~@reboot -SystemCallFilter=~@swap - -SystemCallErrorNumber=EPERM - -# Capability restrictions -# Remove the ability to block system suspends -CapabilityBoundingSet=~CAP_BLOCK_SUSPEND - -# Remove the ability to establish leases on files -CapabilityBoundingSet=~CAP_LEASE - -# Remove the ability to use system resource accounting -CapabilityBoundingSet=~CAP_SYS_PACCT - -# Remove the ability to configure TTY devices -CapabilityBoundingSet=~CAP_SYS_TTY_CONFIG - -# Remov below capabilities: -# - 
CAP_SYS_ADMIN: Various system administration operations -# - CAP_SYS_PTRACE: Ability to trace processes -# - CAP_NET_ADMIN: Various network-related operations -CapabilityBoundingSet=~CAP_SYS_ADMIN ~CAP_SYS_PTRACE ~CAP_NET_ADMIN - - -# Address family restrictions -RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX - -# Filesystem Access - -ReadWritePaths=/var/log/opensearch -ReadWritePaths=/var/lib/opensearch -ReadWritePaths=/mnt/snapshots - -## Allow read access to system files -ReadOnlyPaths=/etc/os-release /usr/lib/os-release /etc/system-release - -## Allow read access to Linux IO stats -ReadOnlyPaths=/proc/self/mountinfo /proc/diskstats - -## Allow read access to control group stats -ReadOnlyPaths=/proc/self/cgroup /sys/fs/cgroup/cpu /sys/fs/cgroup/cpu/- -ReadOnlyPaths=/sys/fs/cgroup/cpuacct /sys/fs/cgroup/cpuacct/- /sys/fs/cgroup/memory /sys/fs/cgroup/memory/- - - -RestrictNamespaces=true - -NoNewPrivileges=true - -# Memory and execution protection -MemoryDenyWriteExecute=true # Prevent creating writable executable memory mappings -SystemCallArchitectures=native # Allow only native system calls -KeyringMode=private # Service does not share key material with other services -LockPersonality=true # Prevent changing ABI personality -RestrictSUIDSGID=true # Prevent creating SUID/SGID files -RestrictRealtime=true # Prevent acquiring realtime scheduling -ProtectHostname=true # Prevent changes to system hostname -ProtectKernelLogs=true # Prevent reading/writing kernel logs -ProtectClock=true # Prevent tampering with the system clock - -# Socket restrictions -SocketBindAllow=tcp:9200 -SocketBindAllow=tcp:9300 -SocketBindDeny=any # Deny all other socket bindings - [Install] WantedBy=multi-user.target From d784b96923dbb289ce35896ac21bddff4e0266e6 Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Tue, 28 Jan 2025 23:06:40 -0800 Subject: [PATCH 5/7] Remove SocketBind Directives and template unit file Signed-off-by: Rajat Gupta --- .../opensearch-systemd-template@.service | 161 
------------------ .../src/common/systemd/opensearch.service | 91 ++++++++++ 2 files changed, 91 insertions(+), 161 deletions(-) delete mode 100644 distribution/packages/src/common/systemd/opensearch-systemd-template@.service diff --git a/distribution/packages/src/common/systemd/opensearch-systemd-template@.service b/distribution/packages/src/common/systemd/opensearch-systemd-template@.service deleted file mode 100644 index 910c1603252cf..0000000000000 --- a/distribution/packages/src/common/systemd/opensearch-systemd-template@.service +++ /dev/null @@ -1,161 +0,0 @@ -[Unit] -Description=OpenSearch -Documentation=https://www.elastic.co -Wants=network-online.target -After=network-online.target - -[Service] -Type=notify -RuntimeDirectory=opensearch -PrivateTmp=true -Environment=OPENSEARCH_HOME=/usr/share/opensearch -Environment=OPENSEARCH_PATH_CONF=${path.conf} -Environment=PID_DIR=/var/run/opensearch -Environment=OPENSEARCH_SD_NOTIFY=true -EnvironmentFile=-${path.env} - -WorkingDirectory=/usr/share/opensearch - -User=opensearch -Group=opensearch - -ExecStart=/usr/share/opensearch/bin/systemd-entrypoint -p ${PID_DIR}/opensearch.pid --quiet - -# StandardOutput is configured to redirect to journalctl since -# some error messages may be logged in standard output before -# opensearch logging system is initialized. OpenSearch -# stores its logs in /var/log/opensearch and does not use -# journalctl by default. If you also want to enable journalctl -# logging, you can simply remove the "quiet" option from ExecStart. 
-StandardOutput=journal -StandardError=inherit -SyslogIdentifier=opensearch - -# Specifies the maximum file descriptor number that can be opened by this process -LimitNOFILE=65535 - -# Specifies the maximum number of processes -LimitNPROC=4096 - -# Specifies the maximum size of virtual memory -LimitAS=infinity - -# Specifies the maximum file size -LimitFSIZE=infinity - -# Disable timeout logic and wait until process is stopped -TimeoutStopSec=0 - -# SIGTERM signal is used to stop the Java process -KillSignal=SIGTERM - -# Send the signal only to the JVM rather than its control group -KillMode=process - -# Java process is never killed -SendSIGKILL=no - -# When a JVM receives a SIGTERM signal it exits with code 143 -SuccessExitStatus=143 - -# Allow a slow startup before the systemd notifier module kicks in to extend the timeout -TimeoutStartSec=75 - -# Prevent modifications to the control group filesystem -ProtectControlGroups=true - -# Prevent loading or reading kernel modules -ProtectKernelModules=true - -# Prevent altering kernel tunables (sysctl parameters) -ProtectKernelTunables=true - -# Set device access policy to 'closed', allowing access only to specific devices -DevicePolicy=closed - -# Make /proc invisible to the service, enhancing isolation -ProtectProc=invisible - -# Make /usr, /boot, and /etc read-only (less restrictive than 'strict') -ProtectSystem=full - -# Prevent changes to control groups (redundant with earlier setting, can be removed) -ProtectControlGroups=yes - -# Prevent changing the execution domain -LockPersonality=yes - - -# System call filtering -# System call filterings which restricts which system calls a process can make -# @ means allowed -# ~ means not allowed -SystemCallFilter=@system-service -SystemCallFilter=~@reboot -SystemCallFilter=~@swap - -SystemCallErrorNumber=EPERM - -# Capability restrictions -# Remove the ability to block system suspends -CapabilityBoundingSet=~CAP_BLOCK_SUSPEND - -# Remove the ability to establish leases on 
files -CapabilityBoundingSet=~CAP_LEASE - -# Remove the ability to use system resource accounting -CapabilityBoundingSet=~CAP_SYS_PACCT - -# Remove the ability to configure TTY devices -CapabilityBoundingSet=~CAP_SYS_TTY_CONFIG - -# Remov below capabilities: -# - CAP_SYS_ADMIN: Various system administration operations -# - CAP_SYS_PTRACE: Ability to trace processes -# - CAP_NET_ADMIN: Various network-related operations -CapabilityBoundingSet=~CAP_SYS_ADMIN ~CAP_SYS_PTRACE ~CAP_NET_ADMIN - - -# Address family restrictions -RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX - -# Filesystem Access - -ReadWritePaths=/var/log/opensearch -ReadWritePaths=/var/lib/opensearch -ReadWritePaths=/mnt/snapshots - -## Allow read access to system files -ReadOnlyPaths=/etc/os-release /usr/lib/os-release /etc/system-release - -## Allow read access to Linux IO stats -ReadOnlyPaths=/proc/self/mountinfo /proc/diskstats - -## Allow read access to control group stats -ReadOnlyPaths=/proc/self/cgroup /sys/fs/cgroup/cpu /sys/fs/cgroup/cpu/- -ReadOnlyPaths=/sys/fs/cgroup/cpuacct /sys/fs/cgroup/cpuacct/- /sys/fs/cgroup/memory /sys/fs/cgroup/memory/- - - -RestrictNamespaces=true - -NoNewPrivileges=true - -# Memory and execution protection -MemoryDenyWriteExecute=true # Prevent creating writable executable memory mappings -SystemCallArchitectures=native # Allow only native system calls -KeyringMode=private # Service does not share key material with other services -LockPersonality=true # Prevent changing ABI personality -RestrictSUIDSGID=true # Prevent creating SUID/SGID files -RestrictRealtime=true # Prevent acquiring realtime scheduling -ProtectHostname=true # Prevent changes to system hostname -ProtectKernelLogs=true # Prevent reading/writing kernel logs -ProtectClock=true # Prevent tampering with the system clock - -# Socket restrictions -SocketBindAllow=tcp:%1-%2 -SocketBindDeny=any # Deny all other socket bindings - -[Install] -WantedBy=multi-user.target - -# Built for 
${project.name}-${project.version} (${project.name}) diff --git a/distribution/packages/src/common/systemd/opensearch.service b/distribution/packages/src/common/systemd/opensearch.service index 962dc5d2aae72..c0bbfebcef0a2 100644 --- a/distribution/packages/src/common/systemd/opensearch.service +++ b/distribution/packages/src/common/systemd/opensearch.service @@ -29,6 +29,7 @@ ExecStart=/usr/share/opensearch/bin/systemd-entrypoint -p ${PID_DIR}/opensearch. # logging, you can simply remove the "quiet" option from ExecStart. StandardOutput=journal StandardError=inherit +SyslogIdentifier=opensearch # Specifies the maximum file descriptor number that can be opened by this process LimitNOFILE=65535 @@ -60,6 +61,96 @@ SuccessExitStatus=143 # Allow a slow startup before the systemd notifier module kicks in to extend the timeout TimeoutStartSec=75 +# Prevent modifications to the control group filesystem +ProtectControlGroups=true + +# Prevent loading or reading kernel modules +ProtectKernelModules=true + +# Prevent altering kernel tunables (sysctl parameters) +ProtectKernelTunables=true + +# Set device access policy to 'closed', allowing access only to specific devices +DevicePolicy=closed + +# Hide processes owned by other users in /proc, enhancing isolation +ProtectProc=invisible + +# Make /usr, /boot, and /etc read-only (less restrictive than 'strict') +ProtectSystem=full + +# Prevent changes to control groups (redundant with earlier setting, can be removed) +ProtectControlGroups=yes + +# Prevent changing the execution domain +LockPersonality=yes + + +# System call filtering +# Restricts which system calls the service's processes can make +# A leading @ names a predefined system call set (group) +# A leading ~ inverts the list, denying the listed calls +SystemCallFilter=@system-service +SystemCallFilter=~@reboot +SystemCallFilter=~@swap + +SystemCallErrorNumber=EPERM + +# Capability restrictions +# Remove the ability to block system suspends +CapabilityBoundingSet=~CAP_BLOCK_SUSPEND + +# Remove the ability to establish leases on files
+CapabilityBoundingSet=~CAP_LEASE + +# Remove the ability to configure process accounting (acct(2)) +CapabilityBoundingSet=~CAP_SYS_PACCT + +# Remove the ability to configure TTY devices +CapabilityBoundingSet=~CAP_SYS_TTY_CONFIG + +# Remove the capabilities below: +# - CAP_SYS_ADMIN: Various system administration operations +# - CAP_SYS_PTRACE: Ability to trace processes +# - CAP_NET_ADMIN: Various network-related operations +CapabilityBoundingSet=~CAP_SYS_ADMIN ~CAP_SYS_PTRACE ~CAP_NET_ADMIN + + +# Address family restrictions +RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX + +# Filesystem Access + +ReadWritePaths=/var/log/opensearch +ReadWritePaths=/var/lib/opensearch +ReadWritePaths=/mnt/snapshots + +## Allow read access to system files +ReadOnlyPaths=/etc/os-release /usr/lib/os-release /etc/system-release + +## Allow read access to Linux IO stats +ReadOnlyPaths=/proc/self/mountinfo /proc/diskstats + +## Allow read access to control group stats +ReadOnlyPaths=/proc/self/cgroup /sys/fs/cgroup/cpu /sys/fs/cgroup/cpu/- +ReadOnlyPaths=/sys/fs/cgroup/cpuacct /sys/fs/cgroup/cpuacct/- /sys/fs/cgroup/memory /sys/fs/cgroup/memory/- + + +RestrictNamespaces=true + +NoNewPrivileges=true + +# Memory and execution protection +MemoryDenyWriteExecute=true # Prevent creating writable executable memory mappings +SystemCallArchitectures=native # Allow only native system calls +KeyringMode=private # Service does not share key material with other services +LockPersonality=true # Prevent changing ABI personality +RestrictSUIDSGID=true # Prevent creating SUID/SGID files +RestrictRealtime=true # Prevent acquiring realtime scheduling +ProtectHostname=true # Prevent changes to system hostname +ProtectKernelLogs=true # Prevent reading/writing kernel logs +ProtectClock=true # Prevent tampering with the system clock + [Install] WantedBy=multi-user.target From 890612e4d8702d20ea5dfa18509b34057adcbc22 Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Wed, 5 Feb 2025 23:49:22 -0800 Subject: [PATCH 6/7]
Minor fixes Signed-off-by: Rajat Gupta --- distribution/packages/src/common/systemd/opensearch.service | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/distribution/packages/src/common/systemd/opensearch.service b/distribution/packages/src/common/systemd/opensearch.service index c0bbfebcef0a2..a1c55af1644e3 100644 --- a/distribution/packages/src/common/systemd/opensearch.service +++ b/distribution/packages/src/common/systemd/opensearch.service @@ -123,7 +123,8 @@ RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX ReadWritePaths=/var/log/opensearch ReadWritePaths=/var/lib/opensearch -ReadWritePaths=/mnt/snapshots +ReadWritePaths=/etc/opensearch +ReadWritePaths=-/mnt/snapshots ## Allow read access to system files ReadOnlyPaths=/etc/os-release /usr/lib/os-release /etc/system-release From facaca3531141f2547deff798f74686a5366bdad Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Thu, 6 Feb 2025 00:05:12 -0800 Subject: [PATCH 7/7] Add integration tests for systemd Signed-off-by: Rajat Gupta --- qa/systemd-test/build.gradle | 26 ++ qa/systemd-test/docker-compose.yml | 62 +++++ .../opensearch/systemdinteg/SystemdIT.java | 237 ++++++++++++++++++ 3 files changed, 325 insertions(+) create mode 100644 qa/systemd-test/build.gradle create mode 100644 qa/systemd-test/docker-compose.yml create mode 100644 qa/systemd-test/src/test/java/org/opensearch/systemdinteg/SystemdIT.java diff --git a/qa/systemd-test/build.gradle b/qa/systemd-test/build.gradle new file mode 100644 index 0000000000000..8a29941b094d1 --- /dev/null +++ b/qa/systemd-test/build.gradle @@ -0,0 +1,26 @@ +import org.opensearch.gradle.Architecture +import org.opensearch.gradle.VersionProperties +import org.opensearch.gradle.testfixtures.TestFixturesPlugin + +apply plugin: 'opensearch.standalone-rest-test' +apply plugin: 'opensearch.test.fixtures' + +testFixtures.useFixture() + +dockerCompose { + useComposeFiles = ['docker-compose.yml'] +} + + +tasks.register("integTest", Test) { + 
outputs.doNotCacheIf('Build cache is disabled for Docker tests') { true } + maxParallelForks = '1' + include '**/*IT.class' +} + +tasks.named("check").configure { dependsOn "integTest" } + +tasks.named("integTest").configure { + dependsOn "composeUp" + finalizedBy "composeDown" +} diff --git a/qa/systemd-test/docker-compose.yml b/qa/systemd-test/docker-compose.yml new file mode 100644 index 0000000000000..b5cea89d23f20 --- /dev/null +++ b/qa/systemd-test/docker-compose.yml @@ -0,0 +1,62 @@ +services: + # self-contained systemd example: run 'docker-compose up' to see it + amazonlinux: + image: opensearch-systemd-test + container_name: opensearch-systemd-test-container + build: + dockerfile_inline: | + FROM amazonlinux:2023 + # install systemd + RUN dnf -y install systemd && dnf clean all + # in practice, you'd COPY in the RPM you want to test right here + RUN dnf -y install https://artifacts.opensearch.org/releases/bundle/opensearch/2.18.0/opensearch-2.18.0-linux-x64.rpm && dnf clean all + # add a test-user + RUN useradd -ms /bin/bash testuser + # no colors + ENV SYSTEMD_COLORS=0 + # no escapes + ENV SYSTEMD_URLIFY=0 + # explicitly specify docker virtualization + ENV container=docker + # for debugging systemd issues in container, you want this, but it is very loud! + # ENV SYSTEMD_LOG_LEVEL=debug + # plumb journald logs to stdout + COPY <> /etc/opensearch/opensearch.yml + RUN echo "network.host: 0.0.0.0" >> /etc/opensearch/opensearch.yml + RUN echo "discovery.type: single-node" >> /etc/opensearch/opensearch.yml + # provide /dev/console for journal logs to go to stdout + tty: true + # capabilities to allow systemd to sandbox + cap_add: + # https://systemd.io/CONTAINER_INTERFACE/#what-you-shouldnt-do bullet 1 + - SYS_ADMIN + # https://systemd.io/CONTAINER_INTERFACE/#what-you-shouldnt-do bullet 2 + - MKNOD + # evil, but best you can do on docker? podman is better here. 
+ cgroup: host + volumes: + - /sys/fs/cgroup:/sys/fs/cgroup + - ../../distribution/packages/src/common/systemd/opensearch.service:/etc/systemd/system/opensearch.service + # tmpfs mounts for systemd + tmpfs: + - /run + - /run/lock + # health check for opensearch + ports: + - "9200:9200" + - "9300:9300" + privileged: true + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9200/_cluster/health"] + start_period: 15s \ No newline at end of file diff --git a/qa/systemd-test/src/test/java/org/opensearch/systemdinteg/SystemdIT.java b/qa/systemd-test/src/test/java/org/opensearch/systemdinteg/SystemdIT.java new file mode 100644 index 0000000000000..395737d6bee31 --- /dev/null +++ b/qa/systemd-test/src/test/java/org/opensearch/systemdinteg/SystemdIT.java @@ -0,0 +1,237 @@ +/* +* SPDX-License-Identifier: Apache-2.0 +* +* The OpenSearch Contributors require contributions made to +* this file be licensed under the Apache-2.0 license or a +* compatible open source license. +*/ + +/* +* Licensed to Elasticsearch under one or more contributor +* license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright +* ownership. Elasticsearch licenses this file to you under +* the Apache License, Version 2.0 (the "License"); you may +* not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +*/ +/* +* Modifications Copyright OpenSearch Contributors. See +* GitHub history for details. 
+*/
+
+package org.opensearch.systemdinteg;
+
+import org.apache.hc.core5.http.HttpHeaders;
+import org.apache.hc.core5.http.HttpHost;
+import org.apache.hc.core5.http.HttpStatus;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.BufferedReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.TimeUnit;
+import static org.junit.Assert.*;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+
+/**
+ * Integration tests that inspect an OpenSearch service managed by systemd inside a Docker container.
+ *
+ * <p>Every check shells out to the {@code docker} CLI on the host (through {@code bash -c}) and runs
+ * commands inside the container named {@link #CONTAINER_NAME}; the cluster health check talks to
+ * {@link #OPENSEARCH_URL} directly.
+ */
+public class SystemdIT {
+    private static final String OPENSEARCH_URL = "http://localhost:9200"; // OpenSearch URL (port 9200)
+    private static String containerId;
+    private static String opensearchPid;
+    private static final String CONTAINER_NAME = "opensearch-systemd-test-container";
+
+    /**
+     * Resolves the container id and the main PID of the {@code opensearch} unit once for all tests.
+     *
+     * @throws RuntimeException if the container is not running, {@code systemctl status} exits non-zero,
+     *                          or the unit has no main process
+     */
+    @BeforeClass
+    public static void setup() throws IOException, InterruptedException {
+        containerId = getContainerId();
+
+        // Only the exit status matters here: executeCommand throws when `systemctl status` is non-zero.
+        executeCommand("docker exec " + containerId + " systemctl status opensearch", "Failed to check OpenSearch status");
+
+        opensearchPid = getOpenSearchPid();
+
+        // systemd reports MainPID=0 when the unit has no running main process.
+        if (opensearchPid.isEmpty() || "0".equals(opensearchPid)) {
+            throw new RuntimeException("Failed to find OpenSearch process ID");
+        }
+    }
+
+    /**
+     * Looks up the id of the running test container by name.
+     *
+     * @return the non-empty output of {@code docker ps -q}
+     * @throws RuntimeException if the command fails or no running container matches
+     */
+    private static String getContainerId() throws IOException, InterruptedException {
+        String notRunning = "OpenSearch container '" + CONTAINER_NAME + "' is not running";
+        String id = executeCommand("docker ps -qf name=" + CONTAINER_NAME, notRunning);
+        // `docker ps -q` exits 0 and prints nothing when no container matches, so the exit code alone
+        // cannot detect a missing container.
+        if (id.isEmpty()) {
+            throw new RuntimeException(notRunning);
+        }
+        return id;
+    }
+
+    /**
+     * Reads the {@code MainPID} property of the {@code opensearch} unit inside the container.
+     *
+     * @return the property value with the {@code MainPID=} prefix removed
+     */
+    private static String getOpenSearchPid() throws IOException, InterruptedException {
+        String command = "docker exec " + containerId + " systemctl show --property=MainPID opensearch";
+        String output = executeCommand(command, "Failed to get OpenSearch PID");
+        return output.replace("MainPID=", "").trim();
+    }
+
+    /**
+     * Reads {@code /proc/<pid>/limits} of the OpenSearch process and collapses runs of spaces and tabs
+     * into a single space, so assertions do not depend on the kernel's column padding.
+     */
+    private static String readProcessLimits() throws IOException, InterruptedException {
+        String limits = executeCommand(
+            "docker exec " + containerId + " cat /proc/" + opensearchPid + "/limits",
+            "Failed to read process limits"
+        );
+        return limits.replaceAll("[ \\t]+", " ");
+    }
+
+    /** Returns whether {@code path} exists inside the container ({@code test -e}). */
+    private boolean checkPathExists(String path) throws IOException, InterruptedException {
+        String command = String.format("docker exec %s test -e %s && echo true || echo false", containerId, path);
+        return Boolean.parseBoolean(executeCommand(command, "Failed to check path existence"));
+    }
+
+    /** Returns whether the {@code opensearch} user can read {@code path} inside the container ({@code test -r}). */
+    private boolean checkPathReadable(String path) throws IOException, InterruptedException {
+        String command = String.format(
+            "docker exec %s su opensearch -s /bin/sh -c 'test -r %s && echo true || echo false'",
+            containerId,
+            path
+        );
+        return Boolean.parseBoolean(executeCommand(command, "Failed to check read permission"));
+    }
+
+    /** Returns whether the {@code opensearch} user can write {@code path} inside the container ({@code test -w}). */
+    private boolean checkPathWritable(String path) throws IOException, InterruptedException {
+        String command = String.format(
+            "docker exec %s su opensearch -s /bin/sh -c 'test -w %s && echo true || echo false'",
+            containerId,
+            path
+        );
+        return Boolean.parseBoolean(executeCommand(command, "Failed to check write permission"));
+    }
+
+    /** Returns the {@code user:group} owner of {@code path} inside the container, as printed by {@code stat}. */
+    private String getPathOwnership(String path) throws IOException, InterruptedException {
+        String command = String.format("docker exec %s stat -c '%%U:%%G' %s", containerId, path);
+        return executeCommand(command, "Failed to get path ownership");
+    }
+
+    /**
+     * Runs {@code command} through {@code bash -c} on the host and returns its trimmed standard output.
+     *
+     * @param command      shell command line to execute
+     * @param errorMessage prefix of the exception message used when the command exits non-zero
+     * @return standard output with lines joined by {@code \n} and surrounding whitespace trimmed
+     * @throws RuntimeException if the command exits with a non-zero status
+     */
+    private static String executeCommand(String command, String errorMessage) throws IOException, InterruptedException {
+        // Standard error is forwarded to the test JVM instead of being left in an unread pipe, which
+        // could block the child once the pipe buffer fills and hides diagnostics.
+        Process process = new ProcessBuilder("bash", "-c", command).redirectError(ProcessBuilder.Redirect.INHERIT).start();
+        StringBuilder output = new StringBuilder();
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                output.append(line).append("\n");
+            }
+        }
+        int exitCode = process.waitFor();
+        if (exitCode != 0) {
+            throw new RuntimeException(errorMessage + " (exit code " + exitCode + "): " + command);
+        }
+        return output.toString().trim();
+    }
+
+    /** Verifies that {@code GET /_cluster/health} answers with HTTP 200. */
+    @Test
+    public void testClusterHealth() throws IOException {
+        HttpURLConnection healthCheck = (HttpURLConnection) new URL(OPENSEARCH_URL + "/_cluster/health").openConnection();
+        try {
+            healthCheck.setRequestMethod("GET");
+            // assertEquals reports the actual status code on failure, unlike assertTrue(a == b).
+            assertEquals("Cluster health endpoint should return HTTP 200", HttpURLConnection.HTTP_OK, healthCheck.getResponseCode());
+        } finally {
+            healthCheck.disconnect();
+        }
+    }
+
+    /** Verifies the soft and hard "Max processes" limits of the OpenSearch process. */
+    @Test
+    public void testMaxProcesses() throws IOException, InterruptedException {
+        String limits = readProcessLimits();
+        assertTrue(
+            "Max processes limit should be 4096 or unlimited",
+            limits.contains("Max processes 4096 4096") || limits.contains("Max processes unlimited unlimited")
+        );
+    }
+
+    /** Verifies the soft and hard "Max open files" limits of the OpenSearch process. */
+    @Test
+    public void testFileDescriptorLimit() throws IOException, InterruptedException {
+        String limits = readProcessLimits();
+        assertTrue(
+            "File descriptor limit should be at least 65535",
+            limits.contains("Max open files 65535 65535") || limits.contains("Max open files unlimited unlimited")
+        );
+    }
+
+
+    /** Verifies that seccomp is active for the OpenSearch process and probes two privileged operations. */
+    @Test
+    public void testSystemCallFilter() throws IOException, InterruptedException {
+        // Check if Seccomp is enabled
+        String seccomp = executeCommand(
+            "docker exec " + containerId + " grep Seccomp /proc/" + opensearchPid + "/status",
+            "Failed to read Seccomp status"
+        );
+        // Inspect only the "Seccomp:" field; grep may also match other rows (for example
+        // "Seccomp_filters:") whose digits must not influence the verdict.
+        String seccompMode = null;
+        for (String line : seccomp.split("\n")) {
+            if (line.startsWith("Seccomp:")) {
+                seccompMode = line.substring("Seccomp:".length()).trim();
+            }
+        }
+        assertNotNull("Seccomp mode should be reported", seccompMode);
+        assertFalse("Seccomp should be enabled", "0".equals(seccompMode));
+
+        // Test specific system calls that should be blocked
+        // NOTE(review): the `|| echo 'Operation not permitted'` fallback makes these two assertions pass
+        // on any failure of the probe command, not only on a denied system call - confirm this is intended.
+        String rebootResult = executeCommand(
+            "docker exec " + containerId + " su opensearch -c 'kill -s SIGHUP 1' 2>&1 || echo 'Operation not permitted'",
+            "Failed to test reboot system call"
+        );
+        assertTrue("Reboot system call should be blocked", rebootResult.contains("Operation not permitted"));
+
+        String swapResult = executeCommand(
+            "docker exec " + containerId + " su opensearch -c 'swapon -a' 2>&1 || echo 'Operation not permitted'",
+            "Failed to test swap system call"
+        );
+        assertTrue("Swap system call should be blocked", swapResult.contains("Operation not permitted"));
+    }
+
+
+    /** Verifies that each listed path that exists is readable but not writable for the {@code opensearch} user. */
+    @Test
+    public void testReadOnlyPaths() throws IOException, InterruptedException {
+        String[] readOnlyPaths = {
+            "/etc/os-release", "/usr/lib/os-release", "/etc/system-release",
+            "/proc/self/mountinfo", "/proc/diskstats",
+            "/proc/self/cgroup", "/sys/fs/cgroup/cpu", "/sys/fs/cgroup/cpu/-",
+            "/sys/fs/cgroup/cpuacct", "/sys/fs/cgroup/cpuacct/-",
+            "/sys/fs/cgroup/memory", "/sys/fs/cgroup/memory/-"
+        };
+
+        for (String path : readOnlyPaths) {
+            // Paths missing from the container are skipped rather than treated as failures.
+            if (checkPathExists(path)) {
+                assertTrue("Path should be readable: " + path, checkPathReadable(path));
+                assertFalse("Path should not be writable: " + path, checkPathWritable(path));
+            }
+        }
+    }
+
+    /** Verifies that the log and data directories exist, are readable and writable, and are owned by {@code opensearch}. */
+    @Test
+    public void testReadWritePaths() throws IOException, InterruptedException {
+        String[] readWritePaths = {"/var/log/opensearch", "/var/lib/opensearch"};
+        for (String path : readWritePaths) {
+            assertTrue("Path should exist: " + path, checkPathExists(path));
+            assertTrue("Path should be readable: " + path, checkPathReadable(path));
+            assertTrue("Path should be writable: " + path, checkPathWritable(path));
+            assertEquals("Path should be owned by opensearch:opensearch", "opensearch:opensearch", getPathOwnership(path));
+        }
+    }
+
+    /**
+     * Sends SIGTERM to the OpenSearch PID as {@code testuser} from inside the container and verifies
+     * that the process and the systemd unit are both still alive afterwards.
+     */
+    @Test
+    public void testProcessExit() throws IOException, InterruptedException {
+
+        String scriptContent = "#!/bin/sh\n" +
+            "if [ $# -ne 1 ]; then\n" +
+            " echo \"Usage: $0 \"\n" +
+            " exit 1\n" +
+            "fi\n" +
+            "if kill -15 $1 2>/dev/null; then\n" +
+            " echo \"SIGTERM signal sent to process $1\"\n" +
+            "else\n" +
+            " echo \"Failed to send SIGTERM to process $1\"\n" +
+            "fi\n" +
+            "sleep 2\n" +
+            "if kill -0 $1 2>/dev/null; then\n" +
+            " echo \"Process $1 is still running\"\n" +
+            "else\n" +
+            " echo \"Process $1 has terminated\"\n" +
+            "fi";
+
+        // NOTE(review): assumes a `testuser` account exists in the container image; if it does not,
+        // `docker exec` fails and the assertions below pass without any signal having been sent.
+        String[] command = {
+            "docker",
+            "exec",
+            "-u", "testuser",
+            containerId,
+            "sh",
+            "-c",
+            "echo '" + scriptContent.replace("'", "'\"'\"'") + "' > /tmp/terminate.sh && chmod +x /tmp/terminate.sh && /tmp/terminate.sh " + opensearchPid
+        };
+
+        ProcessBuilder processBuilder = new ProcessBuilder(command);
+        // Forward the script's output to the test log so its pipes never fill up unread.
+        processBuilder.redirectErrorStream(true);
+        processBuilder.redirectOutput(ProcessBuilder.Redirect.INHERIT);
+        Process process = processBuilder.start();
+
+        // The script sleeps for two seconds itself; wait for it to finish instead of racing it with a
+        // fixed sleep, and never leave the helper process behind.
+        if (!process.waitFor(30, TimeUnit.SECONDS)) {
+            process.destroyForcibly();
+            fail("Termination script did not finish within 30 seconds");
+        }
+
+        // Check if the OpenSearch process is still running
+        String processCheck = executeCommand(
+            "docker exec " + containerId + " kill -0 " + opensearchPid + " 2>/dev/null && echo 'Running' || echo 'Not running'",
+            "Failed to check process status"
+        );
+
+        // Verify the OpenSearch service status
+        String serviceStatus = executeCommand(
+            "docker exec " + containerId + " systemctl is-active opensearch",
+            "Failed to check OpenSearch service status"
+        );
+
+        assertTrue("OpenSearch process should still be running", processCheck.contains("Running"));
+        assertEquals("OpenSearch service should be active", "active", serviceStatus.trim());
+    }
+
+}