Skip to content

Commit

Permalink
Merge branch 'fix_dev_sgx' into add_meituan_hdfs_to_sgx
Browse files Browse the repository at this point in the history
  • Loading branch information
gejielun authored Mar 20, 2024
2 parents b0ef68b + 561e259 commit 9260efa
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 36 deletions.
57 changes: 45 additions & 12 deletions deploy/scripts/sgx/enclave_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

EXEC_DIR=/app/exec_dir

# Extract one field's value from the SGX token metadata printed by
# gramine-sgx-get-token for python.sig.
# Arguments:
#   $1 - field name to look up (used as a grep pattern, e.g. mr_enclave)
# Outputs:
#   the field's value, whitespace-trimmed, on stdout
function get_env() {
  # Quote "$1" so a pattern containing spaces or glob characters is passed
  # to grep as a single argument; '--' stops option parsing.
  gramine-sgx-get-token -s python.sig -o /dev/null | grep -- "$1" | awk -F ":" '{print $2}' | xargs
}

function make_custom_env() {
cd $EXEC_DIR

export DEBUG=0
export CUDA_VISIBLE_DEVICES=""
export DNNL_VERBOSE=0
Expand Down Expand Up @@ -59,17 +63,20 @@ function make_custom_env() {
# Embed the peer's (Meituan) MR_ENCLAVE/MR_SIGNER measurements into the
# RA-TLS dynamic_config.json used for remote attestation.
jq --arg mr_enclave "$PEER_MR_ENCLAVE" --arg mr_signer "$PEER_MR_SIGNER" \
'.sgx_mrs[0].mr_enclave = $mr_enclave | .sgx_mrs[0].mr_signer = $mr_signer' \
$GRPC_PATH/examples/dynamic_config.json > ./dynamic_config.json
$GRPC_PATH/examples/dynamic_config.json > $EXEC_DIR/dynamic_config.json

cd -
}

# Generate the Gramine SGX signature/token artifacts and stage them,
# together with tensorflow_io.py, into $EXEC_DIR for later gramine-sgx runs.
# Globals:
#   EXEC_DIR (read) - runtime execution directory populated by this function
# Returns:
#   non-zero if the generate-token directory cannot be entered
function generate_token() {
  # Fail early instead of running generate.sh in the wrong directory.
  cd /gramine/CI-Examples/generate-token/ || return 1
  ./generate.sh
  mkdir -p "$EXEC_DIR"
  cp /app/sgx/gramine/CI-Examples/tensorflow_io.py "$EXEC_DIR"
  # Stage every artifact produced by generate.sh in one copy.
  cp python.sig python.manifest.sgx python.token python.manifest "$EXEC_DIR"
  cd - || return 1
}

Expand All @@ -81,18 +88,44 @@ elif [ -n "$PCCS_URL" ]; then
sed -i "s|PCCS_URL=[^ ]*|PCCS_URL=$PCCS_URL|" /etc/sgx_default_qcnl.conf
fi

TEMPLATE_PATH="/gramine/CI-Examples/generate-token/python.manifest.template"
if [ -n "$GRAMINE_LOG_LEVEL" ]; then
FILE="/gramine/CI-Examples/generate-token/python.manifest.template"
sed -i "/loader.log_level/ s/\"[^\"]*\"/\"$GRAMINE_LOG_LEVEL\"/" "$FILE"
# Check whether the sed command succeeded
sed -i "/loader.log_level/ s/\"[^\"]*\"/\"$GRAMINE_LOG_LEVEL\"/" "$TEMPLATE_PATH"
if [ $? -eq 0 ]; then
echo "Log level changed to $GRAMINE_LOG_LEVEL in $FILE"
echo "Log level changed to $GRAMINE_LOG_LEVEL in $TEMPLATE_PATH"
else
echo "Failed to change log level in $FILE"
echo "Failed to change log level in $TEMPLATE_PATH"
fi
fi

# Optional overrides of Gramine manifest settings via environment variables.
# Each block rewrites one setting in the manifest template at $TEMPLATE_PATH
# (defined above) when its corresponding variable is set, and reports the
# outcome. Success messages go to stdout; failures go to stderr.

if [ -n "$GRAMINE_ENCLAVE_SIZE" ]; then
  # Test the sed exit status directly instead of the $? anti-pattern.
  if sed -i "/sgx.enclave_size/ s/\"[^\"]*\"/\"$GRAMINE_ENCLAVE_SIZE\"/" "$TEMPLATE_PATH"; then
    echo "Enclave size changed to $GRAMINE_ENCLAVE_SIZE in $TEMPLATE_PATH"
  else
    echo "Failed to change enclave size in $TEMPLATE_PATH" >&2
  fi
fi

if [ -n "$GRAMINE_THREAD_NUM" ]; then
  # sgx.thread_num is an unquoted integer in the template, hence the
  # numeric match rather than the quoted-string substitution above.
  if sed -i "s/sgx.thread_num = [0-9]\+/sgx.thread_num = $GRAMINE_THREAD_NUM/" "$TEMPLATE_PATH"; then
    echo "Thread number changed to $GRAMINE_THREAD_NUM in $TEMPLATE_PATH"
  else
    echo "Failed to change thread number in $TEMPLATE_PATH" >&2
  fi
fi

if [ -n "$GRAMINE_STACK_SIZE" ]; then
  if sed -i "/sys.stack.size/ s/\"[^\"]*\"/\"$GRAMINE_STACK_SIZE\"/" "$TEMPLATE_PATH"; then
    echo "Stack size changed to $GRAMINE_STACK_SIZE in $TEMPLATE_PATH"
  else
    echo "Failed to change stack size in $TEMPLATE_PATH" >&2
  fi
fi

sed -i 's/USE_SECURE_CERT=TRUE/USE_SECURE_CERT=FALSE/' /etc/sgx_default_qcnl.conf
mkdir -p /data

generate_token
generate_token
1 change: 0 additions & 1 deletion deploy/scripts/sgx/run_data_join_master.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ kvstore_type=$(normalize_env_to_args '--kvstore_type' $KVSTORE_TYPE)

source /app/deploy/scripts/sgx/enclave_env.sh
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./
cp /app/sgx/token/* ./
unset HTTPS_PROXY https_proxy http_proxy ftp_proxy

make_custom_env 4
Expand Down
1 change: 0 additions & 1 deletion deploy/scripts/sgx/run_data_join_worker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ fi

source /app/deploy/scripts/sgx/enclave_env.sh
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./
cp /app/sgx/token/* ./
unset HTTPS_PROXY https_proxy http_proxy ftp_proxy

make_custom_env 4
Expand Down
1 change: 0 additions & 1 deletion deploy/scripts/sgx/run_data_portal_master.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ files_per_job_limit=$(normalize_env_to_args '--files_per_job_limit' $FILES_PER_J

source /app/deploy/scripts/sgx/enclave_env.sh
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./
cp /app/sgx/token/* ./
unset HTTPS_PROXY https_proxy http_proxy ftp_proxy

make_custom_env 4
Expand Down
1 change: 0 additions & 1 deletion deploy/scripts/sgx/run_data_portal_worker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ input_data_validation_ratio=$(normalize_env_to_args '--input_data_validation_rat

source /app/deploy/scripts/sgx/enclave_env.sh
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./
cp /app/sgx/token/* ./
unset HTTPS_PROXY https_proxy http_proxy ftp_proxy

make_custom_env 4
Expand Down
11 changes: 7 additions & 4 deletions deploy/scripts/sgx/run_trainer_master_sgx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,12 @@ if [[ -n "${CODE_KEY}" ]]; then
else
pull_code ${CODE_TAR} $PWD
fi
cd ${ROLE}
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./

cp /app/sgx/gramine/CI-Examples/tensorflow_io.py /gramine/follower/
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py /gramine/leader/
source /app/deploy/scripts/sgx/enclave_env.sh
cp /app/sgx/token/* ./

unset HTTPS_PROXY https_proxy http_proxy ftp_proxy

make_custom_env 4
source /root/start_aesm_service.sh
Expand All @@ -104,14 +106,15 @@ fi

server_port=$(normalize_env_to_args "--server-port" "$PORT1")

cd $EXEC_DIR
if [[ -z "${START_CPU_SN}" ]]; then
START_CPU_SN=0
fi
if [[ -z "${END_CPU_SN}" ]]; then
END_CPU_SN=3
fi

taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python main.py --master \
taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python /gramine/$ROLE/main.py --master \
--application-id=$APPLICATION_ID \
--data-source=$DATA_SOURCE \
--data-path=$DATA_PATH \
Expand Down
15 changes: 11 additions & 4 deletions deploy/scripts/sgx/run_trainer_ps_sgx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,32 @@ export CUDA_VISIBLE_DEVICES=
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./
source /app/deploy/scripts/hdfs_common.sh || true
source /app/deploy/scripts/pre_start_hook.sh || true
source /app/deploy/scripts/env_to_args.sh

LISTEN_PORT=50052
if [[ -n "${PORT1}" ]]; then
LISTEN_PORT=${PORT1}
fi

if [[ -n "${CODE_KEY}" ]]; then
pull_code ${CODE_KEY} $PWD
else
pull_code ${CODE_TAR} $PWD
fi

cp /app/sgx/gramine/CI-Examples/tensorflow_io.py /gramine/leader
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py /gramine/follower
source /app/deploy/scripts/sgx/enclave_env.sh
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./
cp /app/sgx/token/* ./
unset HTTPS_PROXY https_proxy http_proxy ftp_proxy

make_custom_env 4
source /root/start_aesm_service.sh

cd $EXEC_DIR
if [[ -z "${START_CPU_SN}" ]]; then
START_CPU_SN=0
fi
if [[ -z "${END_CPU_SN}" ]]; then
END_CPU_SN=3
fi

taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python -m fedlearner.trainer.parameter_server $POD_IP:${LISTEN_PORT}
taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python -m fedlearner.trainer.parameter_server $POD_IP:${LISTEN_PORT}
18 changes: 12 additions & 6 deletions deploy/scripts/sgx/run_trainer_worker_sgx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,14 @@ LISTEN_PORT=50051
if [[ -n "${PORT0}" ]]; then
LISTEN_PORT=${PORT0}
fi

echo $LISTEN_PORT > /pod-data/listen_port

unset HTTPS_PROXY https_proxy http_proxy ftp_proxy
PROXY_LOCAL_PORT=50053
if [[ -n "${PORT2}" ]]; then
PROXY_LOCAL_PORT=${PORT2}
fi
echo $PROXY_LOCAL_PORT > /pod-data/proxy_local_port

cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./
source /app/deploy/scripts/hdfs_common.sh || true
source /app/deploy/scripts/pre_start_hook.sh || true
Expand All @@ -40,10 +44,11 @@ else
pull_code ${CODE_TAR} $PWD
fi

cd ${ROLE}
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py ./
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py /gramine/follower/
cp /app/sgx/gramine/CI-Examples/tensorflow_io.py /gramine/leader/
source /app/deploy/scripts/sgx/enclave_env.sh
cp /app/sgx/token/* ./

unset HTTPS_PROXY https_proxy http_proxy ftp_proxy

mode=$(normalize_env_to_args "--mode" "$MODE")
sparse_estimator=$(normalize_env_to_args "--sparse-estimator" "$SPARSE_ESTIMATOR")
Expand Down Expand Up @@ -100,14 +105,15 @@ source /root/start_aesm_service.sh

server_port=$(normalize_env_to_args "--server-port" "$PORT1")

cd $EXEC_DIR
if [[ -z "${START_CPU_SN}" ]]; then
START_CPU_SN=0
fi
if [[ -z "${END_CPU_SN}" ]]; then
END_CPU_SN=3
fi

taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python main.py --worker \
taskset -c $START_CPU_SN-$END_CPU_SN stdbuf -o0 gramine-sgx python /gramine/$ROLE/main.py --worker \
--application-id="$APPLICATION_ID" \
--master-addr="$MASTER_HOST:50051" \
--cluster-spec="$CLUSTER_SPEC" \
Expand Down
14 changes: 8 additions & 6 deletions sgx/gramine/CI-Examples/generate-token/python.manifest.template
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ loader.env.SECRET_PROVISION_SET_PF_KEY = "1"

sys.enable_sigterm_injection = true
sys.enable_extra_runtime_domain_names_conf = true
sys.stack.size = "2M"
sys.stack.size = "1M"

fs.mounts = [
{ path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
Expand All @@ -26,6 +26,8 @@ fs.mounts = [
{ path = "{{ python.stdlib }}", uri = "file:{{ python.stdlib }}" },
{ path = "{{ python.distlib }}", uri = "file:{{ python.distlib }}" },
{ path = "/opt", uri = "file:/opt" },
{ path = "/gramine/leader", uri = "file:/gramine/leader"},
{ path = "/gramine/follower", uri = "file:/gramine/follower"},
{ path = "/etc", uri = "file:/etc" },
{ path = "/tmp", uri = "file:/tmp" },
{ path = "/bin", uri = "file:/bin" },
Expand All @@ -47,7 +49,7 @@ sgx.preheat_enclave = false
sgx.nonpie_binary = true
sgx.enable_stats = false
sgx.enclave_size = "16G"
sgx.thread_num = 512
sgx.thread_num = 256
sgx.rpc_thread_num = 0
sgx.protected_files_key = "ffeeddccbbaa99887766554433221100"

Expand All @@ -58,13 +60,11 @@ sgx.trusted_files = [
"file:/usr/{{ arch_libdir }}/",
"file:/etc/ssl/certs/ca-certificates.crt",
"file:/etc/default/apport",
"file:/etc/mime.types",
"file:/etc/mime.types"
]

sgx.allowed_files = [
"file:tensorflow_io.py",
"file:main.py",
"file:config.py",
"file:/opt/tiger/",
"file:/opt/meituan/",
"file:{{ python.stdlib }}/",
Expand Down Expand Up @@ -93,6 +93,8 @@ sgx.allowed_files = [
"file:/usr/local/lib",
"file:/lib/",
"file:/bin/",
"file:/data/"
"file:/data/",
"file:/gramine/leader/",
"file:/gramine/follower/"
]

0 comments on commit 9260efa

Please sign in to comment.