Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve crd-extractor script #441

Merged
merged 1 commit into from
Jan 13, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 62 additions & 53 deletions Utilities/crd-extractor.sh
Original file line number Diff line number Diff line change
@@ -1,108 +1,117 @@
#!/usr/bin/env bash

#######################################
# Dump a single CRD from the cluster as YAML.
# Globals:   TMP_CRD_DIR (read) - directory the YAML file is written to
# Arguments: $1 - one row of `kubectl get crds` output; only the text before
#                 the first space (the CRD name) is used
# Outputs:   kubectl's stdout and stderr are both written to
#            "$TMP_CRD_DIR/<name>.yaml"
# Returns:   status of the kubectl invocation (the last command executed)
#######################################
fetch_crd() {
  # Function-local so a call in the current shell cannot clobber a caller's
  # variable that happens to share the name.
  local filename=${1%% *}
  kubectl get crds "$filename" -o yaml >"$TMP_CRD_DIR/$filename.yaml" 2>&1
}

# ---------------------------------------------------------------------------
# Main script body.
#
# Steps, in order:
#   1. verify python3, kubectl and the python 'yaml' module are available
#   2. create the temporary CRD directory and the final schemas directory
#   3. list the cluster's CRDs and dump each one to YAML, several at a time
#   4. download openapi2jsonschema.py and convert the YAML dumps to JSON schema
#   5. copy the schemas into master-standalone/ with "-stable-" names, then
#      move the originals into one sub-directory per group
#   6. print usage hints on success and remove the temporary directory
#
# Requires bash >= 4.3 (`wait -n`).
# ---------------------------------------------------------------------------

# Check if python3 is installed
if ! command -v python3 &>/dev/null; then
  # Each message ends with a newline so the two lines do not run together.
  printf 'python3 is required for this utility, and is not installed on your machine\n' >&2
  printf 'please visit https://www.python.org/downloads/ to install it\n' >&2
  exit 1
fi

# Check if kubectl is installed
if ! command -v kubectl &>/dev/null; then
  printf 'kubectl is required for this utility, and is not installed on your machine\n' >&2
  printf 'please visit https://kubernetes.io/docs/tasks/tools/#kubectl to install it\n' >&2
  exit 1
fi

# Check if the pyyaml module is installed
if ! echo 'import yaml' | python3 &>/dev/null; then
  printf "the python3 module 'yaml' is required, and is not installed on your machine.\n"

  while true; do
    # Options must come before the prompt text: `read -p -r "..." yn` would
    # use "-r" as the prompt and treat the question as a variable name.
    read -r -p "Do you wish to install this program? (y/n) " yn
    case "$yn" in
      # An empty answer (just Enter) counts as "yes".
      [Yy] | "")
        pip3 install pyyaml
        break
        ;;
      [Nn])
        echo "Exiting..."
        exit
        ;;
      *) echo "Please answer 'y' (yes) or 'n' (no)." ;;
    esac
  done
fi

# Create temp folder for CRDs
TMP_CRD_DIR=$HOME/.datree/crds
mkdir -p "$TMP_CRD_DIR"

# Create final schemas directory
SCHEMAS_DIR=$HOME/.datree/crdSchemas
mkdir -p "$SCHEMAS_DIR"
cd "$SCHEMAS_DIR" || exit 1

# Get a list of all CRDs
printf "Fetching list of CRDs...\n"
# Keep only the rows after the "NAME" header line. The trailing NUL is what
# `read -d ''` stops on, so the read reports success instead of hitting EOF.
IFS=$'\n' read -r -d '' -a CRD_LIST < <(kubectl get crds 2>&1 | sed -n '/NAME/,$p' | tail -n +2 && printf '\0')

# If no CRDs exist in the cluster, exit
if ((${#CRD_LIST[@]} == 0)); then
  printf "No CRDs found in the cluster, exiting...\n"
  exit 0
fi

# Extract CRDs from cluster
FETCHED_CRDS=0
PARALLELISM=10
for crd in "${CRD_LIST[@]}"; do
  printf "Fetching CRD %s/%s...\n" "$((FETCHED_CRDS + 1))" "${#CRD_LIST[@]}"

  # Fetch CRD
  fetch_crd "$crd" &

  # Allow up to $PARALLELISM jobs to run at once: when the limit is reached,
  # block until any one job finishes before starting the next.
  if [[ $(jobs -r -p | wc -l) -ge $PARALLELISM ]]; then
    wait -n
  fi
  ((++FETCHED_CRDS))
done

# The loop only throttles; jobs started last may still be running here.
# Wait for all of them so the converter never sees a missing or partial file.
wait

# Download converter script
# --fail stops an HTTP error page from being saved as the script; a failed
# download is reported instead of surfacing later as a python error.
if ! curl --fail --silent --show-error \
  https://raw.githubusercontent.com/yannh/kubeconform/master/scripts/openapi2jsonschema.py \
  --output "$TMP_CRD_DIR/openapi2jsonschema.py"; then
  printf 'failed to download openapi2jsonschema.py\n' >&2
  rm -rf -- "${TMP_CRD_DIR:?}"
  exit 1
fi

# Convert crds to jsonSchema
FILENAME_FORMAT="{fullgroup}_{kind}_{version}" python3 "$TMP_CRD_DIR/openapi2jsonschema.py" "$TMP_CRD_DIR"/*.yaml
conversionResult=$?

# Copy and rename files to support kubeval
rm -rf "$SCHEMAS_DIR/master-standalone"
mkdir -p "$SCHEMAS_DIR/master-standalone"
cp "$SCHEMAS_DIR"/*.json "$SCHEMAS_DIR/master-standalone"
for standaloneSchema in "$SCHEMAS_DIR"/master-standalone/*.json; do
  # An unmatched glob stays literal; skip it.
  [[ -e "$standaloneSchema" ]] || continue
  # Replace the first "_" of the file name only, never one that appears in
  # the directory part of the path (e.g. inside $HOME).
  standaloneName=${standaloneSchema##*/}
  renamedName=${standaloneName/_/-stable-}
  if [[ "$renamedName" != "$standaloneName" ]]; then
    mv -f -- "$standaloneSchema" "${standaloneSchema%/*}/$renamedName"
  fi
done

# Organize schemas by group
# File names are "<group>_<rest>"; each file is moved to "<group>/<rest>"
# relative to the current directory, which is $SCHEMAS_DIR (see cd above).
for schema in "$SCHEMAS_DIR"/*.json; do
  [[ -e "$schema" ]] || continue
  crdFileName=${schema##*/}
  crdGroup=${crdFileName%%_*}
  outName=${crdFileName#*_}
  mkdir -p -- "$crdGroup"
  mv -- "$schema" "./$crdGroup/$outName"
done

CYAN='\033[0;36m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

if ((conversionResult == 0)); then
  # Variable data is passed as arguments so a "%" or "\" in it cannot be
  # interpreted as part of the format string.
  printf "${GREEN}Successfully converted %s CRDs to JSON schema${NC}\n" "$FETCHED_CRDS"

  printf "\nTo validate a CR using various tools, run the relevant command:\n"
  printf "\n- ${CYAN}datree:${NC}\n\$ datree test /path/to/file\n"
  # NOTE(review): schemas are moved into per-group sub-directories above, while
  # this hint points at the top of crdSchemas - confirm the template is right.
  printf "\n- ${CYAN}kubeconform:${NC}\n\$ kubeconform -summary -output json -schema-location default -schema-location '%s/.datree/crdSchemas/{{ .ResourceKind }}_{{ .ResourceAPIVersion }}.json' /path/to/file\n" "$HOME"
  printf "\n- ${CYAN}kubeval:${NC}\n\$ kubeval --additional-schema-locations file:\"%s/.datree/crdSchemas\" /path/to/file\n\n" "$HOME"
fi

# ":?" aborts instead of expanding to an empty path if the variable is unset.
rm -rf -- "${TMP_CRD_DIR:?}"