From f85bd3f702921ddb06c842f7e21bb7db94990f65 Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Wed, 4 Dec 2024 02:02:48 +0100 Subject: [PATCH 01/13] Workflow for publishing UCD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Basic separation of input/output: - $1 and $2 are now static because the workflow is run in an isolated environment and the cwd is always the repo root. - Set dynamic inputs in the workflow as env variables. - Upload built files as a workflow artifact. Git doesn’t preserve complete file permissions, and the zipping/unzipping process doesn’t reliably preserve file permissions. Therefore file permissions need to be normalized at deployment site. The “Copy files from elsewhere” note needs to be documented in data-workflow.md#publication. --- .../workflows/publish-ucd.sh | 30 ++----------------- .github/workflows/publish-ucd.yml | 23 ++++++++++++++ 2 files changed, 26 insertions(+), 27 deletions(-) rename pub/copy-ucd-to-draft.sh => .github/workflows/publish-ucd.sh (55%) create mode 100644 .github/workflows/publish-ucd.yml diff --git a/pub/copy-ucd-to-draft.sh b/.github/workflows/publish-ucd.sh similarity index 55% rename from pub/copy-ucd-to-draft.sh rename to .github/workflows/publish-ucd.sh index 041cd042c..2e85093a9 100755 --- a/pub/copy-ucd-to-draft.sh +++ b/.github/workflows/publish-ucd.sh @@ -1,19 +1,7 @@ -# Script for -# https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publish-a-ucd-snapshot -# -# Invoke like this: -# -# pub/copy-ucd-to-draft.sh ~/unitools/mine/src /tmp/unicode/Public/draft +# See publish-ucd.yml -UNICODETOOLS=$1 -DRAFT=$2 - -UNITOOLS_DATA=$UNICODETOOLS/unicodetools/data - -# Adjust the following for each year and version as needed. 
-COPY_YEAR=2024 -UNI_VER=17.0.0 -EMOJI_VER=17.0 +UNITOOLS_DATA=unicodetools/data +DRAFT=dist TODAY=`date --iso-8601` @@ -37,9 +25,6 @@ rm -r $DRAFT/UCD/ucd/emoji mv $DRAFT/UCD/ucd/version-ReadMe.txt $DRAFT/UCD/ReadMe.txt mv $DRAFT/UCD/ucd/zipped-ReadMe.txt $DRAFT/zipped/ReadMe.txt -# Fix permissions. Everyone can read, and search directories. -chmod a+rX -R $DRAFT - # Update the readmes in-place (-i) as set up above. find $DRAFT -name '*ReadMe.txt' | xargs sed -i -f $DRAFT/sed-readmes.txt @@ -49,12 +34,3 @@ rm $DRAFT/UCD/ucd/UCD.zip # Cleanup rm $DRAFT/sed-readmes.txt - -rm $DRAFT/ucd-snapshot.zip -(cd $DRAFT; zip -r ucd-snapshot.zip *) - -echo "--------------------" -echo "Copy files from elsewhere:" -echo "- Unihan.zip to $DRAFT/UCD/ucd" -echo "- Unihan.zip to $DRAFT/zipped" - diff --git a/.github/workflows/publish-ucd.yml b/.github/workflows/publish-ucd.yml new file mode 100644 index 000000000..b4e381815 --- /dev/null +++ b/.github/workflows/publish-ucd.yml @@ -0,0 +1,23 @@ +# See https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publication + +# Test locally with https://github.com/nektos/act: act --workflows .github/workflows/publish-ucd.yml + +name: Publish UCD + +on: + workflow_dispatch: + +env: + COPY_YEAR: "2024" + UNI_VER: "17.0.0" + EMOJI_VER: "17.0" + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: .github/workflows/publish-ucd.sh + - uses: actions/upload-artifact@v4 + with: + path: dist From 4d7cad1c2d795b3f05d7d5c5b904eaac105efbd9 Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Wed, 4 Dec 2024 02:20:02 +0100 Subject: [PATCH 02/13] Alpha mode --- .github/workflows/publish-ucd.sh | 16 ++++++- .github/workflows/publish-ucd.yml | 12 +++++- pub/copy-alpha-to-draft.sh | 71 ------------------------------- 3 files changed, 26 insertions(+), 73 deletions(-) delete mode 100755 pub/copy-alpha-to-draft.sh diff --git a/.github/workflows/publish-ucd.sh b/.github/workflows/publish-ucd.sh index 
2e85093a9..d68641b25 100755 --- a/.github/workflows/publish-ucd.sh +++ b/.github/workflows/publish-ucd.sh @@ -21,10 +21,24 @@ mkdir -p $DRAFT/UCD/ucd mkdir -p $DRAFT/zipped cp -r $UNITOOLS_DATA/ucd/dev/* $DRAFT/UCD/ucd rm -r $DRAFT/UCD/ucd/Unihan -rm -r $DRAFT/UCD/ucd/emoji mv $DRAFT/UCD/ucd/version-ReadMe.txt $DRAFT/UCD/ReadMe.txt mv $DRAFT/UCD/ucd/zipped-ReadMe.txt $DRAFT/zipped/ReadMe.txt +if [ $MODE = "alpha" ]; then + mkdir -p $DRAFT/emoji + cp $UNITOOLS_DATA/emoji/dev/* $DRAFT/emoji + + mkdir -p $DRAFT/idna + cp $UNITOOLS_DATA/idna/dev/* $DRAFT/idna + + mkdir -p $DRAFT/idna2008derived + rm $DRAFT/idna2008derived/* + cp $UNITOOLS_DATA/idna/idna2008derived/Idna2008-$UNI_VER.txt $DRAFT/idna2008derived + cp $UNITOOLS_DATA/idna/idna2008derived/ReadMe.txt $DRAFT/idna2008derived +else + rm -r $DRAFT/UCD/ucd/emoji +fi + # Update the readmes in-place (-i) as set up above. find $DRAFT -name '*ReadMe.txt' | xargs sed -i -f $DRAFT/sed-readmes.txt diff --git a/.github/workflows/publish-ucd.yml b/.github/workflows/publish-ucd.yml index b4e381815..6ab46e685 100644 --- a/.github/workflows/publish-ucd.yml +++ b/.github/workflows/publish-ucd.yml @@ -1,16 +1,26 @@ # See https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publication -# Test locally with https://github.com/nektos/act: act --workflows .github/workflows/publish-ucd.yml +# Test locally with https://github.com/nektos/act: +# act --workflows .github/workflows/publish-ucd.yml --input mode=snapshot name: Publish UCD on: workflow_dispatch: + inputs: + mode: + description: Publication mode + type: choice + options: + - snapshot + - alpha + default: snapshot env: COPY_YEAR: "2024" UNI_VER: "17.0.0" EMOJI_VER: "17.0" + MODE: ${{ inputs.mode }} jobs: build: diff --git a/pub/copy-alpha-to-draft.sh b/pub/copy-alpha-to-draft.sh deleted file mode 100755 index e388c1d37..000000000 --- a/pub/copy-alpha-to-draft.sh +++ /dev/null @@ -1,71 +0,0 @@ -# Script for -# 
https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publish-an-alpha-snapshot -# -# Invoke like this: -# -# pub/copy-alpha-to-draft.sh ~/unitools/mine/src /tmp/unicode/Public/draft - -UNICODETOOLS=$1 -DRAFT=$2 - -UNITOOLS_DATA=$UNICODETOOLS/unicodetools/data - -# Adjust the following for each year and version as needed. -COPY_YEAR=2024 -UNI_VER=17.0.0 -EMOJI_VER=17.0 - -TODAY=`date --iso-8601` - -mkdir -p $DRAFT - -cat > $DRAFT/sed-readmes.txt << eof -s/COPY_YEAR/$COPY_YEAR/ -s/PUB_DATE/$TODAY/ -s/PUB_STATUS/draft/ -s/UNI_VER/$UNI_VER/ -s/EMOJI_VER/$EMOJI_VER/ -s%PUBLIC_EMOJI%Public/draft/emoji% -s%PUBLIC_UCD%Public/draft/UCD% -eof - -mkdir -p $DRAFT/UCD/ucd -mkdir -p $DRAFT/zipped -cp -r $UNITOOLS_DATA/ucd/dev/* $DRAFT/UCD/ucd -rm -r $DRAFT/UCD/ucd/Unihan -mv $DRAFT/UCD/ucd/version-ReadMe.txt $DRAFT/UCD/ReadMe.txt -mv $DRAFT/UCD/ucd/zipped-ReadMe.txt $DRAFT/zipped/ReadMe.txt - -mkdir -p $DRAFT/emoji -cp $UNITOOLS_DATA/emoji/dev/* $DRAFT/emoji - -mkdir -p $DRAFT/idna -cp $UNITOOLS_DATA/idna/dev/* $DRAFT/idna - -mkdir -p $DRAFT/idna2008derived -rm $DRAFT/idna2008derived/* -cp $UNITOOLS_DATA/idna/idna2008derived/Idna2008-$UNI_VER.txt $DRAFT/idna2008derived -cp $UNITOOLS_DATA/idna/idna2008derived/ReadMe.txt $DRAFT/idna2008derived - -# Fix permissions. Everyone can read, and search directories. -chmod a+rX -R $DRAFT - -# Update the readmes in-place (-i) as set up above. 
-find $DRAFT -name '*ReadMe.txt' | xargs sed -i -f $DRAFT/sed-readmes.txt -# Zip files for some types of data, after fixing permissions -rm $DRAFT/UCD/ucd/UCD.zip -(cd $DRAFT/UCD/ucd; zip -r UCD.zip * && mv UCD.zip $DRAFT/zipped) - -# Cleanup -rm $DRAFT/sed-readmes.txt - -rm $DRAFT/alpha.zip -(cd $DRAFT; zip -r alpha.zip *) - -echo "--------------------" -echo "Copy files from elsewhere:" -echo "- Unihan.zip to $DRAFT/UCD/ucd" -echo "- Unihan.zip to $DRAFT/zipped" -echo "- alpha charts to $DRAFT/UCD/charts" - From 97e269cbf7390445903e094ffae8e2d827de127c Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Wed, 4 Dec 2024 02:42:44 +0100 Subject: [PATCH 03/13] Revise shell script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Corrected how the dist directory is referenced when moving UCD.zip. UNITOOLS_DATA and DRAFT are no longer necessary. “dist” as a conventional target directory is easier to read than a variable. No need to remove old ZIP file because a workflow runs in an isolated environment. 
--- .github/workflows/publish-ucd.sh | 44 +++++++++++++++----------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/.github/workflows/publish-ucd.sh b/.github/workflows/publish-ucd.sh index d68641b25..8add71fd0 100755 --- a/.github/workflows/publish-ucd.sh +++ b/.github/workflows/publish-ucd.sh @@ -1,13 +1,10 @@ # See publish-ucd.yml -UNITOOLS_DATA=unicodetools/data -DRAFT=dist - TODAY=`date --iso-8601` -mkdir -p $DRAFT +mkdir dist -cat > $DRAFT/sed-readmes.txt << eof +cat > dist/sed-readmes.txt << eof s/COPY_YEAR/$COPY_YEAR/ s/PUB_DATE/$TODAY/ s/PUB_STATUS/draft/ @@ -17,34 +14,33 @@ s%PUBLIC_EMOJI%Public/draft/emoji% s%PUBLIC_UCD%Public/draft/UCD% eof -mkdir -p $DRAFT/UCD/ucd -mkdir -p $DRAFT/zipped -cp -r $UNITOOLS_DATA/ucd/dev/* $DRAFT/UCD/ucd -rm -r $DRAFT/UCD/ucd/Unihan -mv $DRAFT/UCD/ucd/version-ReadMe.txt $DRAFT/UCD/ReadMe.txt -mv $DRAFT/UCD/ucd/zipped-ReadMe.txt $DRAFT/zipped/ReadMe.txt +mkdir -p dist/UCD/ucd +mkdir dist/zipped +cp -r unicodetools/data/ucd/dev/* dist/UCD/ucd +rm -r dist/UCD/ucd/Unihan +mv dist/UCD/ucd/version-ReadMe.txt dist/UCD/ReadMe.txt +mv dist/UCD/ucd/zipped-ReadMe.txt dist/zipped/ReadMe.txt if [ $MODE = "alpha" ]; then - mkdir -p $DRAFT/emoji - cp $UNITOOLS_DATA/emoji/dev/* $DRAFT/emoji + mkdir dist/emoji + cp unicodetools/data/emoji/dev/* dist/emoji - mkdir -p $DRAFT/idna - cp $UNITOOLS_DATA/idna/dev/* $DRAFT/idna + mkdir dist/idna + cp unicodetools/data/idna/dev/* dist/idna - mkdir -p $DRAFT/idna2008derived - rm $DRAFT/idna2008derived/* - cp $UNITOOLS_DATA/idna/idna2008derived/Idna2008-$UNI_VER.txt $DRAFT/idna2008derived - cp $UNITOOLS_DATA/idna/idna2008derived/ReadMe.txt $DRAFT/idna2008derived + mkdir dist/idna2008derived + cp unicodetools/data/idna/idna2008derived/Idna2008-$UNI_VER.txt dist/idna2008derived + cp unicodetools/data/idna/idna2008derived/ReadMe.txt dist/idna2008derived else - rm -r $DRAFT/UCD/ucd/emoji + rm -r dist/UCD/ucd/emoji fi # Update the readmes in-place (-i) as set up above. 
-find $DRAFT -name '*ReadMe.txt' | xargs sed -i -f $DRAFT/sed-readmes.txt +find dist -name '*ReadMe.txt' | xargs sed -i -f dist/sed-readmes.txt # Zip files for some types of data, after fixing permissions -rm $DRAFT/UCD/ucd/UCD.zip -(cd $DRAFT/UCD/ucd; zip -r UCD.zip * && mv UCD.zip $DRAFT/zipped) +(cd dist/UCD/ucd; zip -r UCD.zip *) +mv dist/UCD/ucd/UCD.zip dist/zipped # Cleanup -rm $DRAFT/sed-readmes.txt +rm dist/sed-readmes.txt From 273655eaf54c25f5f9fa34a63a2a1445e3f359dd Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Wed, 4 Dec 2024 23:16:02 +0100 Subject: [PATCH 04/13] Customize run-name; try sparse-checkout --- .github/workflows/publish-ucd.sh | 2 +- .github/workflows/publish-ucd.yml | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/publish-ucd.sh b/.github/workflows/publish-ucd.sh index 8add71fd0..62053061c 100755 --- a/.github/workflows/publish-ucd.sh +++ b/.github/workflows/publish-ucd.sh @@ -21,7 +21,7 @@ rm -r dist/UCD/ucd/Unihan mv dist/UCD/ucd/version-ReadMe.txt dist/UCD/ReadMe.txt mv dist/UCD/ucd/zipped-ReadMe.txt dist/zipped/ReadMe.txt -if [ $MODE = "alpha" ]; then +if [ $MODE = "Alpha" ]; then mkdir dist/emoji cp unicodetools/data/emoji/dev/* dist/emoji diff --git a/.github/workflows/publish-ucd.yml b/.github/workflows/publish-ucd.yml index 6ab46e685..1588a1f6c 100644 --- a/.github/workflows/publish-ucd.yml +++ b/.github/workflows/publish-ucd.yml @@ -1,10 +1,12 @@ # See https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publication # Test locally with https://github.com/nektos/act: -# act --workflows .github/workflows/publish-ucd.yml --input mode=snapshot +# act --workflows .github/workflows/publish-ucd.yml --input mode=Alpha name: Publish UCD +run-name: "${{ github.workflow }}: ${{ inputs.mode }}" + on: workflow_dispatch: inputs: @@ -12,9 +14,9 @@ on: description: Publication mode type: choice options: - - snapshot - - alpha - default: snapshot + - Snapshot + - Alpha + 
default: Snapshot env: COPY_YEAR: "2024" @@ -27,6 +29,13 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + sparse-checkout: | + .github/workflows + unicodetools/data/ucd/dev + unicodetools/data/emoji/dev + unicodetools/data/idna/dev + unicodetools/data/idna/idna2008derived - run: .github/workflows/publish-ucd.sh - uses: actions/upload-artifact@v4 with: From 55166cdc3740feb2469372637dd21a43b8c73552 Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Wed, 4 Dec 2024 23:42:10 +0100 Subject: [PATCH 05/13] Beta mode; organize code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use modern syntax $(…) in place of `…`. Quote $MODE in comparison to avoid the confusing “unary operator expected” error when value is empty in testing. Copy directories directly (rather than copy content of a directory to a ready-made directory) whenever possible, for simpler code. --- .github/workflows/publish-ucd.sh | 48 +++++++++------- .github/workflows/publish-ucd.yml | 2 + pub/copy-beta-to-draft.sh | 93 ------------------------------- 3 files changed, 31 insertions(+), 112 deletions(-) delete mode 100755 pub/copy-beta-to-draft.sh diff --git a/.github/workflows/publish-ucd.sh b/.github/workflows/publish-ucd.sh index 62053061c..48b48cd0f 100755 --- a/.github/workflows/publish-ucd.sh +++ b/.github/workflows/publish-ucd.sh @@ -1,46 +1,56 @@ # See publish-ucd.yml -TODAY=`date --iso-8601` - mkdir dist +PUB_DATE=$(date --iso-8601) + cat > dist/sed-readmes.txt << eof s/COPY_YEAR/$COPY_YEAR/ -s/PUB_DATE/$TODAY/ +s/PUB_DATE/$PUB_DATE/ s/PUB_STATUS/draft/ s/UNI_VER/$UNI_VER/ s/EMOJI_VER/$EMOJI_VER/ +s/TR10_REV/$TR10_REV/ s%PUBLIC_EMOJI%Public/draft/emoji% s%PUBLIC_UCD%Public/draft/UCD% eof -mkdir -p dist/UCD/ucd -mkdir dist/zipped -cp -r unicodetools/data/ucd/dev/* dist/UCD/ucd -rm -r dist/UCD/ucd/Unihan +mkdir dist/UCD +cp -R unicodetools/data/ucd/dev dist/UCD/ucd mv dist/UCD/ucd/version-ReadMe.txt dist/UCD/ReadMe.txt -mv 
dist/UCD/ucd/zipped-ReadMe.txt dist/zipped/ReadMe.txt +rm -r dist/UCD/ucd/Unihan +if [ "$MODE" = "Snapshot" ]; then + rm -r dist/UCD/ucd/emoji +fi -if [ $MODE = "Alpha" ]; then - mkdir dist/emoji - cp unicodetools/data/emoji/dev/* dist/emoji +if [ "$MODE" = "Alpha" ] || [ "$MODE" = "Beta" ]; then + cp -R unicodetools/data/emoji/dev dist/emoji - mkdir dist/idna - cp unicodetools/data/idna/dev/* dist/idna + cp -R unicodetools/data/idna/dev dist/idna mkdir dist/idna2008derived - cp unicodetools/data/idna/idna2008derived/Idna2008-$UNI_VER.txt dist/idna2008derived cp unicodetools/data/idna/idna2008derived/ReadMe.txt dist/idna2008derived -else - rm -r dist/UCD/ucd/emoji + cp unicodetools/data/idna/idna2008derived/Idna2008-$UNI_VER.txt dist/idna2008derived + + if [ "$MODE" = "Beta" ]; then + cp -R unicodetools/data/uca/dev dist/UCA + sed -i -f dist/sed-readmes.txt dist/UCA/CollationTest.html + + cp -R unicodetools/data/security/dev dist/security + fi fi # Update the readmes in-place (-i) as set up above. find dist -name '*ReadMe.txt' | xargs sed -i -f dist/sed-readmes.txt +rm dist/sed-readmes.txt -# Zip files for some types of data, after fixing permissions +mkdir dist/zipped +mv dist/UCD/ucd/zipped-ReadMe.txt dist/zipped/ReadMe.txt (cd dist/UCD/ucd; zip -r UCD.zip *) mv dist/UCD/ucd/UCD.zip dist/zipped -# Cleanup -rm dist/sed-readmes.txt +if [ "$MODE" = "Beta" ]; then + (cd dist/UCA; zip -r CollationTest.zip CollationTest; rm -r CollationTest) + + (cd dist/security; zip -r uts39-data-$UNI_VER.zip *) +fi diff --git a/.github/workflows/publish-ucd.yml b/.github/workflows/publish-ucd.yml index 1588a1f6c..e576f6df9 100644 --- a/.github/workflows/publish-ucd.yml +++ b/.github/workflows/publish-ucd.yml @@ -16,12 +16,14 @@ on: options: - Snapshot - Alpha + - Beta default: Snapshot env: COPY_YEAR: "2024" UNI_VER: "17.0.0" EMOJI_VER: "17.0" + TR10_REV: "tr10-52" # UTS #10 release revision number to be used in CollationTest.html: One more than the last release revision number. 
MODE: ${{ inputs.mode }} jobs: diff --git a/pub/copy-beta-to-draft.sh b/pub/copy-beta-to-draft.sh deleted file mode 100755 index 84c30ae08..000000000 --- a/pub/copy-beta-to-draft.sh +++ /dev/null @@ -1,93 +0,0 @@ -# Script for -# https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publish-a-beta-snapshot -# -# Invoke like this: -# -# pub/copy-beta-to-draft.sh ~/unitools/mine/src /tmp/unicode/Public/draft - -UNICODETOOLS=$1 -DRAFT=$2 - -UNITOOLS_DATA=$UNICODETOOLS/unicodetools/data - -# Adjust the following for each year and version as needed. -COPY_YEAR=2024 -UNI_VER=17.0.0 -EMOJI_VER=17.0 -# UTS #10 release revision number to be used in CollationTest.html: -# One more than the last release revision number. -TR10_REV=tr10-52 - -TODAY=`date --iso-8601` - -mkdir -p $DRAFT - -cat > $DRAFT/sed-readmes.txt << eof -s/COPY_YEAR/$COPY_YEAR/ -s/PUB_DATE/$TODAY/ -s/PUB_STATUS/draft/ -s/UNI_VER/$UNI_VER/ -s/EMOJI_VER/$EMOJI_VER/ -s/TR10_REV/$TR10_REV/ -s%PUBLIC_EMOJI%Public/draft/emoji% -s%PUBLIC_UCD%Public/draft/UCD% -eof - -mkdir -p $DRAFT/UCD/ucd -mkdir -p $DRAFT/zipped -cp -r $UNITOOLS_DATA/ucd/dev/* $DRAFT/UCD/ucd -rm -r $DRAFT/UCD/ucd/Unihan -mv $DRAFT/UCD/ucd/version-ReadMe.txt $DRAFT/UCD/ReadMe.txt -mv $DRAFT/UCD/ucd/zipped-ReadMe.txt $DRAFT/zipped/ReadMe.txt - -mkdir -p $DRAFT/UCA -cp -r $UNITOOLS_DATA/uca/dev/* $DRAFT/UCA -sed -i -f $DRAFT/sed-readmes.txt $DRAFT/UCA/CollationTest.html - -mkdir -p $DRAFT/emoji -cp $UNITOOLS_DATA/emoji/dev/* $DRAFT/emoji - -mkdir -p $DRAFT/idna -cp $UNITOOLS_DATA/idna/dev/* $DRAFT/idna - -mkdir -p $DRAFT/idna2008derived -rm $DRAFT/idna2008derived/* -cp $UNITOOLS_DATA/idna/idna2008derived/Idna2008-$UNI_VER.txt $DRAFT/idna2008derived -cp $UNITOOLS_DATA/idna/idna2008derived/ReadMe.txt $DRAFT/idna2008derived - -mkdir -p $DRAFT/security -cp $UNITOOLS_DATA/security/dev/* $DRAFT/security - -# Fix permissions. Everyone can read, and search directories. 
-chmod a+rX -R $DRAFT - -# Update the readmes in-place (-i) as set up above. -find $DRAFT -name '*ReadMe.txt' | xargs sed -i -f $DRAFT/sed-readmes.txt - -# Zip files for some types of data, after fixing permissions -rm $DRAFT/UCD/ucd/UCD.zip -(cd $DRAFT/UCD/ucd; zip -r UCD.zip * && mv UCD.zip $DRAFT/zipped) - -rm $DRAFT/UCA/CollationTest.zip -(cd $DRAFT/UCA; zip -r CollationTest.zip CollationTest && rm -r CollationTest) - -rm $DRAFT/security/*.zip -(cd $DRAFT/security; zip -r uts39-data-$UNI_VER.zip *) - -# Fix permissions again to catch the zip files -chmod a+rX -R $DRAFT - -# Cleanup -rm $DRAFT/sed-readmes.txt - -# Zip file to deliver the whole set of data files -rm $DRAFT/beta.zip -(cd $DRAFT; zip -r beta.zip *) - -echo "--------------------" -echo "Copy files from elsewhere:" -echo "- Unihan.zip to $DRAFT/UCD/ucd" -echo "- Unihan.zip to $DRAFT/zipped" -echo "- UCDXML files to $DRAFT/UCD/ucdxml" -echo "- beta charts to $DRAFT/UCD/charts" - From 73c74f14ebccd4879461a5412b798ec2637c2045 Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Wed, 11 Dec 2024 11:47:02 +0100 Subject: [PATCH 06/13] Flatten control flow --- .github/workflows/publish-ucd.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/publish-ucd.sh b/.github/workflows/publish-ucd.sh index 48b48cd0f..90f992d9b 100755 --- a/.github/workflows/publish-ucd.sh +++ b/.github/workflows/publish-ucd.sh @@ -19,6 +19,7 @@ mkdir dist/UCD cp -R unicodetools/data/ucd/dev dist/UCD/ucd mv dist/UCD/ucd/version-ReadMe.txt dist/UCD/ReadMe.txt rm -r dist/UCD/ucd/Unihan + if [ "$MODE" = "Snapshot" ]; then rm -r dist/UCD/ucd/emoji fi @@ -31,13 +32,13 @@ if [ "$MODE" = "Alpha" ] || [ "$MODE" = "Beta" ]; then mkdir dist/idna2008derived cp unicodetools/data/idna/idna2008derived/ReadMe.txt dist/idna2008derived cp unicodetools/data/idna/idna2008derived/Idna2008-$UNI_VER.txt dist/idna2008derived +fi - if [ "$MODE" = "Beta" ]; then - cp -R unicodetools/data/uca/dev dist/UCA - sed -i -f 
dist/sed-readmes.txt dist/UCA/CollationTest.html +if [ "$MODE" = "Beta" ]; then + cp -R unicodetools/data/uca/dev dist/UCA + sed -i -f dist/sed-readmes.txt dist/UCA/CollationTest.html - cp -R unicodetools/data/security/dev dist/security - fi + cp -R unicodetools/data/security/dev dist/security fi # Update the readmes in-place (-i) as set up above. From 24bfa68755997da44fb07bf7d31f846a6d8b2377 Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Thu, 12 Dec 2024 00:43:24 +0100 Subject: [PATCH 07/13] Reluctantly restore deleted files This is gonna make the change history funny, but Markus requested this: https://github.com/unicode-org/unicodetools/pull/980#issuecomment-2536762628 --- pub/copy-alpha-to-draft.sh | 71 +++++++++++++++++++++++++++++ pub/copy-beta-to-draft.sh | 93 ++++++++++++++++++++++++++++++++++++++ pub/copy-ucd-to-draft.sh | 60 ++++++++++++++++++++++++ 3 files changed, 224 insertions(+) create mode 100755 pub/copy-alpha-to-draft.sh create mode 100755 pub/copy-beta-to-draft.sh create mode 100755 pub/copy-ucd-to-draft.sh diff --git a/pub/copy-alpha-to-draft.sh b/pub/copy-alpha-to-draft.sh new file mode 100755 index 000000000..e388c1d37 --- /dev/null +++ b/pub/copy-alpha-to-draft.sh @@ -0,0 +1,71 @@ +# Script for +# https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publish-an-alpha-snapshot +# +# Invoke like this: +# +# pub/copy-alpha-to-draft.sh ~/unitools/mine/src /tmp/unicode/Public/draft + +UNICODETOOLS=$1 +DRAFT=$2 + +UNITOOLS_DATA=$UNICODETOOLS/unicodetools/data + +# Adjust the following for each year and version as needed. 
+COPY_YEAR=2024 +UNI_VER=17.0.0 +EMOJI_VER=17.0 + +TODAY=`date --iso-8601` + +mkdir -p $DRAFT + +cat > $DRAFT/sed-readmes.txt << eof +s/COPY_YEAR/$COPY_YEAR/ +s/PUB_DATE/$TODAY/ +s/PUB_STATUS/draft/ +s/UNI_VER/$UNI_VER/ +s/EMOJI_VER/$EMOJI_VER/ +s%PUBLIC_EMOJI%Public/draft/emoji% +s%PUBLIC_UCD%Public/draft/UCD% +eof + +mkdir -p $DRAFT/UCD/ucd +mkdir -p $DRAFT/zipped +cp -r $UNITOOLS_DATA/ucd/dev/* $DRAFT/UCD/ucd +rm -r $DRAFT/UCD/ucd/Unihan +mv $DRAFT/UCD/ucd/version-ReadMe.txt $DRAFT/UCD/ReadMe.txt +mv $DRAFT/UCD/ucd/zipped-ReadMe.txt $DRAFT/zipped/ReadMe.txt + +mkdir -p $DRAFT/emoji +cp $UNITOOLS_DATA/emoji/dev/* $DRAFT/emoji + +mkdir -p $DRAFT/idna +cp $UNITOOLS_DATA/idna/dev/* $DRAFT/idna + +mkdir -p $DRAFT/idna2008derived +rm $DRAFT/idna2008derived/* +cp $UNITOOLS_DATA/idna/idna2008derived/Idna2008-$UNI_VER.txt $DRAFT/idna2008derived +cp $UNITOOLS_DATA/idna/idna2008derived/ReadMe.txt $DRAFT/idna2008derived + +# Fix permissions. Everyone can read, and search directories. +chmod a+rX -R $DRAFT + +# Update the readmes in-place (-i) as set up above. 
+find $DRAFT -name '*ReadMe.txt' | xargs sed -i -f $DRAFT/sed-readmes.txt + +# Zip files for some types of data, after fixing permissions +rm $DRAFT/UCD/ucd/UCD.zip +(cd $DRAFT/UCD/ucd; zip -r UCD.zip * && mv UCD.zip $DRAFT/zipped) + +# Cleanup +rm $DRAFT/sed-readmes.txt + +rm $DRAFT/alpha.zip +(cd $DRAFT; zip -r alpha.zip *) + +echo "--------------------" +echo "Copy files from elsewhere:" +echo "- Unihan.zip to $DRAFT/UCD/ucd" +echo "- Unihan.zip to $DRAFT/zipped" +echo "- alpha charts to $DRAFT/UCD/charts" + diff --git a/pub/copy-beta-to-draft.sh b/pub/copy-beta-to-draft.sh new file mode 100755 index 000000000..84c30ae08 --- /dev/null +++ b/pub/copy-beta-to-draft.sh @@ -0,0 +1,93 @@ +# Script for +# https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publish-a-beta-snapshot +# +# Invoke like this: +# +# pub/copy-beta-to-draft.sh ~/unitools/mine/src /tmp/unicode/Public/draft + +UNICODETOOLS=$1 +DRAFT=$2 + +UNITOOLS_DATA=$UNICODETOOLS/unicodetools/data + +# Adjust the following for each year and version as needed. +COPY_YEAR=2024 +UNI_VER=17.0.0 +EMOJI_VER=17.0 +# UTS #10 release revision number to be used in CollationTest.html: +# One more than the last release revision number. 
+TR10_REV=tr10-52 + +TODAY=`date --iso-8601` + +mkdir -p $DRAFT + +cat > $DRAFT/sed-readmes.txt << eof +s/COPY_YEAR/$COPY_YEAR/ +s/PUB_DATE/$TODAY/ +s/PUB_STATUS/draft/ +s/UNI_VER/$UNI_VER/ +s/EMOJI_VER/$EMOJI_VER/ +s/TR10_REV/$TR10_REV/ +s%PUBLIC_EMOJI%Public/draft/emoji% +s%PUBLIC_UCD%Public/draft/UCD% +eof + +mkdir -p $DRAFT/UCD/ucd +mkdir -p $DRAFT/zipped +cp -r $UNITOOLS_DATA/ucd/dev/* $DRAFT/UCD/ucd +rm -r $DRAFT/UCD/ucd/Unihan +mv $DRAFT/UCD/ucd/version-ReadMe.txt $DRAFT/UCD/ReadMe.txt +mv $DRAFT/UCD/ucd/zipped-ReadMe.txt $DRAFT/zipped/ReadMe.txt + +mkdir -p $DRAFT/UCA +cp -r $UNITOOLS_DATA/uca/dev/* $DRAFT/UCA +sed -i -f $DRAFT/sed-readmes.txt $DRAFT/UCA/CollationTest.html + +mkdir -p $DRAFT/emoji +cp $UNITOOLS_DATA/emoji/dev/* $DRAFT/emoji + +mkdir -p $DRAFT/idna +cp $UNITOOLS_DATA/idna/dev/* $DRAFT/idna + +mkdir -p $DRAFT/idna2008derived +rm $DRAFT/idna2008derived/* +cp $UNITOOLS_DATA/idna/idna2008derived/Idna2008-$UNI_VER.txt $DRAFT/idna2008derived +cp $UNITOOLS_DATA/idna/idna2008derived/ReadMe.txt $DRAFT/idna2008derived + +mkdir -p $DRAFT/security +cp $UNITOOLS_DATA/security/dev/* $DRAFT/security + +# Fix permissions. Everyone can read, and search directories. +chmod a+rX -R $DRAFT + +# Update the readmes in-place (-i) as set up above. 
+find $DRAFT -name '*ReadMe.txt' | xargs sed -i -f $DRAFT/sed-readmes.txt + +# Zip files for some types of data, after fixing permissions +rm $DRAFT/UCD/ucd/UCD.zip +(cd $DRAFT/UCD/ucd; zip -r UCD.zip * && mv UCD.zip $DRAFT/zipped) + +rm $DRAFT/UCA/CollationTest.zip +(cd $DRAFT/UCA; zip -r CollationTest.zip CollationTest && rm -r CollationTest) + +rm $DRAFT/security/*.zip +(cd $DRAFT/security; zip -r uts39-data-$UNI_VER.zip *) + +# Fix permissions again to catch the zip files +chmod a+rX -R $DRAFT + +# Cleanup +rm $DRAFT/sed-readmes.txt + +# Zip file to deliver the whole set of data files +rm $DRAFT/beta.zip +(cd $DRAFT; zip -r beta.zip *) + +echo "--------------------" +echo "Copy files from elsewhere:" +echo "- Unihan.zip to $DRAFT/UCD/ucd" +echo "- Unihan.zip to $DRAFT/zipped" +echo "- UCDXML files to $DRAFT/UCD/ucdxml" +echo "- beta charts to $DRAFT/UCD/charts" + diff --git a/pub/copy-ucd-to-draft.sh b/pub/copy-ucd-to-draft.sh new file mode 100755 index 000000000..041cd042c --- /dev/null +++ b/pub/copy-ucd-to-draft.sh @@ -0,0 +1,60 @@ +# Script for +# https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publish-a-ucd-snapshot +# +# Invoke like this: +# +# pub/copy-ucd-to-draft.sh ~/unitools/mine/src /tmp/unicode/Public/draft + +UNICODETOOLS=$1 +DRAFT=$2 + +UNITOOLS_DATA=$UNICODETOOLS/unicodetools/data + +# Adjust the following for each year and version as needed. 
+COPY_YEAR=2024 +UNI_VER=17.0.0 +EMOJI_VER=17.0 + +TODAY=`date --iso-8601` + +mkdir -p $DRAFT + +cat > $DRAFT/sed-readmes.txt << eof +s/COPY_YEAR/$COPY_YEAR/ +s/PUB_DATE/$TODAY/ +s/PUB_STATUS/draft/ +s/UNI_VER/$UNI_VER/ +s/EMOJI_VER/$EMOJI_VER/ +s%PUBLIC_EMOJI%Public/draft/emoji% +s%PUBLIC_UCD%Public/draft/UCD% +eof + +mkdir -p $DRAFT/UCD/ucd +mkdir -p $DRAFT/zipped +cp -r $UNITOOLS_DATA/ucd/dev/* $DRAFT/UCD/ucd +rm -r $DRAFT/UCD/ucd/Unihan +rm -r $DRAFT/UCD/ucd/emoji +mv $DRAFT/UCD/ucd/version-ReadMe.txt $DRAFT/UCD/ReadMe.txt +mv $DRAFT/UCD/ucd/zipped-ReadMe.txt $DRAFT/zipped/ReadMe.txt + +# Fix permissions. Everyone can read, and search directories. +chmod a+rX -R $DRAFT + +# Update the readmes in-place (-i) as set up above. +find $DRAFT -name '*ReadMe.txt' | xargs sed -i -f $DRAFT/sed-readmes.txt + +# Zip files for some types of data, after fixing permissions +rm $DRAFT/UCD/ucd/UCD.zip +(cd $DRAFT/UCD/ucd; zip -r UCD.zip * && mv UCD.zip $DRAFT/zipped) + +# Cleanup +rm $DRAFT/sed-readmes.txt + +rm $DRAFT/ucd-snapshot.zip +(cd $DRAFT; zip -r ucd-snapshot.zip *) + +echo "--------------------" +echo "Copy files from elsewhere:" +echo "- Unihan.zip to $DRAFT/UCD/ucd" +echo "- Unihan.zip to $DRAFT/zipped" + From a4691ebda05e7fc03de4c4df273598f1683cdcea Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Thu, 12 Dec 2024 17:26:35 +0100 Subject: [PATCH 08/13] Rename workflow files --- .github/workflows/{publish-ucd.sh => publish-data.sh} | 0 .github/workflows/{publish-ucd.yml => publish-data.yml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{publish-ucd.sh => publish-data.sh} (100%) rename .github/workflows/{publish-ucd.yml => publish-data.yml} (100%) diff --git a/.github/workflows/publish-ucd.sh b/.github/workflows/publish-data.sh similarity index 100% rename from .github/workflows/publish-ucd.sh rename to .github/workflows/publish-data.sh diff --git a/.github/workflows/publish-ucd.yml b/.github/workflows/publish-data.yml similarity index 
100% rename from .github/workflows/publish-ucd.yml rename to .github/workflows/publish-data.yml From 03710abf32bd42a97f8ea15fddaee4ba4feabc29 Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Thu, 12 Dec 2024 17:31:22 +0100 Subject: [PATCH 09/13] =?UTF-8?q?Workflow=20renaming,=20mode=20=E2=86=92?= =?UTF-8?q?=20release=20phase,=20=20dist=20=E2=86=92=20TMP=3Dpub/tmp?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/publish-data.sh | 57 +++++++++++++++--------------- .github/workflows/publish-data.yml | 20 +++++------ 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/.github/workflows/publish-data.sh b/.github/workflows/publish-data.sh index 90f992d9b..02d0beacc 100755 --- a/.github/workflows/publish-data.sh +++ b/.github/workflows/publish-data.sh @@ -1,10 +1,11 @@ -# See publish-ucd.yml +# See publish-data.yml -mkdir dist +TMP=pub/tmp +mkdir $TMP PUB_DATE=$(date --iso-8601) -cat > dist/sed-readmes.txt << eof +cat > $TMP/sed-readmes.txt << eof s/COPY_YEAR/$COPY_YEAR/ s/PUB_DATE/$PUB_DATE/ s/PUB_STATUS/draft/ @@ -15,43 +16,43 @@ s%PUBLIC_EMOJI%Public/draft/emoji% s%PUBLIC_UCD%Public/draft/UCD% eof -mkdir dist/UCD -cp -R unicodetools/data/ucd/dev dist/UCD/ucd -mv dist/UCD/ucd/version-ReadMe.txt dist/UCD/ReadMe.txt -rm -r dist/UCD/ucd/Unihan +mkdir $TMP/UCD +cp -R unicodetools/data/ucd/dev $TMP/UCD/ucd +mv $TMP/UCD/ucd/version-ReadMe.txt $TMP/UCD/ReadMe.txt +rm -r $TMP/UCD/ucd/Unihan -if [ "$MODE" = "Snapshot" ]; then - rm -r dist/UCD/ucd/emoji +if [ "$RELEASE_PHASE" = "Dev" ]; then + rm -r $TMP/UCD/ucd/emoji fi -if [ "$MODE" = "Alpha" ] || [ "$MODE" = "Beta" ]; then - cp -R unicodetools/data/emoji/dev dist/emoji +if [ "$RELEASE_PHASE" = "Alpha" ] || [ "$RELEASE_PHASE" = "Beta" ]; then + cp -R unicodetools/data/emoji/dev $TMP/emoji - cp -R unicodetools/data/idna/dev dist/idna + cp -R unicodetools/data/idna/dev $TMP/idna - mkdir dist/idna2008derived - cp 
unicodetools/data/idna/idna2008derived/ReadMe.txt dist/idna2008derived - cp unicodetools/data/idna/idna2008derived/Idna2008-$UNI_VER.txt dist/idna2008derived + mkdir $TMP/idna2008derived + cp unicodetools/data/idna/idna2008derived/ReadMe.txt $TMP/idna2008derived + cp unicodetools/data/idna/idna2008derived/Idna2008-$UNI_VER.txt $TMP/idna2008derived fi -if [ "$MODE" = "Beta" ]; then - cp -R unicodetools/data/uca/dev dist/UCA - sed -i -f dist/sed-readmes.txt dist/UCA/CollationTest.html +if [ "$RELEASE_PHASE" = "Beta" ]; then + cp -R unicodetools/data/uca/dev $TMP/UCA + sed -i -f $TMP/sed-readmes.txt $TMP/UCA/CollationTest.html - cp -R unicodetools/data/security/dev dist/security + cp -R unicodetools/data/security/dev $TMP/security fi # Update the readmes in-place (-i) as set up above. -find dist -name '*ReadMe.txt' | xargs sed -i -f dist/sed-readmes.txt -rm dist/sed-readmes.txt +find $TMP -name '*ReadMe.txt' | xargs sed -i -f $TMP/sed-readmes.txt +rm $TMP/sed-readmes.txt -mkdir dist/zipped -mv dist/UCD/ucd/zipped-ReadMe.txt dist/zipped/ReadMe.txt -(cd dist/UCD/ucd; zip -r UCD.zip *) -mv dist/UCD/ucd/UCD.zip dist/zipped +mkdir $TMP/zipped +mv $TMP/UCD/ucd/zipped-ReadMe.txt $TMP/zipped/ReadMe.txt +(cd $TMP/UCD/ucd; zip -r UCD.zip *) +mv $TMP/UCD/ucd/UCD.zip $TMP/zipped -if [ "$MODE" = "Beta" ]; then - (cd dist/UCA; zip -r CollationTest.zip CollationTest; rm -r CollationTest) +if [ "$RELEASE_PHASE" = "Beta" ]; then + (cd $TMP/UCA; zip -r CollationTest.zip CollationTest; rm -r CollationTest) - (cd dist/security; zip -r uts39-data-$UNI_VER.zip *) + (cd $TMP/security; zip -r uts39-data-$UNI_VER.zip *) fi diff --git a/.github/workflows/publish-data.yml b/.github/workflows/publish-data.yml index e576f6df9..aeab2408c 100644 --- a/.github/workflows/publish-data.yml +++ b/.github/workflows/publish-data.yml @@ -1,30 +1,30 @@ # See https://github.com/unicode-org/unicodetools/blob/main/docs/data-workflow.md#publication # Test locally with https://github.com/nektos/act: -# act 
--workflows .github/workflows/publish-ucd.yml --input mode=Alpha +# act --workflows .github/workflows/publish-data.yml --input releasePhase=Alpha -name: Publish UCD +name: Publish Data -run-name: "${{ github.workflow }}: ${{ inputs.mode }}" +run-name: "${{ github.workflow }}: ${{ inputs.releasePhase }}" on: workflow_dispatch: inputs: - mode: - description: Publication mode + releasePhase: # See ReleasePhase in https://github.com/unicode-org/unicodetools/blob/main/unicodetools/src/main/java/org/unicode/text/utility/Settings.java + description: Release phase type: choice options: - - Snapshot + - Dev - Alpha - Beta - default: Snapshot + default: Dev env: COPY_YEAR: "2024" UNI_VER: "17.0.0" EMOJI_VER: "17.0" TR10_REV: "tr10-52" # UTS #10 release revision number to be used in CollationTest.html: One more than the last release revision number. - MODE: ${{ inputs.mode }} + RELEASE_PHASE: ${{ inputs.releasePhase }} jobs: build: @@ -38,7 +38,7 @@ jobs: unicodetools/data/emoji/dev unicodetools/data/idna/dev unicodetools/data/idna/idna2008derived - - run: .github/workflows/publish-ucd.sh + - run: .github/workflows/publish-data.sh - uses: actions/upload-artifact@v4 with: - path: dist + path: pub/tmp # See TMP in publish-data.sh From 607cfa8e101272b764addbce7437e3b79abbe10a Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Thu, 12 Dec 2024 17:59:21 +0100 Subject: [PATCH 10/13] Set COPY_YEAR to current year; use UTC time and macOS-compatible format -I could be used in place of --iso-8601, but we also need +%Y anyway. 
--- .github/workflows/publish-data.sh | 3 ++- .github/workflows/publish-data.yml | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-data.sh b/.github/workflows/publish-data.sh index 02d0beacc..575ea0ebd 100755 --- a/.github/workflows/publish-data.sh +++ b/.github/workflows/publish-data.sh @@ -3,7 +3,8 @@ TMP=pub/tmp mkdir $TMP -PUB_DATE=$(date --iso-8601) +COPY_YEAR=$(date -u +%Y) +PUB_DATE=$(date -u +%Y-%m-%d) cat > $TMP/sed-readmes.txt << eof s/COPY_YEAR/$COPY_YEAR/ diff --git a/.github/workflows/publish-data.yml b/.github/workflows/publish-data.yml index aeab2408c..355c278cf 100644 --- a/.github/workflows/publish-data.yml +++ b/.github/workflows/publish-data.yml @@ -20,7 +20,6 @@ on: default: Dev env: - COPY_YEAR: "2024" UNI_VER: "17.0.0" EMOJI_VER: "17.0" TR10_REV: "tr10-52" # UTS #10 release revision number to be used in CollationTest.html: One more than the last release revision number. From 5bb2e08484ec7b4c795943b4f38a13adb16a0aa0 Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Thu, 12 Dec 2024 18:20:34 +0100 Subject: [PATCH 11/13] =?UTF-8?q?Document=20Gregg=20as=20Rick=E2=80=99s=20?= =?UTF-8?q?replacement,=20and=20existence=20of=20publish-data.yml?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/data-workflow.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/docs/data-workflow.md b/docs/data-workflow.md index 01cd0a68f..f59fa92e0 100644 --- a/docs/data-workflow.md +++ b/docs/data-workflow.md @@ -79,6 +79,9 @@ https://github.com/unicode-org/unicodetools/tree/main/unicodetools/data/emoji/de ## Publication +> An experimental GitHub workflow, [publish-data.yml](/.github/workflows/publish-data.yml), +can create a dev (UCD), alpha, or beta snapshot. + Certain snapshots of the .../dev/ files are copied into https://www.unicode.org/Public/draft/ for Unicode alpha, beta, and final releases, and more as appropriate. 
* UCD files go into https://www.unicode.org/Public/draft/UCD/ @@ -104,12 +107,10 @@ script from an up-to-date repo workspace. The script copies the set of the .../dev/ data files for an alpha snapshot from a unicodetools workspace to a target folder with the layout of https://www.unicode.org/Public/draft/ . -Send the resulting zip file to Rick for posting to https://www.unicode.org/Public/draft/ . -Ask Rick to add other files that are not tracked in the unicodetools repo: +Send the resulting zip file to Gregg for posting to https://www.unicode.org/Public/draft/ . +Ask Gregg to add other files that are not tracked in the unicodetools repo: * Unihan.zip to .../draft/UCD/ucd -TODO: Figure out new process & people replacing Rick in 2025. - Note: No version/delta infixes in names of data files. We simply use the “draft” folder and the file-internal time stamps for versioning. @@ -124,8 +125,8 @@ script from an up-to-date repo workspace. The script copies the set of the .../dev/ data files for an alpha snapshot from a unicodetools workspace to a target folder with the layout of https://www.unicode.org/Public/draft/ . -Send the resulting zip file to Rick for posting to https://www.unicode.org/Public/draft/ . -Ask Rick to add other files that are not tracked in the unicodetools repo: +Send the resulting zip file to Gregg for posting to https://www.unicode.org/Public/draft/ . +Ask Gregg to add other files that are not tracked in the unicodetools repo: * Unihan.zip to .../draft/UCD/ucd * alpha charts to .../draft/UCD/charts @@ -141,8 +142,8 @@ script from an up-to-date repo workspace. The script copies the set of the .../dev/ data files for a beta snapshot from a unicodetools workspace to a target folder with the layout of https://www.unicode.org/Public/draft/ . -Send the resulting zip file to Rick for posting to https://www.unicode.org/Public/draft/ . 
-Ask Rick to add other files that are not tracked in the unicodetools repo: +Send the resulting zip file to Gregg for posting to https://www.unicode.org/Public/draft/ . +Ask Gregg to add other files that are not tracked in the unicodetools repo: * Unihan.zip to .../draft/UCD/ucd * UCDXML files to .../draft/UCD/ucdxml * beta charts to .../draft/UCD/charts @@ -158,8 +159,8 @@ Verify the final set of files in the draft folder. Run the [pub/copy-final.sh](https://github.com/unicode-org/unicodetools/blob/main/pub/copy-final.sh) script from an up-to-date repo workspace. -Send the resulting zip file to Rick for posting to https://www.unicode.org/Public/ (not .../Public/draft/). -Ask Rick to add other files that are not tracked in the unicodetools repo: +Send the resulting zip file to Gregg for posting to https://www.unicode.org/Public/ (not .../Public/draft/). +Ask Gregg to add other files that are not tracked in the unicodetools repo: * Unihan.zip to .../{version}/ucd * UCDXML files to .../{version}/ucdxml * final charts to .../{version}/charts From a6fc78e524324fc5ecf41b4ba23b362d5430a56a Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Thu, 12 Dec 2024 19:45:17 +0100 Subject: [PATCH 12/13] Use the [[ syntax in place of [ --- .github/workflows/publish-data.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publish-data.sh b/.github/workflows/publish-data.sh index 575ea0ebd..051277c00 100755 --- a/.github/workflows/publish-data.sh +++ b/.github/workflows/publish-data.sh @@ -22,11 +22,11 @@ cp -R unicodetools/data/ucd/dev $TMP/UCD/ucd mv $TMP/UCD/ucd/version-ReadMe.txt $TMP/UCD/ReadMe.txt rm -r $TMP/UCD/ucd/Unihan -if [ "$RELEASE_PHASE" = "Dev" ]; then +if [[ "$RELEASE_PHASE" == "Dev" ]]; then rm -r $TMP/UCD/ucd/emoji fi -if [ "$RELEASE_PHASE" = "Alpha" ] || [ "$RELEASE_PHASE" = "Beta" ]; then +if [[ "$RELEASE_PHASE" == "Alpha" || "$RELEASE_PHASE" == "Beta" ]]; then cp -R unicodetools/data/emoji/dev $TMP/emoji cp -R 
unicodetools/data/idna/dev $TMP/idna @@ -36,7 +36,7 @@ if [ "$RELEASE_PHASE" = "Alpha" ] || [ "$RELEASE_PHASE" = "Beta" ]; then cp unicodetools/data/idna/idna2008derived/Idna2008-$UNI_VER.txt $TMP/idna2008derived fi -if [ "$RELEASE_PHASE" = "Beta" ]; then +if [[ "$RELEASE_PHASE" == "Beta" ]]; then cp -R unicodetools/data/uca/dev $TMP/UCA sed -i -f $TMP/sed-readmes.txt $TMP/UCA/CollationTest.html @@ -52,7 +52,7 @@ mv $TMP/UCD/ucd/zipped-ReadMe.txt $TMP/zipped/ReadMe.txt (cd $TMP/UCD/ucd; zip -r UCD.zip *) mv $TMP/UCD/ucd/UCD.zip $TMP/zipped -if [ "$RELEASE_PHASE" = "Beta" ]; then +if [[ "$RELEASE_PHASE" == "Beta" ]]; then (cd $TMP/UCA; zip -r CollationTest.zip CollationTest; rm -r CollationTest) (cd $TMP/security; zip -r uts39-data-$UNI_VER.zip *) From 7004cd7afb041a09e4a5d584988826a5f4d8d1f4 Mon Sep 17 00:00:00 2001 From: Liang Hai Date: Thu, 12 Dec 2024 19:53:44 +0100 Subject: [PATCH 13/13] Correct $TMP creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pub directory is not included in the sparse checkout, and the nektos/act tool for testing locally doesn’t do an actual sparse checkout (it does `docker cp` instead) so I didn’t catch this issue locally. --- .github/workflows/publish-data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish-data.sh b/.github/workflows/publish-data.sh index 051277c00..3d74c4ef2 100755 --- a/.github/workflows/publish-data.sh +++ b/.github/workflows/publish-data.sh @@ -1,7 +1,7 @@ # See publish-data.yml TMP=pub/tmp -mkdir $TMP +mkdir -p $TMP COPY_YEAR=$(date -u +%Y) PUB_DATE=$(date -u +%Y-%m-%d)