From 4d08c079d08a66c7e9f38b6557568fecf8d85539 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 14 Jan 2025 18:24:50 +0800 Subject: [PATCH] Build R docs with pkgdown. install dir. Restore. system dependencies. Update ubuntu version. work on script. --- .github/workflows/r_tests.yml | 22 ++++++++++ R-package/.Rbuildignore | 3 ++ R-package/.gitignore | 1 + R-package/README.md | 12 ++---- R-package/pkgdown/_pkgdown.yml | 4 ++ R-package/vignettes/xgboost_introduction.Rmd | 15 ++++--- doc/R-package/index.rst | 9 ++++ doc/conf.py | 43 +++++++++++++++++--- ops/pipeline/build-r-docs-impl.sh | 24 +++++++++++ ops/pipeline/build-r-docs.sh | 19 +++++++++ 10 files changed, 132 insertions(+), 20 deletions(-) create mode 100644 R-package/.gitignore create mode 100644 R-package/pkgdown/_pkgdown.yml create mode 100644 ops/pipeline/build-r-docs-impl.sh create mode 100644 ops/pipeline/build-r-docs.sh diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml index 43ad372a1e84..57131b96b96d 100644 --- a/.github/workflows/r_tests.yml +++ b/.github/workflows/r_tests.yml @@ -101,3 +101,25 @@ jobs: if: steps.changes.outputs.r_package == 'true' run: | python3 ops/script/test_r_package.py --r=/usr/bin/R --task=doc + + build-r-docs: + name: Build docs for the R package + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + - tag=r-tests-build-jvm-docs + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4 + with: + submodules: "true" + - name: Log into Docker registry (AWS ECR) + run: bash ops/pipeline/login-docker-registry.sh + - run: bash ops/pipeline/build-r-docs.sh + - name: Upload R doc + run: | + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket xgboost-docs \ + --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \ + R-package/r-docs-${{ env.BRANCH_NAME }}.tar.bz2 diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore index 
b1932e324589..ca9c76e2411a 100644 --- a/R-package/.Rbuildignore +++ b/R-package/.Rbuildignore @@ -6,3 +6,6 @@ README.md ^doc$ ^Meta$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/R-package/.gitignore b/R-package/.gitignore new file mode 100644 index 000000000000..d8f8d46921aa --- /dev/null +++ b/R-package/.gitignore @@ -0,0 +1 @@ +docs diff --git a/R-package/README.md b/R-package/README.md index f68b1954be7d..50f773755e6f 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -3,11 +3,11 @@ XGBoost R Package for Scalable GBM [![CRAN Status Badge](http://www.r-pkg.org/badges/version/xgboost)](https://cran.r-project.org/web/packages/xgboost) [![CRAN Downloads](http://cranlogs.r-pkg.org/badges/xgboost)](https://cran.rstudio.com/web/packages/xgboost/index.html) -[![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](http://xgboost.readthedocs.org/en/latest/R-package/index.html) +[![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org/en/latest/R-package/index.html) Resources --------- -* [XGBoost R Package Online Documentation](http://xgboost.readthedocs.org/en/latest/R-package/index.html) +* [XGBoost R Package Online Documentation](https://xgboost.readthedocs.org/en/latest/R-package/index.html) - Check this out for detailed documents, examples and tutorials. Installation @@ -19,13 +19,7 @@ We are [on CRAN](https://cran.r-project.org/web/packages/xgboost/index.html) now install.packages('xgboost') ``` -For more detailed installation instructions, please see [here](http://xgboost.readthedocs.org/en/latest/build.html#r-package-installation). - -Examples --------- - -* Please visit [walk through example](demo). 
-* See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including a [RMarkdown documentation](../demo/kaggle-otto/understandingXGBoostModel.Rmd). +For more detailed installation instructions, please see [here](https://xgboost.readthedocs.io/en/stable/install.html). Development ----------- diff --git a/R-package/pkgdown/_pkgdown.yml b/R-package/pkgdown/_pkgdown.yml new file mode 100644 index 000000000000..3a3c60f97169 --- /dev/null +++ b/R-package/pkgdown/_pkgdown.yml @@ -0,0 +1,4 @@ +url: https://github.com/dmlc/xgboost + +template: + bootstrap: 5 diff --git a/R-package/vignettes/xgboost_introduction.Rmd b/R-package/vignettes/xgboost_introduction.Rmd index 97f812a0ba75..c4a514fa1751 100644 --- a/R-package/vignettes/xgboost_introduction.Rmd +++ b/R-package/vignettes/xgboost_introduction.Rmd @@ -12,7 +12,10 @@ output: toc_float: true --- -# Introduction +XGBoost for R introduction +========================== + +## Introduction **XGBoost** is an optimized distributed gradient boosting library designed to be highly **efficient**, **flexible** and **portable**. It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework. XGBoost provides a parallel tree boosting (also known as GBDT, GBM) that solve many data science problems in a fast and accurate way. The same code runs on major distributed environment (Hadoop, SGE, MPI) and can solve problems beyond billions of examples. @@ -22,7 +25,7 @@ For more details about XGBoost's features and usage, see the [online documentati This short vignette outlines the basic usage of the R interface for XGBoost, assuming the reader has some familiarity with the underlying concepts behind statistical modeling with gradient-boosted decision trees. 
-# Building a predictive model +## Building a predictive model At its core, XGBoost consists of a C++ library which offers bindings for different programming languages, including R. The R package for XGBoost provides an idiomatic interface similar to those of other statistical modeling packages using and x/y design, as well as a lower-level interface that interacts more directly with the underlying core library and which is similar to those of other language bindings like Python, plus various helpers to interact with its model objects such as by plotting their feature importances or converting them to other formats. @@ -62,7 +65,7 @@ model_abserr <- xgboost(x, y, objective = "reg:absoluteerror", nthreads = 1, nro _Note: the objective must match with the type of the "y" response variable - for example, classification objectives for discrete choices require "factor" types, while regression models for real-valued data require "numeric" types._ -# Model parameters +## Model parameters XGBoost models allow a large degree of control over how they are built. By their nature, gradient-boosted decision tree ensembles are able to capture very complex patterns between features in the data and a response variable, which also means they can suffer from overfitting if not controlled appropirately. 
@@ -105,7 +108,7 @@ xgboost( ) ``` -# Examining model objects +## Examining model objects XGBoost model objects for the most part consist of a pointer to a C++ object where most of the information is held and which is interfaced through the utility functions and methods in the package, but also contains some R attributes that can be retrieved (and new ones added) through `attributes()`: @@ -131,7 +134,7 @@ xgb.importance(model) xgb.model.dt.tree(model) ``` -# Other features +## Other features XGBoost supports many additional features on top of its traditional gradient-boosting framework, including, among others: @@ -143,7 +146,7 @@ XGBoost supports many additional features on top of its traditional gradient-boo See the [online documentation](https://xgboost.readthedocs.io/en/stable/index.html) - particularly the [tutorials section](https://xgboost.readthedocs.io/en/stable/tutorials/index.html) - for a glimpse over further functionalities that XGBoost offers. -# The low-level interface +## The low-level interface In addition to the `xgboost(x, y, ...)` function, XGBoost also provides a lower-level interface for creating model objects through the function `xgb.train()`, which resembles the same `xgb.train` functions in other language bindings of XGBoost. diff --git a/doc/R-package/index.rst b/doc/R-package/index.rst index 2479e11a77f0..b449ee7fe00c 100644 --- a/doc/R-package/index.rst +++ b/doc/R-package/index.rst @@ -15,6 +15,15 @@ Get Started * Checkout the :doc:`Installation Guide ` contains instructions to install xgboost, and :doc:`Tutorials ` for examples on how to use XGBoost for various tasks. * Read the `API documentation `_. +********* +Vignettes +********* + +.. 
toctree:: + + xgboost_introduction + xgboostfromJSON + ************ Other topics ************ diff --git a/doc/conf.py b/doc/conf.py index 89dc0f4eaee2..782e5409f266 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -12,7 +12,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. import os -import re import shutil import subprocess import sys @@ -35,7 +34,7 @@ release = xgboost.__version__ -def run_doxygen(): +def run_doxygen() -> None: """Run the doxygen make command in the designated folder.""" curdir = os.path.normpath(os.path.abspath(os.path.curdir)) if os.path.exists(TMP_DIR): @@ -67,8 +66,8 @@ def run_doxygen(): os.chdir(curdir) -def build_jvm_docs(): - """Build docs for the JVM packages""" +def get_branch() -> str: + """Guess the git branch.""" git_branch = os.getenv("READTHEDOCS_VERSION_NAME", default=None) print(f"READTHEDOCS_VERSION_NAME = {git_branch}") @@ -79,6 +78,12 @@ def build_jvm_docs(): elif git_branch == "stable": git_branch = f"release_{xgboost.__version__}" print(f"git_branch = {git_branch}") + return git_branch + + +def build_jvm_docs() -> None: + """Build docs for the JVM packages""" + git_branch = get_branch() def try_fetch_jvm_doc(branch): """ @@ -106,10 +111,37 @@ def try_fetch_jvm_doc(branch): return False if not try_fetch_jvm_doc(git_branch): - print(f"Falling back to the master branch...") + print("Falling back to the master branch...") try_fetch_jvm_doc("master") +def build_r_docs() -> None: + """Fetch the pre-built R documentation archive from S3 and extract it.""" + git_branch = get_branch() + + def try_fetch_r_doc(branch: str) -> bool: + try: + url = f"https://s3-us-west-2.amazonaws.com/xgboost-docs/r-docs-{branch}.tar.bz2" + filename, _ = urllib.request.urlretrieve(url) + if not os.path.exists(TMP_DIR): + print(f"Create directory {TMP_DIR}") + os.mkdir(TMP_DIR) + r_doc_dir = os.path.join(TMP_DIR, "r_docs") + if os.path.exists(r_doc_dir): + shutil.rmtree(r_doc_dir) + os.mkdir(r_doc_dir) + + with tarfile.open(filename, "r:bz2") 
as t: + t.extractall(r_doc_dir) + return True + except HTTPError: + print(f"R doc not found at {url}.") + return False + + if not try_fetch_r_doc(git_branch): + try_fetch_r_doc("master") + + def is_readthedocs_build(): if os.environ.get("READTHEDOCS", None) == "True": return True @@ -125,6 +157,7 @@ def is_readthedocs_build(): if is_readthedocs_build(): run_doxygen() build_jvm_docs() + build_r_docs() # If extensions (or modules to document with autodoc) are in another directory, diff --git a/ops/pipeline/build-r-docs-impl.sh b/ops/pipeline/build-r-docs-impl.sh new file mode 100644 index 000000000000..d9f5eb6bf0bc --- /dev/null +++ b/ops/pipeline/build-r-docs-impl.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +if [[ $# -ne 1 ]] +then + echo "Usage: $0 [branch name]" + exit 1 +fi + +set -euo pipefail + +branch_name=$1 + +# See instructions at: https://cran.r-project.org/bin/linux/ubuntu/ + +wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sudo tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc +# Add the CRAN apt repository for R 4.x; the Ubuntu codename is detected with lsb_release. +sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/" + +sudo apt install --no-install-recommends r-base +Rscript -e "install.packages(c('pkgdown'), repos = 'https://mirror.las.iastate.edu/CRAN/')" +cd R-package +Rscript -e "pkgdown::build_site()" +cd - +tar cvjf r-docs-${branch_name}.tar.bz2 R-package/docs diff --git a/ops/pipeline/build-r-docs.sh b/ops/pipeline/build-r-docs.sh new file mode 100644 index 000000000000..61ae16e92ecc --- /dev/null +++ b/ops/pipeline/build-r-docs.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -euo pipefail + +if [[ -z ${BRANCH_NAME:-} ]] +then + echo "Make sure to define environment variable BRANCH_NAME." 
+ exit 1 +fi + +source ops/pipeline/get-docker-registry-details.sh + +IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.cpu + +echo "--- Build R package doc" +set -x +python3 ops/docker_run.py \ + --image-uri ${IMAGE_URI} \ + -- ops/pipeline/build-r-docs-impl.sh ${BRANCH_NAME}