diff --git a/.gitignore b/.gitignore index 465ac6d..e68366a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ **.DS_Store # temp jupyter notebook used for quarto jupyter execution -.quarto_ipynb +**.quarto_ipynb /.quarto/ _site/ diff --git a/DESCRIPTION_temp b/DESCRIPTION_temp new file mode 100644 index 0000000..5b5df57 --- /dev/null +++ b/DESCRIPTION_temp @@ -0,0 +1,4 @@ +# notes for package we installed + +- RMarkdown +- here diff --git a/Makefile b/Makefile index dedffd5..eb57080 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ render: setup_python_venv: pip freeze | xargs pip uninstall -y + pip install --upgrade pip pip install -r requirements.txt pip freeze > requirements_snapshot.txt diff --git a/_quarto.yml b/_quarto.yml index f8f8752..3b53557 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -1,9 +1,27 @@ project: type: website + preview: + port: 54321 + browser: true website: title: "DSCI 521: Computing Platforms for Data Science" + site-url: https://ubc-mds.github.io/DSCI_521_platforms-dsci_book/ + issue-url: https://github.com/UBC-MDS/DSCI_521_platforms-dsci_book/issues/new + repo-actions: [edit, source, issue] + google-analytics: "G-1RNLL8GPXK" + open-graph: true + twitter-card: true + page-navigation: true + bread-crumbs: true + search: + show-item-context: true + type: overlay navbar: + right: + - icon: github + href: https://github.com/UBC-MDS/DSCI_521_platforms-dsci_book + aria-label: GitHub left: - href: index.qmd text: Home @@ -16,27 +34,52 @@ website: style: "docked" search: true contents: - - text: "Lecture 0a: JupyterLab Orientation" - href: lectures/0-jupyterlab-orientation-intro.qmd - - text: "Lecture 0b: RStudio Orientation" - href: lectures/0-rstudio-orientation-intro.qmd - - text: "Lecture 1: Introduction to MDS software and Bash" - href: lectures/1-MDStools-bash-filesystem.qmd - - text: "Lecture 2: Introduction to version control with Git and GitHub" - href: lectures/2-git-github-ssh-basic-workflow.qmd - - text: "Lecture 3: Getting 
grovy with Git and GitHub" - href: lectures/3-git-history-merge-conflicts-stash.qmd - - text: "Lecture 4: Introduction to Quarto and Github Pages" - href: lectures/4-quarto_github_pages.qmd - - text: "Lecture 5: Dynamic documents: from markdown to Rmarkdown documents" - href: lectures/5-rstudio-projects-notebooks.qmd - - text: "Lecture 6: Quarto, RMarkdown, and Jupyter Slides" - href: lectures/6-rmarkdown-quarto-slides-ghpages.qmd - - text: "Lecture 7: Virtual environments" - href: lectures/7-virtual-environments.qmd - - text: "Lecture 8: Organization of Data Science projects and some useful tools" - href: lectures/8-regex-filenames-project-organization.qmd + - section: "Orientation" + contents: + - text: "Lecture 0a: JupyterLab Orientation" + href: lectures/0-jupyterlab-orientation-intro.qmd + - text: "Lecture 0b: RStudio Orientation" + href: lectures/0-rstudio-orientation-intro.qmd + + - section: "Lectures" + contents: + - text: "Lecture 1: Introduction to MDS software and Bash" + href: lectures/1-MDStools-bash-filesystem.qmd + + - text: "Lecture 2: Introduction to version control with Git and GitHub" + href: lectures/2-git-github-ssh-basic-workflow.qmd + + - text: "Lecture 3: Git: History, Conflicts, and Ignores" + href: lectures/3-git-history-merge-conflicts-stash.qmd + + - text: "Lecture 4: Introduction to Quarto and Github Pages" + href: lectures/4-quarto_github_pages.qmd + + - text: "Lecture 5: RStudio Projects and Literate Programming Documents" + href: lectures/5-rstudio-projects-notebooks.qmd + + - text: "Lecture 6: Quarto, RMarkdown, and Jupyter Slides" + href: lectures/6-rmarkdown-quarto-slides-ghpages.qmd + + - text: "Lecture 7a: Virtual environments: conda" + href: lectures/7a-virtual-environments-conda.qmd + - text: "Lecture 7b: Virtual environments: renv" + href: lectures/7b-virtual-environments-renv.qmd + + - text: "Lecture 8a: Organization of Data Science projects" + href: lectures/8-regex-filenames-project-organization.qmd + - text: "Lecture 8b: 
Introduction to Regular Expressions (RegEx)" + href: lectures/8b-regex.qmd + + - section: "Appendix" + contents: + - text: "Git Configuration" + href: appendix/git_config.qmd + - text: "RStudio Configuration" + href: appendix/rstudio_config.qmd + - text: "Semantic Line Breaks" + href: appendix/semantic_line_breaks.qmd format: html: diff --git a/appendix/git_config.qmd b/appendix/git_config.qmd new file mode 100644 index 0000000..76a1648 --- /dev/null +++ b/appendix/git_config.qmd @@ -0,0 +1,90 @@ +--- +title: "Git Configuration" +--- + +A short list of commands for git setup from the installation instructions +and other settings to get git working for the course + +Configure your git commit messages (User and Email) + +Use your email for github.com (not ubc github) + +``` +git config --global user.name "Your Name" +git config --global user.email "your_email@example.com" +``` + +SSH keys + +``` +ssh-keygen -t ed25519 -C "your_email@example.com" +``` + +Edit `~/.ssh/config` + +::: {.panel-tabset} + +## MacOS + *nix + +``` +Host github.com + AddKeysToAgent yes + UseKeychain yes + IdentityFile ~/.ssh/id_ed25519 + +Host github.ubc.ca + AddKeysToAgent yes + UseKeychain yes + IdentityFile ~/.ssh/id_ed25519 +``` + + +## Git Bash (Windows) + +``` +Host github.com + AddKeysToAgent yes + IdentityFile ~/.ssh/id_ed25519 + +Host github.ubc.ca + AddKeysToAgent yes + IdentityFile ~/.ssh/id_ed25519 +``` +::: + + +Use your `~/.ssh/id_ed25519.pub` file and copy paste the contents to register your SSH key +in **BOTH** the `github.com` and `github.ubc.ca` repositories + +## Git pull merge option + +If you push and try to pull with changes in the remote, +you need to tell git how to reconcile the differences + +You may see this + +``` +hint: You have divergent branches and need to specify how to reconcile them. 
+hint: You can do so by running one of the following commands sometime before +hint: your next pull: +hint: +hint: git config pull.rebase false # merge +hint: git config pull.rebase true # rebase +hint: git config pull.ff only # fast-forward only +hint: +hint: You can replace "git config" with "git config --global" to set a default +hint: preference for all repositories. You can also pass --rebase, --no-rebase, +hint: or --ff-only on the command line to override the configured default per +hint: invocation. +fatal: Need to specify how to reconcile divergent branches. +``` + +We will use the old git default setting of `rebase false` (the first option). + +You can do this as a repo specific setting or as a global setting (add a `--global` flag) +like you did with `user.name` and `user.email` + +- Repo setting: `git config pull.rebase false` +- Global: `git config --global pull.rebase false` + +Then you can `pull` again, where you may or may not see a merge conflict. diff --git a/appendix/rstudio_config.qmd b/appendix/rstudio_config.qmd new file mode 100644 index 0000000..826597a --- /dev/null +++ b/appendix/rstudio_config.qmd @@ -0,0 +1,39 @@ +--- +title: "RStudio Configuration" +--- + +## Global Setup + +In the `Tools` > `Global Options` > `General` Tab, +you want to uncheck the boxes that open previous files and projects, +and also make sure that no `.RData` is restored when you start RStudio, +and to never save the `.RData` on exit +(this prevents RStudio from loading up previous data when you open it, +it does not refer to its ability to load and save data as you use it). + +![](../lectures/img/rstudio-global_options-general.png) + +## LaTeX and XeLaTeX + +In the `Sweave` tab, + +change the `Sweave` option to `Knitr`, +and `pdflatex` to `xelatex`. + + + + +## Code snippets + +As we started to type `for` above, +the code completion popped up +and the first entry said "snippet", +what is that? 
+A code snippet is a text macro, +which means that you can type a short string of characters +to insert a template or snippet of text by pressing TAB. +You can see all the default snippets +and define your own by going to +`Tools -> Global options -> Code -> Edit snippets`. +Snippets are available anywhere in RStudio, +not just in R Markdown documents. diff --git a/appendix/semantic_line_breaks.qmd b/appendix/semantic_line_breaks.qmd new file mode 100644 index 0000000..d177956 --- /dev/null +++ b/appendix/semantic_line_breaks.qmd @@ -0,0 +1,75 @@ +--- +title: "Semantic Line Breaks" +--- + +> Most documents go through several versions (always more than you expected) before they are finally finished. Accordingly, you should do whatever possible to make the job of changing them easy. +> +> First, when you do the purely mechanical operations of typing, type so subsequent editing will be easy. Start each sentence on a new line. Make lines short, and break lines at natural places, such as after commas and semicolons, rather than randomly. Since most people change documents by rewriting phrases and adding, deleting and rearranging sentences, these precautions simplify any editing you have to do later. +> +> — Brian W. Kernighan, 1974 [9] + + +Semantic line breaks utilize how markdown treats line breaks and tries to combine it +with regular prose text. 
+You can read more about semantic line breaks here: [sembr.org](https://sembr.org/) + +Here are examples of how it works with markdown syntax + + +```markdown +this is a very long line that has over 80 characterslasdfja asldfja sljf lsd jflkadjsf +``` + +is rendered as: + +this is a very long line that has over 80 characterslasdfja asldfja sljf lsd jflkadjsf + + +```markdown +this is a very long line that has +over 80 characterslasdfja asldfja +sljf lsd jflkadjsf +``` + +is rendered as: + +this is a very long line that has +over 80 characterslasdfja asldfja +sljf lsd jflkadjsf + + +### Add a line break + +Markdown allows 2 spaces at the end of a line for a line break + +```markdown +this is a very long line that has
+over 80 characterslasdfja asldfja
+sljf lsd jflkadjsf +``` + +this is a very long line that has
+over 80 characterslasdfja asldfja
+sljf lsd jflkadjsf + +Markdown also allows 2 spaces at the end of a line for a line break. +This may not work with many text editor options that strip trailing white space. + + +### Add an empty line for a paragraph break + +```markdown +this is a very long line that has + +over 80 characterslasdfja asldfja + +sljf lsd jflkadjsf +``` + +Rendered as: + +this is a very long line that has + +over 80 characterslasdfja asldfja + +sljf lsd jflkadjsf diff --git a/learning_objectives/lo-ch-05.qmd b/learning_objectives/lo-ch-05.qmd index 2bf5ff6..816f536 100644 --- a/learning_objectives/lo-ch-05.qmd +++ b/learning_objectives/lo-ch-05.qmd @@ -1,7 +1,7 @@ -1. Create RProjects in Rstudio using `here` to define robust file paths. -2. Detect the basic components of a dynamic document in Jupyter Notebooks and in R Markdown -3. Explain markdown usage in relation to dynamic documents -4. Differentiate between code chunks and code cells in Rmarkdown and Jupyter Notebooks. -5. Select appropiate code chunk options for RMarkdown. -6. Use semantic line breaks for version control files -7. Specify metadata in the YAML header block +1. Create RProjects in RStudio using `here` to define robust file paths. +2. Detect the basic components of a dynamic document in Jupyter Notebooks and in R Markdown. +3. Explain markdown usage in relation to dynamic documents. +4. Differentiate between code chunks and code cells in RMarkdown and Jupyter Notebooks. +5. Select appropriate code chunk options for RMarkdown. +6. Use semantic line breaks for version control files. +7. Specify metadata in the YAML header block. diff --git a/learning_objectives/lo-ch-06.qmd b/learning_objectives/lo-ch-06.qmd index 5fdd4d1..20bbe50 100644 --- a/learning_objectives/lo-ch-06.qmd +++ b/learning_objectives/lo-ch-06.qmd @@ -1,4 +1,4 @@ -1. Understand how Quarto extend RMarkdown documents functionalities. +1. Understand how Quarto extends R Markdown document functionality. 2. 
Explore different data science products to communicate your results: slides, blogs and books. 3. Create slides using Jupyter Notebook and Quarto slides with `reveal.js` 4. Create a Jupyter Book and a Quarto books. diff --git a/lectures/0-jupyterlab-orientation-intro.qmd b/lectures/0-jupyterlab-orientation-intro.qmd index f27a80b..97a8f18 100644 --- a/lectures/0-jupyterlab-orientation-intro.qmd +++ b/lectures/0-jupyterlab-orientation-intro.qmd @@ -7,6 +7,8 @@ jupyter: python3 {{< include ../learning_objectives/lo-ch-00.qmd >}} +**Platform in focus** Jupyter Lab + ## What is JupyterLab The most rudimentary interaction with programming languages diff --git a/lectures/0-rstudio-orientation-intro.qmd b/lectures/0-rstudio-orientation-intro.qmd index 945f9fe..ba7338f 100644 --- a/lectures/0-rstudio-orientation-intro.qmd +++ b/lectures/0-rstudio-orientation-intro.qmd @@ -6,6 +6,8 @@ title: "RStudio Orientation" {{< include ../learning_objectives/lo-ch-00-rstudio.qmd >}} +**Platform in focus** RStudio IDE + ## What is RStudio RStudio is another integrated development environment (IDE), @@ -38,7 +40,7 @@ you can run RStudio as a desktop application. ## RStudio Panes -RStudio follows a "four-pane datascience" IDE view: +RStudio follows a "four-pane data science" IDE view: 1. Editor: Where you will type your code and work with other source documents 2. Environment: Contains the variables in your current session diff --git a/lectures/1-MDStools-bash-filesystem.qmd b/lectures/1-MDStools-bash-filesystem.qmd index 8bf2bf8..143a7dc 100644 --- a/lectures/1-MDStools-bash-filesystem.qmd +++ b/lectures/1-MDStools-bash-filesystem.qmd @@ -7,6 +7,8 @@ jupyter: python3 {{< include ../learning_objectives/lo-ch-01.qmd >}} +**Platform in focus** BASH + ## Introduction Welcome to Computer Platforms in Data Science! @@ -360,7 +362,7 @@ Which command would move you to the parent directory? ``` A. cd . -B. cd .. +B. cd .. C. cd ~ D. 
cd ``` diff --git a/lectures/2-git-github-ssh-basic-workflow.qmd b/lectures/2-git-github-ssh-basic-workflow.qmd index 59b91b0..476ccc2 100644 --- a/lectures/2-git-github-ssh-basic-workflow.qmd +++ b/lectures/2-git-github-ssh-basic-workflow.qmd @@ -7,6 +7,8 @@ jupyter: python3 {{< include ../learning_objectives/lo-ch-02.qmd >}} +**Platform in focus** Git and GitHub + ## Introduction In this lecture, you'll learn how to manage project changes, collaborate effectively, and maintain a complete project history using Git and GitHub. Let's get started! @@ -25,21 +27,21 @@ Let's start with some definitions: ### Definitions -**Secure SHell (SSH) `ssh`** is a secure method commonly used for remotely logging into another computer. +**Secure SHell (SSH) `ssh`** is a secure method commonly used for remotely logging into another computer. **The server** is the machine you are connecting to via SSH, which passively waits for incoming connections. **The client** is usually your own machine, which initiates the contact with the server. -## What are some password-based authentication +### What are some password-based authentication Passwords are often short and relatively easy to guess or "break." For instance, consider a password with 12 characters, where each character can be one of 26 uppercase letters, 26 lowercase letters, 10 digits, or approximately 10 special characters. This results in around 70 possible choices per character, making the total number of possible combinations approximately $70^{12}\approx 10^{22}$. While this is an extraordinarily large number, patterns in password creation can make them more predictable and easier to guess. More detailed discussions on security and privacy will be covered in future courses. -## SSH key-based authentication +### SSH key-based authentication SSH key-based authentication involves two components: *a public key* and a *private key*. These keys have an asymmetrical relationship. 
*The public key* cannot decrypt messages that are encrypted using the *private key*. Conversely, the *private key* can decrypt messages that are encrypted using the *public key*. This setup ensures secure communication by allowing only the holder of the private key to access messages encrypted with the *public key*. -## Understanding public key private key concepts +### Understanding public key private key concepts Consider the public key not as a key but as a padlock. You can make copies of this padlock and place them wherever you like. To secure another machine with your padlock, you would copy it to the `authorized_keys` file in the `~/.ssh` folder on that machine. Conversely, think of the private key as an actual key, which is used to unlock the padlock stored on the other machine. This arrangement ensures secure access to the machine that holds the corresponding padlock. @@ -47,7 +49,7 @@ Consider the public key not as a key but as a padlock. You can make copies of th *source: http://blakesmith.me/2010/02/08/understanding-public-key-private-key-concepts.html* -## You can put your lock at many places +### You can put your lock at many places As long as you are using the same lock (public key), you will be able to open it with the same private key. @@ -55,11 +57,11 @@ you will be able to open it with the same private key. ![](img/keys_2.png) *source: http://blakesmith.me/2010/02/08/understanding-public-key-private-key-concepts.html* -## How the lock works +### How the lock works SSH keys are generated using the `ssh-keygen` command, which creates a pair consisting of a private key (usually named `id_rsa`) and a public key (typically called `id_rsa.pub`). You can distribute copies of the public key, which acts like a padlock, to other machines. When you attempt to connect, the other machine encrypts a challenge message using your public key. 
To gain access, you must demonstrate that you can decrypt this message, thereby proving possession of the corresponding private key. This process ensures that the connection is both secure and authorized. -## Why SSH keys over passwords +### Why SSH keys over passwords SSH keys use the [RSA cryptosystem](https://en.wikipedia.org/wiki/RSA_(cryptosystem)) @@ -67,22 +69,22 @@ The private key is much longer than a password. A standard now is 4096-bit keys, Aside: Quantum computers will be able to break RSA encryption. It is very hard to predict whether this is years or decades away. -## Keeping your private key safe +### Keeping your private key safe -The `ssh-keygen` command allows you to add a layer of security to your private key by setting a password or passphrase. +The `ssh-keygen` command allows you to add a layer of security to your private key by setting a password or passphrase. -It's crucial that this passphrase remains confidential and not shared with anyone. +It's crucial that this passphrase remains confidential and not shared with anyone. If your private key were to fall into the wrong hands, the passphrase would still be required to use it, providing an additional level of security. -## Different type of keys +### Different type of keys There may be different methods of how to generate private-public key pairs. The [GitHub SSH key generation instructions](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent) uses the `-t` flag to indicate the "type" of key to create, -in thier instructions they use the `ed25519` algorithm to create private and public values. +in their instructions they use the `ed25519` algorithm to create private and public values. 
-The general premice to SSH keys is the same, there is a private key that stays on your machine and is never shared with other people, +The general premie to SSH keys is the same, there is a private key that stays on your machine and is never shared with other people, and the public key you can freely share that will be used to authenticate the machine with the private key on it. ### Authentication vs. encryption @@ -94,6 +96,72 @@ This authentication process is distinct from the encryption of data transmitted ![](img/password_strength.png) *source - https://xkcd.com/936/* + +### tl;dr SSH Key Setup + +The [GitHub SSH key generation instructions](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent) +walks through how to create, configure, and set up your SSH keys. +Below is a summary of the steps from the GitHub documentation and uses the default set of configurations. + +#### Create your SSH key + +```bash +ssh-keygen -t ed25519 -C "your_email@example.com" +``` + +#### Configure Your Remotes + +Configure github.com and github.ubc.ca to use your new key. 
+ +```bash +touch ~/.ssh/config +``` + +Open the `~/.ssh/config` and add the following lines: + +::: {.panel-tabset} + +## MacOS + *nix + +``` +Host github.com + AddKeysToAgent yes + UseKeychain yes + IdentityFile ~/.ssh/id_ed25519 + +Host github.ubc.ca + AddKeysToAgent yes + UseKeychain yes + IdentityFile ~/.ssh/id_ed25519 +``` + + +## Git Bash (Windows) + +``` +Host github.com + AddKeysToAgent yes + IdentityFile ~/.ssh/id_ed25519 + +Host github.ubc.ca + AddKeysToAgent yes + IdentityFile ~/.ssh/id_ed25519 +``` +::: + +#### Add Your Key to the Remotes + +Use your `~/.ssh/id_ed25519.pub` file and copy paste the contents to register your SSH key +in **BOTH** the `github.com` and `github.ubc.ca` repositories + +:::{.callout-tip} +We suggest you change the theme for `github.com` and `github.ubc.ca` so one uses +a light theme and the other uses a dark theme so you can easily recognize which +site you are using. + +You can change these in your account Settings. +::: + ## What is a Git repository? A Git repository stores all the necessary information @@ -207,21 +275,21 @@ you are automatically getting a copy of `.git` and thus, working on a Git. ## Cloning repositories from GitHub -If you only need to make minor edits, you can directly edit files on GitHub using their online editor, eliminating the need to clone the repository. +If you only need to make minor edits, you can directly edit files on GitHub using their online editor, eliminating the need to clone the repository. However, if you intend to run the code on your own machine and make modifications there, you will need to clone the repository to facilitate these changes. ### Steps to follow: -* Step 1: start by navigating to the specific repository on GitHub. +* Step 1: start by navigating to the specific repository on GitHub. -* Step 2: Click the green "Clone or download" button, ensuring the pop-up indicates "Clone with HTTPS" (we will cover SSH authentication later). 
+* Step 2: Click the green "Clone or download" button, ensuring the pop-up indicates "Clone with HTTPS" (we will cover SSH authentication later). -* Step 3: Copy the provided URL to your clipboard. +* Step 3: Copy the provided URL to your clipboard. -* Step 4: Next, open Bash on your laptop and navigate to the directory where you wish to clone the repository using the cd command. +* Step 4: Next, open Bash on your laptop and navigate to the directory where you wish to clone the repository using the cd command. -* Step 5: Type git clone followed by the URL, for example `git clone https://github.com/github_username/repository_name.git`and press enter. +* Step 5: Type git clone followed by the URL, for example `git clone https://github.com/github_username/repository_name.git`and press enter. * Step 6: After the download completes, you can use cd to enter the cloned directory and ls to verify that the contents match what you saw online. @@ -326,7 +394,7 @@ D. A directory is always a repository, but a repository cannot be your current w To make a change to a file inside a Git repository, start by editing it as you would any other local file. For example, you can use a text editor with a command like `code name-of-file.txt`. -Once you have made changes to a file in a local Git repository (such as on your laptop), you need to tell Git that you want to record these changes in the version history. This process involves two main steps: **adding** the changes to the staging area and then **committing** them. +Once you have made changes to a file in a local Git repository (such as on your laptop), you need to tell Git that you want to record these changes in the version history. This process involves two main steps: **adding** the changes to the staging area and then **committing** them. Committing saves the differences between the current and previous version of the file along with a message describing what you did. 
These changes are saved in the hidden .git directory within the Git repository. @@ -470,4 +538,3 @@ D. git status ## Attribution 1. [Happy Git and GitHub for the useR by Jenny Bryan and the STAT 545 TAs](http://happygitwithr.com/) 2. [Software Carpentry](https://software-carpentry.org/), specifically the Unix Shell and Git lessons - diff --git a/lectures/3-git-history-merge-conflicts-stash.qmd b/lectures/3-git-history-merge-conflicts-stash.qmd index c571fd7..36676cb 100644 --- a/lectures/3-git-history-merge-conflicts-stash.qmd +++ b/lectures/3-git-history-merge-conflicts-stash.qmd @@ -1,12 +1,18 @@ --- +<<<<<<< HEAD title: Getting groovy with Git and GitHub jupyter: python3 +======= +title: "Git: History, Conflicts, and Ignores" +>>>>>>> 0c38f5839173dd775aebc526f4f9927dd7cb0164 --- ## Learning outcomes {{< include ../learning_objectives/lo-ch-03.qmd >}} +**Platform in focus** Git and GitHub + ## Introduction In this lecture, we'll explore how to view your Git history and restore older versions of files. Understanding your project's history is crucial for tracking changes and fixing mistakes. Let's dive into the different methods for accessing your commit history and how to revert to previous file versions when needed. @@ -16,9 +22,9 @@ In this lecture, we'll explore how to view your Git history and restore older ve Do you remember the commit messages that we used to write at the time of making a commit, for saving the state of a project? It is possible to have a look at the history of the full project with any of these 3 different methods -You can view the Git history of a project in two main ways. +You can view the Git history of a project in two main ways. -On the remote, you can use GitHub through the repository's code commit view. +On the remote, you can use GitHub through the repository's code commit view. On your computer, you can use Jupyter Lab through the repository's code commit view or the terminal by using the git log command. 
@@ -675,4 +681,4 @@ that undoes an old one. 7. How do you restore an older version of a file in a Git repository? :::: -::: \ No newline at end of file +::: diff --git a/lectures/4-quarto_github_pages.qmd b/lectures/4-quarto_github_pages.qmd index eae2623..1a280dd 100644 --- a/lectures/4-quarto_github_pages.qmd +++ b/lectures/4-quarto_github_pages.qmd @@ -6,12 +6,14 @@ title: Introduction to Quarto and Github Pages {{< include ../learning_objectives/lo-ch-04.qmd >}} +**Platform in focus** Quarto and GitHub + :::{.activity} ::::{.activity-header} ## Lecture 4 Activity 1 :::: ::::{.activity-container} -Have you ever considered creating a website to showcase your projects, research, or personal brand? +Have you ever considered creating a website to showcase your projects, research, or personal brand? :::: ::: @@ -190,6 +192,18 @@ If you do create a new folder, remember you will need to `cd` into it. :::: ::: +Your folder structure should look like the one below. +If it does not, you need to move your files to the correct path, +or start over + +``` +USERNAME.github.io/ +├── about.qmd +├── index.qmd +├── _quarto.yml +└── styles.css + +``` The most important file is the `_quarto.yml` file, this is how quarto knows you are using a quarto project. @@ -226,15 +240,15 @@ D. styles.css The `_quarto.yml` file defines many (if not all) of the options you can use to tweak how your website (or project) renders or behaves. -This file is a YAML another mfile. +This file is a YAML file. This is a common format many tools use to provide configuration settings. -Just note the indentation, white space, and dashes `-` are all really imporatnat and implies different things. -YAML is a "recursive acronum", it stands for "YAML Ain't Markup Language". +Just note the indentation, white space, and dashes `-` are all really important and implies different things. +YAML is a "recursive acronym", it stands for "YAML Ain't Markup Language". 
You can go through the official quarto website documentation guide for more information: https://quarto.org/docs/websites/ -### Renering your website +### Rendering your website At the end of the day, we need a series of `.html` files that our web browser can open and render. @@ -243,12 +257,19 @@ Luckily `quarto` can take the source documents and create the `.html` files need There are 2 commands that will be helpful for us here: -- `quarto preview`: will "preview" the website, any changes to the files will automatically regnerate and refresh the website files +- `quarto preview`: will "preview" the website, any changes to the files will automatically regenerate and refresh the website files - `quarto render`: will generate all the html files Think of `preview` as a way to quickly see how the website changes as you are working, and `render` as creating the "final" website from scratch. When using `quarto preview` your current terminal will be used to look for changes and re-render your site. +:::{.callout-tip} +If you are using VSCode, you may need to set the `render-on-save` option as a default, +in either the IDE, the `_quarto.yml` file or the individual `.qmd` document: + + +::: + ::: {.tip} ::::{.tip-header} Tip @@ -374,13 +395,10 @@ If you create a repository that is _just_ your github username, with only a `README.md` file in it. Github will render the `README.md` file into your github landing page. -Here is an example of a former MDS student's Github landing page: - -https://github.com/mrnabiz/ +Here is an example of a former MDS student's Github landing page and their accompanying repository. -and their accompanying repository: - -https://github.com/mrnabiz/mrnabiz +- Landing: <https://github.com/mrnabiz/> +- Repository: <https://github.com/mrnabiz/mrnabiz> ### Github Pages and Quarto @@ -424,7 +442,9 @@ $ quarto render Output created: docs/index.html ``` -Dont forget to save your `yaaml` file, re-`render`, remove the old `_site` folder, `add`, `commit`, and `push` your changes. 
+:::{.callout-important} +Don't forget to save your `yaml` file, re-`render`, remove the old `_site` folder, `add`, `commit`, and `push` your changes. +::: On your main github repo, you'll notice a burnt orange dot next to the latest commit hash. This is github's continuous integration / deployment system creating and publishing your site. @@ -611,6 +631,6 @@ D. docs Quarto works with a few IDEs and text editors. -- VSCode extension: https://marketplace.visualstudio.com/items?itemName=quarto.quarto +- VSCode extension: <https://marketplace.visualstudio.com/items?itemName=quarto.quarto> - RStudio: Comes with RStudio (you can also manually install it as a terminal application) - Jupyter Lab: there is no official jupyter quarto extension, but quarto can work with jupyter notebooks in the command line tool diff --git a/lectures/5-rstudio-projects-notebooks.qmd b/lectures/5-rstudio-projects-notebooks.qmd index 0fea42c..e3e47f1 100644 --- a/lectures/5-rstudio-projects-notebooks.qmd +++ b/lectures/5-rstudio-projects-notebooks.qmd @@ -1,6 +1,5 @@ --- -title: 'Dynamic documents: from markdown to Rmarkdown documents' -jupyter: python3 +title: 'RStudio Projects and Literate Programming Documents' --- ## Learning outcomes @@ -9,41 +8,54 @@ jupyter: python3 **Platform in focus** RStudio IDE -# RStudio - -You have the [following report]() and you have realized that there was an error in one of the calculations you did to generate it. This means that several results are going to be modified. +:::{#activity1 .activity} +::::{.activity-header} +## Activity 1 +:::: +::::{.activity-container} +You have the +[following report](https://ttimbers.github.io/breast_cancer_predictor/doc/breast_cancer_predict_report.html) +and you have realized that there was an error in one of the calculations you did to generate it. This means that several results are going to be modified. Let's run the scripts again and replace the plots and results affected. I hope not to have more errors! Do you think that should be other way to solve this? 
+:::: +::: -Introducing RStudio! An Interactive Development Environment (IDE), +## RStudio + +We have already seen RStudio in [Lecture 0b](0-rstudio-orientation-intro.html). +RStudio is an Interactive Development Environment (IDE), just like JupyterLab. This means it comes with some conveniences, such as code completion when pressing TAB, highlighting of syntax errors, and debugging tools. + +![](img/rstudio_ide.png) + The interface is also similar to JupyterLab (and many other IDEs) as it has panels for browsing files, reading help documentation, and running code in a console. You are already familiar with the concept of many of these from running JupyterLab, -but they are arranged slightly different in RStudio, -so you'll need to familiarize yourself with this new interface. +but they are arranged slightly different in RStudio. While you can write both R and Python code in RStudio, we'll use it mostly for R in MDS. -## The RStudio interface +Please see the +[Lecture 0b: Recommended RStudio Setup](0-rstudio-orientation-intro.html#recommended-rstudio-setup) +to configure the IDE with safe defaults on launch. -The default panel layout is shown below. -You can customize where the individual panels go, -and if you have opend RStudio earlier, -they might not be arranged in exactly this layout. -Each panel has multiple tabs -and here we are highlighting the tabs -we will use the most in each panel. +![](img/rstudio-global_options-general.png) -![](img/rstudio_ide.png) +### Editing RStudio settings to not save workspace data -The "Editor" tab is where you can write R code that will be saved as a file. In contrast, the "Console" tab is where R commands are sent and evaluated by R, but any R code written here is not saved. The "Environment" tab displays the currently defined variables, providing a snapshot of your workspace. 
The "Files" tab allows you to navigate your file system, and this panel also includes the "Plots" and "Help" tabs, which show figures you create and provide function documentation, respectively. +By default, +R asks us to save workspace data and load it when we start a new session. +In general, +we **NEVER** want to save our workspace (or load a previous one). +We can avoid accidentally doing this +through turning it off in the RStudio global options in the Tools menu. ## R Scripts @@ -88,20 +100,11 @@ and clicking to run it plot(cars) ``` -## Editing RStudio settings to not save workspace data - -By default, -R asks us to save workspace data and load it when we start a new session. -In general, -we NEVER want to save our workspace (or load a previous one). -We can avoid accidentally doing this -through turning it off in the RStudio global options in the Tools menu. - -## Where are we? +## R Working Directory Different from JupyterLab, when you open RStudio you open a `.R` or `.Rmd` file the RStudio, -the current working directory is **not** neccesarily the project working directory, +the current working directory is **not** necessarily the project working directory, or the directory of the file you opened. **EVERY SESSION** you need to tell RStudio where you are working. Especially if you are loading other files outside of your `.R` or `.Rmd` file. @@ -109,39 +112,73 @@ or the directory of the file you opened. So, where are you when you open RStudio? You can find out one of two ways: 1. type `getwd()` in the console - -2. In the files pane, click the cog/More button and then click "Go To Working Directory" +2. In the files pane, click the Cog/More button and then click "Go To Working Directory" ### Setting the working directory -### Three Ways to follow: -* Way 1. *Use an RStudio project*: RStudio projects help organize your work and manage your files and settings. More details on RStudio projects are provided below. 
+There are four ways to set your working directory: -* Way 2. *Set the working directory through the Files pane*: Navigate to the desired location in your file structure using the Files pane. Once there, click the cog/More button and select "Set As Working Directory". +* Method 1. *Use an RStudio project* (Best Option): RStudio projects help organize your work and manage your files and settings. More details on RStudio projects are provided below. -* Way 3. *Set the working directory via the Session menu*: Go to the Session menu, click "Set Working Directory", and then choose "Choose Directory". This will open a file browser where you can navigate to and select the desired directory. +* Method 2. *Set the working directory through the Files pane*: Navigate to the desired location in your file structure using the Files pane. Once there, click the cog/More button and select "Set As Working Directory". -* Way 4. *Set the working directory using the console*: Type `setwd("PATH")` in the console, replacing "PATH" with the path to your desired directory. Use this method with care, as explained in Jenny's [article on workflow vs. script](https://www.tidyverse.org/articles/2017/12/workflow-vs-script/). +* Method 3. *Set the working directory via the Session menu*: Go to the Session menu, click "Set Working Directory", and then choose "Choose Directory". This will open a file browser where you can navigate to and select the desired directory. -## RStudio projects +* Method 4. *Set the working directory using the console*: Type `setwd("PATH")` in the console, replacing "PATH" with the path to your desired directory. Use this method with care, as explained in Jenny's [article on workflow vs. script](https://www.tidyverse.org/articles/2017/12/workflow-vs-script/). + +:::{.callout-important} +The RStudio project method (Method 1) is the **only** way you can consistently set +a working directory that also works in a collaborative workflow. 
+::: + +## RStudio Projects An RStudio project is essentially a directory containing a special file: `*.Rproj`, which holds metadata for R about this project. RProjects set the working directory by default to be the root of the project -and we recommend using it together with the `here()` package, -which can easily create paths relative to root project dir: +and we recommend using it together with the `{here}` package, +which can easily create paths relative from the root project dir -```R -install.packages("here") +### The {here} Package + +When you load the `{here}` package, +it will echo the current project root directory. +You should always confirm that this is the expected starting location +for your project. + +```{r} +# install.packages("here") library(here) +``` -# If you data is in project_root/files/data/ -# this will make sure that anyone using your project -# will get the correct path +If your `iris.csv` data is in `project_root/files/data/` +this will make sure that anyone using your project +will get the correct path + +```{r} here("files", "data", "iris.csv") ``` -## Creating RStudio projects +The `{here}` package finds the project root directory, +and allows you to provide a relative path to any of the files from that +root project directory. +In turn, `here::here()` will return the absolute path to the specified +file or directory. + +This method of having the user code always use a relative path, +and the function resolving to an absolute path is a way +files can be located across different computers, +especially when working on a collaborative project. + +Another benefit of the `here::here()` function, +is that if your data location does not change, +and you move your analysis file into a different folder, +you would not need to change your code, +because the relative path to the data from the root directory +stayed the same. 
+ + +### Creating RStudio projects You can either create an empty RStudio project, or clone an existing GitHub repo, @@ -151,6 +188,8 @@ which also allows you to use the RStudio interface to control Git. 2. Copy the URL needed to clone that repo from GitHub 3. Then select **File > New Project > Version Control > Git** and fill in. +![](img/new_project.png) + Now you can use RStudio as a Git client via the colorful icon in the toolbar of the edit panel. This includes adding parts of a document do the staging area, committing, pushing, pulling, and more. @@ -158,10 +197,15 @@ Using terminal Git will still be more reliable, since it is available anywhere Git is installed, but feel free to use the RStudio (or VS Code) interfaces as a complement. -![](img/new_project.png) - -## Dynamic documents with Jupyter Notebooks and RMarkdown +Creating an RStudio Project this way is the same way as running `git clone` +on the remote repository and then selecting "Existing Directory" +in the New Project Wizard. +The benefit of creating the repository on the remote first, +is the same reason we discussed in the Git + GitHub chapters, +and saves us the steps of running `git init` and manually hooking +up the remote repositories. +## Literate programming documents Our journey to understand what is a dynamic document is linked with the literate programming paradigm [2] proposed bu Knuth in 1984 [1]. @@ -170,36 +214,49 @@ Our journey to understand what is a dynamic document is linked with the literate > > --Donald Knuth. "Literate Programming (1984)" in Literate Programming. CSLI, 1992, pg. 99. -In the literate programming paradigm, an author has two (equally important) tasks [2]: -1. write program code to do computing, and -2. write narratives to explain what is being done by the program code +In the **literate programming** paradigm, an author has two (equally important) tasks [2]: + +1. Write program code to do computing +2. 
Write narratives to explain what is being done by the program code -Going back to our first question in lecture 5, we could agree to only mantain the code used to generate the plots and results produced as part of the analysis. But we will also agree that this code, even if we include comments and docstrings, it is not possible to be easily read for an human. Adding a narrative to the results will not only improve the capacity to understand the analysis and serve as a detailed documentation if not will allow to easily render again the document after changes in the code. +Going back to [first exercise of this lesson](5-rstudio-projects-notebooks.html#activity-1), +the best way would be to make sure all the plots and results can be replicated by the code +and we can reference the generated artifacts in the report. -The idea of accompanying the code with so much detailed text, was not widely adopted for software projects [3] but turn into an amazing tool for data scientists who frecquently uses these documents as reports products of their analysis. The rendering of the plots as figures next to the text in a manuscript allows giving valuable context to the visualizations. +However, code, even with documentation strings (i.e., docstrings), +may not be easily understandable for people. +Adding a narrative to the results will not only improve the capacity to understand the analysis and serve as a detailed documentation if not will allow to easily render again the document after changes in the code. -Most of you are probably familiarized with dynamic documents as notebooks, our well known **Jupyter Notebooks** (`.ipynb`) and **RMarkdown documents** (`.Rmd`) . +The idea of accompanying the code with so much detailed text, +was not widely adopted for software projects [3] +but turn into an amazing tool for data scientists who frequently uses these documents +as reports products of their analysis. 
+The rendering of the plots, figures, and tables next to the text in a manuscript +allows giving valuable context to the visualizations. -From the two elements that compose the dynamic documents, there are three principal components that are the similar for Jupyter Notebooks and RMarkdown documents: +Most of you are probably familiarized with dynamic literate programming documents as **Jupyter Notebooks** (`.ipynb`), **RMarkdown documents** (`.Rmd`), and +**Quarto documents** (`.qmd`). + +From the two elements that compose the dynamic documents, there are three principal components that are the similar for +Jupyter Notebooks, +RMarkdown documents, +and Quarto documents: * The narrative is created with text formatted with **markdown** * The code can be run and it is combined with the text in two different ways: - * Interleaved as part of the text, called **inline code** (only present in RMarkdown documents). - * In separated code blocks called **code cells** (Jupyter Notebooks) or **code chunks** (RMarkdown) that allow rendering code. + * Interleaved as part of the text, called **inline code**. + * In separated code blocks called **code cells** (Jupyter Notebooks, Quarto), + or **code chunks** (RMarkdown) that allow rendering code. ![alttag](img/rmd-inline.png) - *example inline code in RMarkdown* - https://rmarkdown.rstudio.com/articles_intro.html + *Example inline code in RMarkdown* - ![alttag](img/rmd-code.png) - *example code chunk in RMarkdown* - https://rmarkdown.rstudio.com/articles_intro.html - - - - + *Example code chunk in RMarkdown* - :::{.activity} ::::{.activity-header} -## Lecture 5 Activity 1 +## Activity 2 :::: ::::{.activity-container} Let's go back to the document of the first activity. How can we make changes to one of the calculation? @@ -215,70 +272,66 @@ D. 
No, it's best to keep the original report unchanged :::: ::: -## ~~R~~Markdown +## Markdown > The overriding design goal for Markdown’s formatting syntax - is to make it as readable as possible. - The idea is that a Markdown-formatted document should be publishable as-is, + is to make it as readable as possible. + The idea is that a Markdown-formatted document should be publishable as-is, as plain text, without looking like - it’s been marked up with tags or formatting instructions. --John Gruber + it’s been marked up with tags or formatting instructions. +> +> --John Gruber -Jupyter Notebooks and RMarkdown documents -use markdown to format the text, -but markdown has it's own identity! -You already known markdown files really well, -at least you are using them as the `README.md` file -of most of the project you have created. -Let's start by describing it and then we can talk about RMarkdown. +Jupyter Notebooks, RMarkdown documents, and Quarto documents +use markdown to format the text. +But, markdown has its own identity! +We've already worked with markdown many times, +including the `README.md` files +of most of the projects we have created. -[Markdown](https://glosario.carpentries.org/en/#markdown) is basically a [markup language](https://glosario.carpentries.org/en/#markup_language) +[Markdown](https://glosario.carpentries.org/en/#markdown) is a +[markup language](https://glosario.carpentries.org/en/#markup_language) that you can use to add formatting elements to easy to read plain-text text documents [4,5]. Markdown files have their own file extension (`.md`) -that could be easily converted to [`HTML`](https://glosario.carpentries.org/en/#html) -and because of that, rendered as a web page. +that could be easily converted to other formats. +One example is +[HTML](https://glosario.carpentries.org/en/#html), +that can be rendered as a web page. 
-Now we can start to understand markdown power: +Now we can start to understand the power of markdown, and why it is so popular. -1. Its simple **formatting syntax** allows translation of content to `HTML` - without having to be an expert. +1. **Simple formatting syntax**: + Allows translation of content to HTML + (and many other formats) without having to be an expert. -2. It is **human readable**. -Compare markdown and `HTML`, which one - is easier to understand by reading only the code? +2. **Human readable**: + Compare markdown and `HTML`, + which one is easier to understand by reading only the code? -3. It is a **plain-text document**. -This means that it is open by mostly any editor, - making `.md` files quite reproducible. +3. **Plain-text document**: + This means that it is open by mostly any editor, + making `.md` files quite reproducible. -Converting the `.md` files to `HTML` documents (`.html`) - involves the use of specific designed software for that purpuse. - One of the most popular markdown converters is called [pandoc](https://pandoc.org/). - Pandoc can not only convert markdown to `HTML` - if not to other popular and widely used formats - as Word documents (`.docx`) and PDFs (`.pdf`). +The markdown, `.md`, file is a common file format that can be used by tools +such as [`pandoc`](https://pandoc.org/) +to convert to many other types, +e.g., `pdf`, `html`, `epub`, `docx`, `ppt`, etc. -If your goal is to render the final `HTML` document, +If your goal is to render the final `.html` document, keep in mind that you will need an extra step to view the file in a web browser. -Also, `HTML` documents can not only be render as web pages, +Also, `.html` documents can not only be rendered as web pages, you can create with them blogs, books, articles. -At the end of lecture 8 we will learn how to use GitHub Pages -to render part of the `HTML` documents that we are going to generate. 
-Therefore, some editors as RStudio, JupyterLab and VS Code -will resume many steps at the same time -rendering and displaying the `.md` file directly on the console. -GitHub does something similar with the `README.md` file -that appears automatically rendered as the first page of your GitHub repository. +We saw this in the [Quarto and GitHub Pages Lesson](4-quarto_github_pages.html). +GitHub will also render the `README.md` file of **every** directory in a repository. ![](img/markdown-flowchart.png) Image extracted from: https://www.markdownguide.org/getting-started/ - - :::{.activity} ::::{.activity-header} -## Lecture 5 Activity 2 +## Activity 3 :::: ::::{.activity-container} What are the primary benefits of using Markdown for formatting text documents? @@ -293,60 +346,46 @@ D. It is not suitable for web pages :::: ::: - - -## Markdown and its flavours - -We love markdown! But not all are good news... - -First, we will mention that the official markdown syntax documentation - is not unambiguosly specified. -What does this means? That there some implementations of [markdown have diverged](https://babelmark.github.io/faq/) [6]. -Meaning that the same markdown syntax -can render different outputs in relation to the technology you are using. -How a `README.md` file renders on GitHub could not be equal -as how it will look like after being converted by `pandoc`. -The webpage [Babelmark III](https://babelmark.github.io) -enable to detect this differences through many implementations, -also listing some of the most well known pitfalls. - -Secondly, other way in which Markdown has diverged - it is reflected in the number of **extensions**. -This means that there is specific markdown syntax -created for some implementations that will not work for other technologies. - -Don't panic! In MDS we handle a limited number -of markdown flavours (in order of appeareance in DSCI 521): - -1. 
[Jupyter Notebook (and Jupyter Book) Markdown](https://jupyterbook.org/en/stable/file-types/markdown.html): is an extension of a Markdown flavour -called [CommonMark Markdown](https://commonmark.org/). -If you completed the tutorial that we were recommending on lecture 0 -you have some practice on it! https://commonmark.org/help/tutorial/ - +### Markdown and its flavours + +The official markdown specifications is ambiguous which lead to +different implementations of +[markdown](https://babelmark.github.io/faq/) [6]. +The same markdown syntax +can render different outputs depending on the rendering engine. +For example, a `README.md` file can render differently on GitHub than from `pandoc`. +[Babelmark III](https://babelmark.github.io) +is a website that can detect and return all the different markdown variants, +as well as list well known pitfalls. +Markdown also has many **extensions**, and not all extensions will +work on all flavors of markdown. + +In MDS we handle a limited number +of markdown flavours (in order of appearance in DSCI 521): + +1. [Jupyter Notebook (and Jupyter Book) Markdown](https://jupyterbook.org/en/stable/file-types/markdown.html): + is an extension of a Markdown flavour called + [CommonMark Markdown](https://commonmark.org/). + If you completed the tutorial from Lecture 0, + you already have some practice on it! + 2. [GitHub flavoured Markdown](https://github.github.com/gfm/#what-is-github-flavored-markdown-). -To write issues, pull request, render your `README.md` file in the first page of your repository -you will be using this Markdown flavour. -It is a (different) extension of the CommonMark Spec -also used for Jupyter Notebooks. 
- -Read how to use it [here!](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) - - + To write issues, pull request, render your `README.md` file in the first page of your repository + you will be using this Markdown flavour. + It is a (different) extension of the CommonMark Spec + also used for Jupyter Notebooks. + - Read how to use it + [here!](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) 3. [R Markdown documents](https://bookdown.org/yihui/rmarkdown/markdown-syntax.html). R Markdown is an implementation of [Pandoc Markdown (with a wide list of interesting extensions!)](https://pandoc.org/MANUAL.html#pandocs-markdown) Don't panic! The important take-home message of this is that you should always check the documentation of the technology you are using. Even if for most of the cases could be quite similar, there is not an universal markdown cheatsheet or proceeding -that will possible to be used with all of them! - - +that will possible to be used with all of them. - - - -#### A good practice when writting in markdown: **semantic line breaks** +#### A good practice when writing in markdown: **Semantic line breaks** > **Hints for Preparing Documents** @@ -357,29 +396,36 @@ that will possible to be used with all of them! > > — Brian W. Kernighan, 1974 [9] -You need 2 spaces to create a line break using markdown. -This means that you can actually break a line -without creating a line break in the rendered file. +Semantic line breaks help your source code files with version control, +while also putting breaks that can make it easier to read the source file. +When version controlling markdown files, +the semantic line breaks make it easier to see where the actual differences +and changes to the file were made when running `git diff`. 
+ +How markdown renders a new line break: -The use of shorter phrases could be a benefit when you are version controlling files. +- Use the `
` HTML tag at the end of a line +- Place 2 spaces at the end of a line followed by a new line +- Use an extra empty line for a line break +We provide more examples in the [Semantic Line Breaks Chapter](../appendix/semantic_line_breaks.html) + :::{.activity} ::::{.activity-header} -## Lecture 5 Activity 3 +## Activity 4 :::: ::::{.activity-container} -### Activities Question: Check the documentation of [Pandoc Markdown](https://pandoc.org/MANUAL.html#pandocs-markdown) and [GitHub Flavour Markdown](https://github.github.com/gfm/#what-is-github-flavored-markdown-). Which of the following is true? - + A. Pandoc Markdown and GitHub Flavour Markdown render the same way. B. GitHub Flavour Markdown supports emojis, while Pandoc Markdown does not. - + C. Pandoc Markdown cannot be converted to HTML. - + D. Both render exactly the same across all platforms. @@ -405,8 +451,8 @@ whereas "Knit" first restarts the R session and runs through the entire document from the beginning to make sure it will work when you send it to someone else. -### A helpful hint for successfully working with R Markdown documents +:::{.callout-tip} Given that you need to render the entire document to see your Markdown and LaTeX rendered, **it is important to "knit" often as you make changes**. If you make an error in a LaTeX equation for example, @@ -415,7 +461,7 @@ and you will not get to see the rendered document. So by knitting/rendering often you will know where the last changes you made are and then will be able to easily identify and fix your errors. - +::: #### R Markdown (`.Rmd`) -> Markdown (`.md`) @@ -435,7 +481,7 @@ which is responsible for creating the finished format. :::{.activity} ::::{.activity-header} -## Lecture 5 Activity 4 +## Activity 5 :::: ::::{.activity-container} ### Activities @@ -460,14 +506,15 @@ D. 
There is no difference between `.Rmd` and `.md` files in terms of functionali R Markdown is a "flavor" of Markdown that allows you to run R code in addition to supporting all the Markdown syntax that you have already learned. -R Markdown documents (`.Rmd`) are most commonly run via `R Notebooks` -which work similarly to Jupyter notebooks + +R Markdown documents (`.Rmd`) work similarly to Jupyter notebooks in the sense that you can interleave writing code, formatted narrative text, and view output all in the same document. These are both examples of what it called "literate programming" where the goal is to mix code and paragraph text seamlessly. +R Notebooks are a little different. Let's see an example of this by creating a new R Notebook via `File -> New -> R Notebook`. As you can see, @@ -480,10 +527,21 @@ the plot shows up inside the notebook rather than in the side panel as when we ran the same line from within the script script. +:::{.callout-important} +The main difference in an R Markdown document and an R Notebook document, +is that R Markdown will "knit" the document in a new R session from top to bottom +**every time**. +The R Notebook, will convert whatever you have directly into HTML as-is. +It does not execute any code on "Preview". + +You can tell the difference by either looking at the YAML header of the document, +or in RStudio, you will see a different button (Knit vs Preview). +::: + :::{.activity} ::::{.activity-header} -## Lecture 5 Activity 5 +## Activity 6 :::: ::::{.activity-container} Is an R Notebook the same as an .Rmd document? @@ -494,11 +552,7 @@ Is an R Notebook the same as an .Rmd document? ::: - - - - -### 1.2. Running, editing and creating code chunks +### Create, Run, and Edit Code Chunks Just like Jupyter notebooks, R Markdown has code cells, @@ -513,15 +567,23 @@ which for r looks like this `{r}`. 
Additional metadata can be included, for example a name to reference the code chunk: -```` -```{r my first code chunk} +```{r my-first-code-chunk} +#| echo: fenced x <- 5 x ``` -```` There are other language engines that can be used in RMarkdown, -you can learn more about that [here](https://bookdown.org/yihui/rmarkdown/language-engines.html). +you can learn more about that +[here](https://bookdown.org/yihui/rmarkdown/language-engines.html). + +:::{.callout-note} +R Markdown documents can also execute Python code, +you will need to have the R `{reticulate}` package installed, +and then point your R installation to the Python version you need. + +You can learn more about `{reticulate}`: +::: All code cells are run when you knit/render the entire document (like pressing "Run all" in JupyterLab). @@ -532,7 +594,7 @@ You can also run the code by clicking the green play button on the right-hand si :::{.activity} ::::{.activity-header} -## Lecture 5 Activity 6 +## Activity 7 :::: ::::{.activity-container} Clone a folder and try to render the .Rmd file. What happens? Try to use here() for the filepath. @@ -547,7 +609,7 @@ Clone a folder and try to render the .Rmd file. What happens? Try to use here() -### 1.3. Naming code chunks and R Markdown document sections +### Naming code chunks and R Markdown document sections When you include Markdown headers (using the `#` symbol) @@ -573,10 +635,10 @@ But in reality those names are not that useful and it is more helpful to give code chunks meaningful names. For example, in the code chunk below where we use a for loop to sum the numbers from 1 to 10, -we name the chunk "for loop sum". +we name the chunk `for loop sum`. -````R ```{r for loop sum} +#| echo: fenced # initialize sum to 0 loop_sum <- 0 @@ -587,11 +649,10 @@ for (i in seq(1:10)){ print(loop_sum) ``` -```` :::{.exercise} ::::{.exercise-header} -### Lecture 5 Exercise 1 +## Exercise 1 :::: ::::{.exercise-container} @@ -634,10 +695,7 @@ Questions: :::: ::: - - - -### 1.4. 
Code chunk options +### Code chunk options There are many code chunk options that you can set. These options let you customize chunk behavior, @@ -656,19 +714,22 @@ or locally for a specific chunk (these will override the global chunk options if they are contradictory). Global options are usually set in one chunk at the top of the document -and looks like this (this is a screenshot): +and looks like this: -````r + +````{r} +#| error: true +#| eval: false ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE) ``` ```` -Global chunk options are set by adding them as arguments to `knitr::opts_chunk$set(...)` -(put them in place of `...` and separate multiple options with a comma). -The only global chunk options set in this document is `echo = FALSE`, -which hides the code chunks and only shows the output, -something that can be useful for non-technical reports. +Global chunk options are set by adding them as arguments to `knitr::opts_chunk$set(...)`; +replace the `...` with comma separated options. +For example, if you set the global chunk option to `echo = FALSE`, +it will hide the code chunks and only shows the output. +This is something that can be useful for non-technical reports. Local chunk options are set by adding the options in the curly braces of a code chunk after the language engine and code chunk name. @@ -676,57 +737,83 @@ For example, to not display warnings in a single code chunk we would use the `warning = FALSE` code chunk as follows: -````r ```{r correlation no warning, warning = FALSE} # some R code that throws a warning cor( c( 1 , 1 ), c( 2 , 3 ) ) ``` -```` + +R Markdown and `knitr` also support the Quarto `#|` comment option. +At the beginning of your code chunk, +you can supply your chunk options with a comment. +This allows you to put many options (and long option text) on separate lines. +This plays well with version control and tries to follow semantic line break rules. 
+ +For example, +you can re-write the `correlation no warning` chunk as + +```{r, correlation no warning 2} +#| echo: fenced +#| warning: false +cor( c( 1 , 1 ), c( 2 , 3 ) ) +``` + +:::{.callout-note} +The `#|` chunk option notation takes in a different format than the inline option. +E.g., we use YAML `true`, and `false` values, instead of R's `TRUE`, and `FALSE`. +We also use `:` to set key-value pairs, instead of `=`. + +Also for the `correlation no warning` examples above we needed to change the +name of the second chunk because documents cannot have chunks with the same name. +::: + +See this page for more information about chunk options: + ### A few tips and tricks -- R Markdown support inline evaluated code via the following syntax - ``` - Adding 3 to 4 gives `r 4 + 3`. - The value of `x` is currently `r x`. - ``` +- R Markdown (and Quarto) support inline evaluated code via the following syntax + +```{markdown} +Adding 3 to 4 gives `r 4 + 3`. +The value of `x` is currently `r x`. +``` + - Latex equations can be written the same way as in Jupyter notebooks and standard markdown documents. - - `$\alpha = 5$` for inline latex and `$$\alpha = 5$$` for a math block. - - When hovering over equations, - R will display the rendered equation in a pop up. + - `$\alpha = 5$` for inline latex and `$$\alpha = 5$$` for a math block. + - When hovering over equations, R Studio will display the rendered equation in a pop up. - R Markdown is built upon the Pandoc Markdown engine. This is useful to know since the [Pandoc manual](https://pandoc.org/MANUAL.html) is a great exhaustive resource for looking up anything Markdown related. - One of the features made available thanks to Pandoc is support for citations and bibliographies. - - Let's cite the R-package by typing `citation()` into the console, - and copying the BibTex citation into a new document - that we call `rstudio-demo.bib - and adding an identifier string (a key) before the first comma, - e.g. `r-lang`. 
- - Include the following field in the YAML metadata - in the beginning of the document: `bibliography: rstudio-demo.bib`, - then cite it somewhere in the text by adding `[@r-lang]`. - The bibliography will be appended to the document, - so it is advisable to add a heading saying `# References` - at the very end. -- When working with R Markdown - (and code in general) - be careful that you don't copy stylized quotation marks - because these will not work. + - Let's cite the R-package by typing `citation()` into the console, + and copying the BibTex citation into a new document + that we call `rstudio-demo.bib` + and adding an identifier string (a key) before the first comma, + e.g. `r-lang`. + - Include the following field in the YAML metadata + in the beginning of the document: `bibliography: rstudio-demo.bib`, + then cite it somewhere in the text by adding `[@r-lang]`. + The bibliography will be appended to the document, + so it is advisable to add a heading saying `# References` + at the very end. + +When working with R Markdown (and code in general) + be careful that you don't copy stylized quotation marks because these will not work. For example, this will throw an error: - ``` - a = “This string” - ``` - It should look like this instead: +```{markdown} +a = “This string” +``` - ``` - a = "This string" - ``` +It should look like this instead: -# R Markdown templates +```{markdown} +a = "This string" +``` + +## R Markdown templates We have seen how to use the output formats `html_document` and `pdf_document`, but as noted in lab3 during the lab, @@ -740,22 +827,19 @@ This format does not have as many options as HTML, and you can use it like this: ```yaml - output: github_document - ``` Another way of creating a a `github_document` is to go to `New -> R Markdown -> From Template -> GitHub Document`. As you can see there are other useful templates here. -We will see how to create a presentation soon, -but first lets see how to work with a word template. 
+We will see how to create a presentation soon. + + A word document can be created with this YAML header: ```yaml - output: word_document - ``` We can modify the style of a word document @@ -766,21 +850,20 @@ saving this under a new name, and then using it as a template: ```yaml - output: word_document: reference_docx: "our-style-doc.docx" ``` -There are [more details in this article](https://bookdown.org/yihui/rmarkdown/document-templates.html), -including how to create your own custom template, -for example for a CV. +There are +[more details in this article](https://bookdown.org/yihui/rmarkdown/document-templates.html), +including how to create your own custom template, for example for a CV. :::{.exercise} ::::{.exercise-header} -### Lecture 5 Exercise 2 +## Exercise 2 :::: ::::{.exercise-container} @@ -833,30 +916,11 @@ Questions: ## References -1 - D. E. Knuth, Literate Programming, The Computer Journal, Volume 27, Issue 2, 1984, Pages 97–111, https://doi.org/10.1093/comjnl/27.2.97 -2 - Y. Xie Dynamic Documents with R and knitr -3 - -4 - https://www.markdownguide.org/ -5- https://daringfireball.net/projects/markdown/ -6 - https://babelmark.github.io/faq/ -7 - RStudio Conf 2022 - Quarto Workshop -8 - -9 - https://rhodesmill.org/brandon/2012/one-sentence-per-line/ UNIX for Beginners - - -NOT INCLUDED FOR NOW: - -### Code snippets - -As we started to type `for` above, -the code completion popped up -and the first entry said "snippet", -what is that? -A code snippet is a text macro, -which means that you can type a short string of characters -to insert a template or snippet of text by pressing TAB. -You can see all the default snippets -and define your own by going to -`Tools -> Global options -> Code -> Edit snippets`. -Snippets are available anywhere in RStudio, -not just in R Markdown documents. + +1. D. E. Knuth, Literate Programming, The Computer Journal, Volume 27, Issue 2, 1984, Pages 97–111, https://doi.org/10.1093/comjnl/27.2.97 +2. Y. 
Xie, Dynamic Documents with R and knitr +3. +4. +5. +6. RStudio Conf 2022 - Quarto Workshop +7. UNIX for Beginners diff --git a/lectures/6-rmarkdown-quarto-slides-ghpages.qmd b/lectures/6-rmarkdown-quarto-slides-ghpages.qmd index bfdd77c..4f95534 100644 --- a/lectures/6-rmarkdown-quarto-slides-ghpages.qmd +++ b/lectures/6-rmarkdown-quarto-slides-ghpages.qmd @@ -7,7 +7,7 @@ jupyter: python3 {{< include ../learning_objectives/lo-ch-06.qmd >}} -**Platform in focus** RStudio IDE + Quarto +**Platform in focus** Jupyter Lab, RStudio IDE, Quarto :::{.activity} ::::{.activity-header} diff --git a/lectures/7-virtual-environments.qmd b/lectures/7a-virtual-environments-conda.qmd similarity index 99% rename from lectures/7-virtual-environments.qmd rename to lectures/7a-virtual-environments-conda.qmd index 45da1b1..fb7bd10 100644 --- a/lectures/7-virtual-environments.qmd +++ b/lectures/7a-virtual-environments-conda.qmd @@ -7,6 +7,8 @@ jupyter: python3 {{< include ../learning_objectives/lo-ch-07.qmd >}} +**Platform in focus** Conda + :::{.activity} ::::{.activity-header} ## Lecture 7 Activity 1 diff --git a/lectures/7b-virtual-environments-renv.qmd b/lectures/7b-virtual-environments-renv.qmd new file mode 100644 index 0000000..fb7bd10 --- /dev/null +++ b/lectures/7b-virtual-environments-renv.qmd @@ -0,0 +1,658 @@ +--- +title: Virtual environments +jupyter: python3 +--- + +## Learning outcomes + +{{< include ../learning_objectives/lo-ch-07.qmd >}} + +**Platform in focus** Conda + +:::{.activity} +::::{.activity-header} +## Lecture 7 Activity 1 +:::: +::::{.activity-container} +Try to run this code. What do you think it is going on? + +```python +from palmerpenguins import load_penguins +penguins = load_penguins() +penguins.head() +``` +:::: +::: + +--- + +# Virtual environments + +Virtual environments let's you have multiple versions of packages +and programs on the same computer +without them creating conflicts with each other. 
+You will be using virtual Python and R environments +throughout the program to setup your packages for different courses. + +--- + +Which of the following items is NOT a benefit of using Conda environments? + +``` +A. Increase code performance +B. Helping with reproducibility +C. Using different versions of the same package +D. Creating isolated computational environment for testing new packages +``` +--- + +## Conda + +[**conda**](http://conda.pydata.org/docs/) is an **open source `package` and `environment` management system for any programming language**; +though it is the most popular in the python community. +[Anaconda](https://www.continuum.io/why-anaconda) is a data science platform for Python +that comes with a lot of packages by default. Unlike Anaconda, +Miniconda doesn't come with any installed packages by default, +and we can pick and choose which ones we want. +Both include Python and conda. + +For example, the +[MDS Python installation instructions](https://ubc-mds.github.io/resources_pages/install_ds_stack_mac/#python-conda-and-jupyterlab) +had you install Miniconda. +All flavors of `*conda` give you the `conda` function in the terminal. + +### Managing Conda + +Let's first start by checking if conda is installed. + +```bash +conda --version +which conda +``` + +To see which conda commands are available, +type `conda --help`. +To see the full documentation for any command of these commands, +type the command followed by `--help`. +For example, +to learn about the conda update command: + +```bash +conda update --help +``` + +Let's update our conda to the latest version. +Note that you might already have the latest version since we downloaded it recently. + +```bash +conda update conda +``` + +You will see some information about what there is to update +and be asked if you want to confirm. +The default choice is indicated with `[]`, +and you can press Enter to accept it. 
+It would look similar to this: + +```bash +Using Anaconda Cloud api site https://api.anaconda.org +Fetching package metadata: .... +.Solving package specifications: ......... + +Package plan for installation in environment //anaconda: + +The following packages will be downloaded: + + package | build + ---------------------------|----------------- + conda-env-2.6.0 | 0 601 B + ruamel_yaml-0.11.14 | py27_0 184 KB + conda-4.2.12 | py27_0 376 KB + ------------------------------------------------------------ + Total: 560 KB + +The following NEW packages will be INSTALLED: + + ruamel_yaml: 0.11.14-py27_0 + +The following packages will be UPDATED: + + conda: 4.0.7-py27_0 --> 4.2.12-py27_0 + conda-env: 2.4.5-py27_0 --> 2.6.0-0 + python: 2.7.11-0 --> 2.7.12-1 + sqlite: 3.9.2-0 --> 3.13.0-0 + +Proceed ([y]/n)? y + +Fetching packages ... +conda-env-2.6. 100% |################################| Time: 0:00:00 360.78 kB/s +ruamel_yaml-0. 100% |################################| Time: 0:00:00 5.53 MB/s +conda-4.2.12-p 100% |################################| Time: 0:00:00 5.84 MB/s +Extracting packages ... +[ COMPLETE ]|###################################################| 100% +Unlinking packages ... +[ COMPLETE ]|###################################################| 100% +Linking packages ... +[ COMPLETE ]|###################################################| 100% +``` + +In this case, +conda itself needed to be updated, +and along with this update some dependencies also needed to be updated. +There is also a NEW package that was INSTALLED in order to update conda. +You don't need to worry about remembering to update conda, +it will let you know if it is out of date when you are installing new packages. + +## Managing Environments + +### What is a conda environment and why is it so useful? + +Using `conda`, you can create an isolated python *environment* for your project. +An environment is a set of packages that can be used in one or multiple projects. 
+There are several major benefits of using environments: + + +- You can guarantee that someone else can reproduce your project + by specifying which package versions you used + and making it easy for others to install the same versions. +- If two of your projects rely on different versions of the same package, + you can install these in different environments. +- If you want to play around with a new package, + you don't have to change the packages you use for your data analysis + and risk messing something up. +- When you develop your own packages, + it is essential to use environments, + since you want to make sure you know exactly which packages yours depends on, + so that it runs on other systems than your own. + +The default environment is the `base` environment, +which contains only the essential packages from Miniconda +(assuming you installed Miniconda). +You can see that your shell's prompt string is prefaced with `(base)` +when you are inside this environment. +In the setup guide, +we gave you instructions for how to activate this environment by default +every time you open Bash. +There are two ways of creating a conda environment. + +1. Manual specifications of packages. +2. An environment file in YAML format (`environment.yaml`). + +### Creating environment by manually specifying packages + +We can create a `test_env` conda environment by typing `conda create -n test_env`. +However, +it is often useful to specify more than just the name of the environment, +e.g. the channel from which to install packages, the Python version, +and a list of packages to install into the new env. +In the example below, +I am creating the `test_env` environment +that uses python 3.7 and a list of libraries: `jupyterlab` and `pandas`. +I am explicitly installing my packages from the `conda-forge` repository +(also part of the MDS setup instructions). 
+ +```bash +conda create -n test_env -c conda-forge python=3.7 jupyterlab pandas=1.0.2 +``` + +conda will solve any dependencies between the packages like before +and create a new environment with those packages. +Usually, +we don't need to specify the channel, +but in this case I want to get the very latest version of these packages, +and they are made available in `conda-forge` +before they reach the default conda channel. + +To activate this new environment, +you can type `conda activate test_env` +(and `conda deactivate` for deactivating). +Since you will do this often, +we created an alias shortcut `ca` +that you can use to activate environments. +To know the current environment that you're in you can look at the prefix +of the prompt string in your shell which now changed to (`test_env`). +And to see all your environments, +you can type `conda env list`. + +#### Removing environments + +If you are creating environments for practice, or you want to recreate an environment you can delete your conda environments by: + +```bash +# look for all the installed environments +conda env list +``` + +```bash +# delete an environment +conda remove --name ENV_NAME --all +``` + +Similarly, all your environments are installed within your `miniconda3` folder, +which is typically located in `~/miniconda3`. +In here you will see an `envs` folder. +If you delete the folder with the corresponding environment name (e.g., with `rm`) +you can delete an environment this way too. + +### Sharing Environments with others + +To share an environment, you can export your conda environment to an environment file, +which will list each package and its version +in the format `package=version=build`. 
+ +Exporting your environment to a file called `environment.yaml` +(it could be called anything, +but this is the conventional name +and using it makes it easy for others +to recognize that this is a conda env file, +the extension can be either `.yaml` or `.yml`): + +```bash +conda env export -f environment.yaml +``` + +Remember that `.yaml` files are plain text, +so you can use a text editor such as VS Code to open them. +If you do, +you will realize that this environment file has A LOT more packages +than `jupyterlab` and `pandas`. +This is because the default behavior is to also list the dependencies +that were installed together with these packages, +e.g. `numpy`. +This is good in the sense that it gives an exact copy of *everything* +in your environment. + +However, +some dependencies might differ between operating systems, +so this file *might* not work with someone from a different OS. +To remedy this, +you can append the `--from-history` flag, +which look at the history of the packages you explicitly told conda to install +and only list those in the export. +The required dependencies will then be handled in an OS-specific manner during the installation, +which guarantees that they will work across OSes. +This `environment.yaml` file would be much shorter and look something like this: + +```yaml +name: test_env +channels: + - conda-forge + - defaults +dependencies: + - conda + - python=3.7 + - pandas==1.0.2 + - jupyterlab +``` + +Importantly, +this will not include the package version +unless you included it when you installed +with the `package==version` syntax. +For an environment to be reproducible, +you **NEED** to add the version string manually. + +--- + +**Lecture** + +--- + +### Creating environment from an environment file + +Now, let's install `environment.yaml` environment file above so that we can create a conda environment called `test_env`. 
+ +```bash +$ conda env create --file environment.yaml +``` + +### Copying an environment + +We can make an exact copy of an environment to an environment with a different name. +This may be useful for any testing versus live environments or different Python 2.7 versions for the same packages. +In this example, `test_env` is cloned to create `live_env`. + +```bash +conda create --name live_env --clone test_env +``` + +### Deleting an environment + +Since we are only testing out our environment, +we will delete `live_env` to remove some clutter. +*Make sure that you are not currently using `live_env`.* + +```bash +conda env remove -n live_env +``` + +### Making environments work well with JupyterLab + +In brief, +you need to install the `ipykernel` package +in any new environment you create, +and the `nb_conda_kernels` package needs to be installed +in the environment where JupyterLab is installed. + +By default, +JupyterLab only sees the conda environment where it is installed. +Since it is quite annoying to install JupyterLab and its extensions separately in each environment, +there is a package called `nb_conda_kernels` that makes it possible +to have a single installation of JupyterLab access kernels in other conda environments. +This package needs to be installed in the conda environment +where JupyterLab is installed. + +Lastly, +you also need to install a kernel in the new conda environment +so that it can be detected by `nb_conda_kernels`. +This kernel can be installed via the package `ipykernel` for Python +and the `r-irkernel` package for R +([more info in the nb_conda_kernels README](https://github.com/Anaconda-Platform/nb_conda_kernels#installation)). + +## Managing Packages + +### Seeing what packages are available + +We will now check packages that are available to us. +The command below will list all the packages in an environment, in this case `test_env`. 
+The list will include versions of each package, the specific build, +and the channel that the package was downloaded from. +`conda list` is also useful to ensure that you have installed the packages that you desire. + +```bash +conda list +``` + +```bash +# packages in environment at //miniconda/envs/test_env: +# +Using Anaconda Cloud api site https://api.anaconda.org +blas 1.1 openblas conda-forge +ca-certificates 2016.9.26 0 conda-forge +certifi 2016.9.26 py27_0 conda-forge +cycler 0.10.0 py27_0 conda-forge +freetype 2.6.3 1 conda-forge +functools32 3.2.3.2 py27_1 conda-forge +libgfortran 3.0.0 0 conda-forge +``` + +### Searching for a certain package + +Some packages might not be available in conda, but are available in [pypi](https://pypi.python.org/pypi). +For example, we will search for rasterio within the [anaconda cloud](https://anaconda.org/). +*It is not necessary to create an account with anaconda cloud, unless you'd like to contribute in the future when you are pro with conda.* + +In this example, we will use rasterio from conda-forge. The anaconda cloud page for rasterio will show how to install the package, compatible OS, individual files for that package, etc. 
+ +With conda you can do this search within the command line: + +```bash +conda search rasterio +``` + +```bash +Using Anaconda Cloud api site https://api.anaconda.org +Run 'anaconda show ' to get more details: +Packages: + Name | Version | Package Types | Platforms + ------------------------- | ------ | --------------- | --------------- + IOOS/rasterio | 1.0a2 | conda | linux-64, win-32, win-64, osx-64 + Terradue/rasterio | 0.32.0 | conda | linux-64 + : Fast and direct raster I/O for use with Numpy and SciPy + anaconda/rasterio | 0.36.0 | conda | linux-64, win-32, win-64, linux-32, osx-64 + conda-forge/rasterio | 1.0a2 | conda | linux-64, win-32, win-64, osx-64 + : Rasterio reads and writes geospatial raster datasets + dharhas/rasterio | 0.23.0 | conda | win-64 + : Rasterio reads and writes geospatial raster datasets. + erdc/rasterio | 0.23.0 | conda | win-64 + : Rasterio reads and writes geospatial raster datasets. + jesserobertson/rasterio | 0.23.0 | conda | linux-64, linux-32, osx-64 + jhamman/rasterio_to_xarray | 2016.03.16-1558 | ipynb | + : IPython notebook + krisvanneste/rasterio | 0.26.0 | conda | win-64 + ocefpaf/rasterio | 0.19.1 | conda | linux-64, osx-64 + omgarcia/rasterio | 0.25.0 | conda | linux-64 + pypi/rasterio | 0.13.2 | pypi | + : Fast and direct raster I/O for Python programmers who use Numpy + robintw/rasterio | 0.35.1 | conda | osx-64 + : Rasterio reads and writes geospatial raster datasets + sgillies/rasterio | 0.15 | conda | osx-64 + ztessler/rasterio | 0.31.0 | conda | osx-64 + : Fast and direct raster I/O for use with Numpy and SciPy +Found 15 packages +``` + +### Installing conda package + +Under the name column of the result in the terminal or the package column in the Anaconda Cloud listing, +shows the necessary information to install the package. +e.g. conda-forge/rasterio. +The first word list the channel that this package is from and the second part shows the name of the package. 
+ +To install the latest version available within the channel, do not specify in the install command. We will install version 0.35 of `rasterio` from conda-forge into `test_env` in this example. Conda will also automatically install the dependencies for this package. + +```bash +conda install -c conda-forge rasterio=0.35 +``` + +If you have a few trusted channels that you prefer to use, you can pre-configure these so that everytime you are creating an environment, you won't need to explicitly declare the channel. + +```bash +conda config --add channels conda-forge +``` + +#### Removing a conda Package + +We decided that rasterio is not needed in this tutorial, so we will remove it from `test_env`. +Note that this will remove the main package rasterio and its dependencies (unless a dependency was installed explicitly at an earlier point in time or is required be another package). + +```bash +conda remove -n test_env rasterio +``` + +```bash +Using Anaconda Cloud api site https://api.anaconda.org +Fetching package metadata ......... +Solving package specifications: .......... + +Package plan for package removal in environment //anaconda/envs/test_env: + +The following packages will be REMOVED: + + rasterio: 0.35.1-np111py27_1 conda-forge + +Proceed ([y]/n)? y + +Unlinking packages ... +[ COMPLETE ]|#######################################################################################################| 100% +``` + +--- + +Select all the correct answers + +Which of the following sentences are true about Conda? + +``` +A. It is a command line tool +B. It is a package manager +C. It is a Python package +D. It is installed as part of Anaconda and Miniconda +E. It is a metapackage +F. It installs Python +G. Using it you can handle Python packages only +``` +--- + +# R environments + +In R, +environments are managed by `renv`, +which works with similar principles as `conda`, +and other virtual environment managers, +but the commands are different. 
+To see which commands are used in `renv`, +you can [visit the project website](https://rstudio.github.io/renv/articles/renv.html). +Briefly, +`renv::init()` is used to create a new env, +`renv::snapshot()` is used to save/export the environment to a file (`renv.lock`), +and installing and removing packages are done as usual +via the `install.packages()` and `remove.packages()` commands. + + + + +## Attribution + +The conda virtual environment section of this guide +was originally published at http://geohackweek.github.io/ under a CC-BY license +and has been updated to reflect recent changes in conda, +as well as modified slightly to fit the MDS lecture format. + +# R Markdown repetition and a few tips + +Let's start with creating an R project again. +To use this with an existing git repo, +we can select "Existing dir". +You can also create an empty one +and move your git repo in here later, +as long as there is a `.git` folder +RStudio will show you the context menu for git. + +Next, +let's create an R Notebook. +We could have created an R Markdown document also, +but the notebook offers a few conveniences. +Mostly that it has a preview option `html_notebook` +that renders the notebook to HTML +in its current state. +In contrast, +knitting the notebook to HTML via `html_document` +will run all cells so this takes longer. + +**Note that it is important to knit to HTML before sharing +so that you are sure everything works from scratch.** +This is the same reason we should do "Run all" in Jupyter Lab before sharing +and why we don't want to store our R workspace sessions. +We need to make sure that someone new can run this from its current state. +Another useful tool for this in R is to use `devtools::session_info()` +at the top or bottom of your document +(I put it at the end of the chunk where I load libraries) +to ensure that you have included information +about the versions of the packages you are running +so that someone else can use the same version. 
+There are more robust ways of version control +that we will get into later in MDS, +but this is a good minimum measure +that is easy for you to get into the habit of doing. + +A couple of features that are good to know +in addition to those we learnt last time, +are block commenting and automatic code reformatting. +If I type a few lines where I for example forget to add whitespace +around an operator or assignment, +going to `Code -> Reformat code` +( +Ctrl + Shift + A on Windows/Linux or +Cmd + Shift + A on a Mac +) +will fix this automatically +for all highlighted lines. +If I want to toggle commenting for some lines, +I can click `Code -> Comment/Uncomment line` +( +Ctrl + Shift + C on Windows/Linux or +Cmd + Shift + C on a Mac +), +instead of manually adding `#` in front of each line. + +One final tip is the use of the `here` package for file paths. +We have already solved the part of setting the working directory +by creating an R proj. +If you only plan on using R Markdown files, +you would be fine writing relative paths +(e.g. `../data/cars.csv`) +the same way you would write them in Python +because they look relative to their own location. +However, +if you also need to run something from a script or the console, +note that the working directory path will now be used +as the current directory, +rather than the directory of the script +so the same relative path will not work +(you would need `data/cars.csv` instead). +`here` solves this by allowing you to type +`here::here('data', 'cars.csv')` from wherever you are +which also makes sure that file paths work across operating systems +([more info on `here` here](https://malco.io/2018/11/05/why-should-i-use-the-here-package-when-i-m-already-using-projects/)). + +# R Markdown YAML header + +The YAML header +(also called the "front matter") +is where we can specify metadata about our project. 
+It is delimited by two `---` (three hyphens) +and when we create a new notebook, +it looks like this: + +```yaml +--- +title: "R Notebook" +output: html_notebook +--- +``` + +In YAML, +data is stored as a `key: value` pair, +just like a Python dictionary. +We can add new values, +for example the author name +and the date. + +```yaml +--- +title: "R Notebook" +output: html_notebook +author: Joel Ostblom +date: 2020-09-23 +--- +``` + +R code can be evaluated inside the YAML header, +so if we wanted the date to be updated +every time we knit the document, +we could instead write ``date: `r Sys.Date()` ``. +Other useful options include the ones for +numbering headings, adding a table of contents, +and placing the table of contents on the side of the document. +Since these are options to the output document, +they are indented under that section with two or four spaces: + +```yaml +--- +output: + html_notebook: + toc: yes + toc_float: yes + number_sections: yes +--- +``` + +Another useful option is to fold away your code, +but still have it available for view if someone desires to see it. + +```yaml +--- +output: + html_document: + code_folding: hide +--- +``` + + +## Attribution + +The conda virtual environment section of this guide +was originally published at http://geohackweek.github.io/ under a CC-BY license +and has been updated to reflect recent changes in conda, +as well as modified slightly to fit the MDS lecture format. diff --git a/lectures/8-regex-filenames-project-organization.qmd b/lectures/8-regex-filenames-project-organization.qmd index f6410ff..db1efd1 100644 --- a/lectures/8-regex-filenames-project-organization.qmd +++ b/lectures/8-regex-filenames-project-organization.qmd @@ -551,296 +551,6 @@ What is wrong with the following --- -# Introduction to Regular Expressions (REGEX) - -Like with most things, -the best way for you to learn Regex is to get practice using it. 
-There are a few exercises included in the notebook, -and at the end I have also included links interactive online exercises -with are great to practice your regexes! - -To see what a particular regex is matching and how, -you can use one of these two webpages, -which both do a great job visualizing and explaining the different parts of a regex match: - -- https://regexr.com/ - - regexr interprets text input as one big string by default, - so you need to check "multiline" under "flags" (top right) - for it to behave as expected with beginning and end of line matches - (it hints at this in the output for both ^ and $). -- https://regex101.com/ - - regex101 has the "multiline" flag set by default. - -## Basic matching - -- Basic matching: if you look for a regular string, like `banana`, - regex will match the exact string (including its upper/lower case). -- Both JupyterLab and VS Code have built in regex functionality - (bring up the search box and click the `.*` symbol to use regex - rather than the default search). -- When learning regex it is helpful to use one of the two webtools - mentioned in the previous cell - in order to visualize how your regex is matching the text. -- For this lecture, we will use a list of fruits to learn about regex. - -applesas -apple -apricot -banana -bilberry -blackberry -blackcurrant -blood orange -blueberry -canary melon -cantaloupe -cherry -clementine -cloudberry -coconut -cranberry -cucumber -currant -dragonfruit -durian -elderberry -gooseberry -grape -grapefruit -papaya -passionfruit -peach -orange -oranges unripe -persimmon -pineapple -pomegranate -pomelo -purple mangosteen -rock melon -salal berry -satsuma -star fruit -strawberry -watermelon - -## The square brackets: `[]` - -- If you want to specify the set of possible characters - you can use square brackets `[]`; -- For example, `[Aa]pple` would match `Apple` and `apple`. 
- -:::{.exercise} -::::{.exercise-header} -### Lecture 8 Exercise 1 -:::: -::::{.exercise-container} -Find all the pairs of vowels in the fruit list. - -Highlight the black box below to see than correct answer -(the black box will not show up on GitHub, -so download the notebook unless you want the answer displayed) -Remember to use one of the websites linked above to help you understand -what your regex is matching -(https://regexr.com/ or https://regex101.com/). - -
-[aeiou][aeiou] -
- -:::: -::: - - -### Ranges within `[]` - -- You can also define ranges when using brackets. For example: - - `[A-Z]`: will match any upper case letter - - `[a-z]`: will match any lower case letter - - `[0-9]`: will match any digit - - `[0-5]`: will match any digit between 0 and 5 -- The order cannot be reversed, `[z-A]` does not work. -- You can combine ranges: `[A-Za-z]`. -- You can use square brackets starting with a caret. For example: - - `[^A-Z]`: will match anything that is not an upper case letter - - `[^0-9]`: will match anything that is not a digit - - Note that the caret needs to be inside the bracket, - if it is outside it will match the beginning of a line as described under the "Anchors" section below. -- For the curious, these ranges are ordered based on [ASCI codes](https://en.wikipedia.org/wiki/ASCII#Printable_characters) - where every character is represented by a number. - The first character in the list is ` ` (space) and the last is `~` (tilde). - The full list is shown below: - ``` - !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ - ``` - -## Special matching characters - -- A common operation is to match any character (e.g. between two important characters). -- Instead of writing out the full range `[ -~]` (space to tilde), - the special character `.` can be used to match any character in the list above. - - Note that `.` does not match the newline character, - so if you have an expression that continues on the next line it will not be matched. -- To match a literal `.` (the period character), - you can "escape" its special meaning by prefacing it with a backslash `\.` (most common) - or surrounding it with square brackets `[.]`. 
-- Another useful special character is `\w`, - which matches any character that normally occurs inside a word - (so it does not match spaces, underlines, etc) - - -:::{.exercise} -::::{.exercise-header} -### Lecture 8 Exercise 2 -:::: -::::{.exercise-container} - -What is the difference between writing `[A-Za-z]` and [`A-z`]? - -
-[A-z] will also match the characters `[/]^_`, as you can see in the list above. -
-:::: -::: - - -:::{.exercise} -::::{.exercise-header} -### Lecture 8 Exercise 3 -:::: -::::{.exercise-container} - -Match any characters between two `_`. - -
-_.*_ -
- -:::: -::: - -## Anchors - -- The caret outside the brackets means beginning of line. For example, `^apple` will match all lines that start with `apple`, including `apple sauce` and `apples`. -- The dollar sign `$` means end of line, e.g., `fruit$` will match lines that end with `fruit`. -- To remember this, you can use the mnemonic "Start with power (`^`) and end with money (`$`)" (originally from Jenny Bryan). -- Another useful anchor is `\b`, which matches end of word. - - -:::{.exercise} -::::{.exercise-header} -### Lecture 8 Exercise 4 -:::: -::::{.exercise-container} -Write a regex that will match a line that contains only pineapple. (Hint: you cannot just write `pineapple` - it will not work - why?) - -
-^pineapple$ -
note that if you use just `pineapple`, lines that also contain other words would match too. -
-:::: -::: - - - - -## Repetitions - -- To match multiple of the same character, - you can either repeat it or use the following syntax: - - `{n}`: exactly `n` occurrences - - `{n,}`: at least `n` occurrences - - `{0,m}`: at most `m` occurrences - - `{n,m}`: between `n` and `m` (inclusive) occurrences - - -### Special repetition characters - -- There are some shortcuts for the most common repetitions: - - `?`: means 0 or 1 time (`{0,1}`) - - `*`: means 0 or more time (`{0,}`) - - `+`: means 1 or more time (`{1,}`) -- For example, `apples?` will match `apple` and `apples`. - - But `apples+` will not match `apple` or `appplesq`, - but it will match `apples`, `appless`, `applesss`, etc. - -:::{.exercise} -::::{.exercise-header} -### Lecture 8 Exercise 5 -:::: -::::{.exercise-container} -Find the fruits with names between 10 and 12 characters. - -
-.{10,12} -
-:::: -::: - -:::{.exercise} -::::{.exercise-header} -### Lecture 8 Exercise 6 -:::: -::::{.exercise-container} -Find the lines with no more than 4 letters. - -
-^.{0,4}$ -
-:::: -::: - -:::{.exercise} -::::{.exercise-header} -### Lecture 8 Exercise 7 -:::: -::::{.exercise-container} -Find all the words that contain at least two consecutive vowels. - - -
-[aeiou]{2,} -
or
-[aeiou][aeiou]+ -
- -:::: -::: - -:::{.exercise} -::::{.exercise-header} -### Lecture 8 Exercise 8 -:::: -::::{.exercise-container} - -This is a bit harder and derives from all previous sections: Match entire words that end in `_`. - -
-\\w*_\\b -
- -:::: -::: - -:::{.exercise} -::::{.exercise-header} -### Additional exercises -:::: -::::{.exercise-container} - - -- Go through the interactive tutorials and practice sessions at https://regexone.com/ - that correspond to the topics we have covered during class. -- The Library Carpentry organization has many regex exercises - in all sections of their regex course here https://librarycarpentry.org/lc-data-intro/ - (you can just to do the exercises). - -:::: -::: - - - - ## References [1] Kery, M. B., Radensky, M., Arya, M., John, B. E., & Myers, B. A. (2018, April). The story in the notebook: Exploratory data science using a literate programming tool. In Proceedings of the 2018 CHI Conference on Human Factors in Computing Systems (pp. 1-11). diff --git a/lectures/8b-regex.qmd b/lectures/8b-regex.qmd new file mode 100644 index 0000000..c8e5205 --- /dev/null +++ b/lectures/8b-regex.qmd @@ -0,0 +1,305 @@ +--- +title: Introduction to Regular Expressions (RegEx) +jupyter: python3 +--- + +## Learning outcomes + +{{< include ../learning_objectives/lo-ch-08.qmd >}} + + +## Introduction + +Like with most things, +the best way for you to learn Regex is to get practice using it. +There are a few exercises included in the notebook, +and at the end I have also included links to interactive online exercises +which are great to practice your regexes! + +To see what a particular regex is matching and how, +you can use one of these two webpages, +which both do a great job visualizing and explaining the different parts of a regex match: + +- https://regexr.com/ + - regexr interprets text input as one big string by default, + so you need to check "multiline" under "flags" (top right) + for it to behave as expected with beginning and end of line matches + (it hints at this in the output for both ^ and $). +- https://regex101.com/ + - regex101 has the "multiline" flag set by default. 
+ +## Basic matching + +- Basic matching: if you look for a regular string, like `banana`, + regex will match the exact string (including its upper/lower case). +- Both JupyterLab and VS Code have built-in regex functionality + (bring up the search box and click the `.*` symbol to use regex + rather than the default search). +- When learning regex it is helpful to use one of the two webtools + mentioned in the previous cell + in order to visualize how your regex is matching the text. +- For this lecture, we will use a list of fruits to learn about regex. + +```markdown +applesas +apple +apricot +banana +bilberry +blackberry +blackcurrant +blood orange +blueberry +canary melon +cantaloupe +cherry +clementine +cloudberry +coconut +cranberry +cucumber +currant +dragonfruit +durian +elderberry +gooseberry +grape +grapefruit +papaya +passionfruit +peach +orange +oranges unripe +persimmon +pineapple +pomegranate +pomelo +purple mangosteen +rock melon +salal berry +satsuma +star fruit +strawberry +watermelon +``` + +## The square brackets: `[]` + +- If you want to specify the set of possible characters + you can use square brackets `[]`; +- For example, `[Aa]pple` would match `Apple` and `apple`. + +:::{.exercise} +::::{.exercise-header} +### Lecture 8 Exercise 1 +:::: +::::{.exercise-container} +Find all the pairs of vowels in the fruit list. + +Highlight the black box below to see the correct answer +(the black box will not show up on GitHub, +so download the notebook unless you want the answer displayed). +Remember to use one of the websites linked above to help you understand +what your regex is matching +(https://regexr.com/ or https://regex101.com/). + +
+[aeiou][aeiou] +
+ +:::: +::: + + +### Ranges within `[]` + +- You can also define ranges when using brackets. For example: + - `[A-Z]`: will match any upper case letter + - `[a-z]`: will match any lower case letter + - `[0-9]`: will match any digit + - `[0-5]`: will match any digit between 0 and 5 +- The order cannot be reversed; `[z-A]` does not work. +- You can combine ranges: `[A-Za-z]`. +- You can use square brackets starting with a caret. For example: + - `[^A-Z]`: will match anything that is not an upper case letter + - `[^0-9]`: will match anything that is not a digit + - Note that the caret needs to be inside the bracket; + if it is outside it will match the beginning of a line as described under the "Anchors" section below. +- For the curious, these ranges are ordered based on [ASCII codes](https://en.wikipedia.org/wiki/ASCII#Printable_characters) + where every character is represented by a number. + The first character in the list is ` ` (space) and the last is `~` (tilde). + The full list is shown below: + ``` + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ + ``` + +## Special matching characters + +- A common operation is to match any character (e.g. between two important characters). +- Instead of writing out the full range `[ -~]` (space to tilde), + the special character `.` can be used to match any character in the list above. + - Note that `.` does not match the newline character, + so if you have an expression that continues on the next line it will not be matched. +- To match a literal `.` (the period character), + you can "escape" its special meaning by prefacing it with a backslash `\.` (most common) + or surrounding it with square brackets `[.]`. 
+- Another useful special character is `\w`, + which matches any character that normally occurs inside a word + (letters, digits, and the underscore `_`, so it does not match spaces, punctuation, etc.) + + +:::{.exercise} +::::{.exercise-header} +### Lecture 8 Exercise 2 +:::: +::::{.exercise-container} + +What is the difference between writing `[A-Za-z]` and `[A-z]`? + +
+[A-z] will also match the characters `[`, `\`, `]`, `^`, `_`, and the backtick, as you can see in the list above. +
+:::: +::: + + +:::{.exercise} +::::{.exercise-header} +### Lecture 8 Exercise 3 +:::: +::::{.exercise-container} + +Match any characters between two `_`. + +
+_.*_ +
+ +:::: +::: + +## Anchors + +- The caret outside the brackets means beginning of line. For example, `^apple` will match all lines that start with `apple`, including `apple sauce` and `apples`. +- The dollar sign `$` means end of line, e.g., `fruit$` will match lines that end with `fruit`. +- To remember this, you can use the mnemonic "Start with power (`^`) and end with money (`$`)" (originally from Jenny Bryan). +- Another useful anchor is `\b`, which matches a word boundary (the beginning or end of a word). + + +:::{.exercise} +::::{.exercise-header} +### Lecture 8 Exercise 4 +:::: +::::{.exercise-container} +Write a regex that will match a line that contains only pineapple. (Hint: you cannot just write `pineapple` - it will not work - why?) + +
+^pineapple$ +
note that if you use just `pineapple`, lines that also contain other words would match too. +
+:::: +::: + + + + +## Repetitions + +- To match multiple occurrences of the same character, + you can either repeat it or use the following syntax: + - `{n}`: exactly `n` occurrences + - `{n,}`: at least `n` occurrences + - `{0,m}`: at most `m` occurrences + - `{n,m}`: between `n` and `m` (inclusive) occurrences + + +### Special repetition characters + +- There are some shortcuts for the most common repetitions: + - `?`: means 0 or 1 times (`{0,1}`) + - `*`: means 0 or more times (`{0,}`) + - `+`: means 1 or more times (`{1,}`) +- For example, `apples?` will match `apple` and `apples`. + - But `apples+` will not match `apple` or `appplesq`, + but it will match `apples`, `appless`, `applesss`, etc. + +:::{.exercise} +::::{.exercise-header} +### Lecture 8 Exercise 5 +:::: +::::{.exercise-container} +Find the fruits with names between 10 and 12 characters. + +
+^.{10,12}$ +
+:::: +::: + +:::{.exercise} +::::{.exercise-header} +### Lecture 8 Exercise 6 +:::: +::::{.exercise-container} +Find the lines with no more than 4 letters. + +
+^.{0,4}$ +
+:::: +::: + +:::{.exercise} +::::{.exercise-header} +### Lecture 8 Exercise 7 +:::: +::::{.exercise-container} +Find all the words that contain at least two consecutive vowels. + + +
+[aeiou]{2,} +
or
+[aeiou][aeiou]+ +
+ +:::: +::: + +:::{.exercise} +::::{.exercise-header} +### Lecture 8 Exercise 8 +:::: +::::{.exercise-container} + +This is a bit harder and derives from all previous sections: Match entire words that end in `_`. + +
+\\w*_\\b +
+ +:::: +::: + +:::{.exercise} +::::{.exercise-header} +### Additional exercises +:::: +::::{.exercise-container} + + +- Go through the interactive tutorials and practice sessions at https://regexone.com/ + that correspond to the topics we have covered during class. +- The Library Carpentry organization has many regex exercises + in all sections of their regex course here https://librarycarpentry.org/lc-data-intro/ + (you can just do the exercises). + +:::: +::: + + + + +## References + +[1] Kery, M. B., Radensky, M., Arya, M., John, B. E., & Myers, B. A. (2018, April). The story in the notebook: Exploratory data science using a literate programming tool. In Proceedings of the 2018 CHI Conference on Human Factors in Computing Systems (pp. 1-11). diff --git a/renv.lock b/renv.lock index c723e84..4a2abf4 100644 --- a/renv.lock +++ b/renv.lock @@ -136,6 +136,16 @@ ], "Hash": "e0b3a53876554bd45879e596cdb10a52" }, + "here": { + "Package": "here", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "rprojroot" + ], + "Hash": "24b224366f9c2e7534d2344d10d59211" + }, "highr": { "Package": "highr", "Version": "0.11", @@ -287,6 +297,16 @@ ], "Hash": "062470668513dcda416927085ee9bdc7" }, + "rprojroot": { + "Package": "rprojroot", + "Version": "2.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "4c8415e0ec1e29f3f4f6fc108bef0144" + }, "sass": { "Package": "sass", "Version": "0.4.9", diff --git a/requirements.txt b/requirements.txt index e3bb248..b09dd89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,9 @@ jupyter numpy matplotlib +# supplemental to help run the course +jupyterlab + # used for instructors in course grading otter-grader==5.6.0 +rpy2