From 30abe15f7f98eb9e306cc8db8897b2a1e8481b8f Mon Sep 17 00:00:00 2001 From: Michel Wermelinger <14291202+mwermelinger@users.noreply.github.com> Date: Thu, 4 Apr 2024 02:35:52 -0700 Subject: [PATCH] Fix #29: options for linters (#30) * Pass arbitrary options to the linters (but `on` must be first argument) * If linter exits with status > 0, display error, unless it's syntax error * Add `black` for formatting headers in library docs * Update and regenerate docs * Update .gitignore --------- Co-authored-by: michael --- .gitignore | 2 + docs/essays/example-1-to-n/index.html | 6 +- docs/library/index.html | 60 ++++-- docs/search/search_index.json | 2 +- docs/sitemap.xml | 36 ++-- docs/sitemap.xml.gz | Bin 376 -> 375 bytes docs/writing/index.html | 8 +- poetry.lock | 59 +++++- pyproject.toml | 1 + src/algoesup/magics.py | 292 +++++++++++++------------- src/docs/essays/example-1-to-n.ipynb | 4 +- src/docs/writing.md | 6 +- 12 files changed, 281 insertions(+), 195 deletions(-) diff --git a/.gitignore b/.gitignore index 021c812..835bdbf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # https://github.com/github/gitignore/blob/main/Python.gitignore +.vscode/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/docs/essays/example-1-to-n/index.html b/docs/essays/example-1-to-n/index.html index e5beb26..245e790 100644 --- a/docs/essays/example-1-to-n/index.html +++ b/docs/essays/example-1-to-n/index.html @@ -1348,7 +1348,7 @@

Sum of 1 to n diff --git a/docs/library/index.html b/docs/library/index.html index 63de9c2..8e848c8 100644 --- a/docs/library/index.html +++ b/docs/library/index.html @@ -1140,7 +1140,15 @@

-
time_functions(functions: list[Callable], inputs: Callable, start: int, double: int, text: bool = True, chart: bool = False, value: bool = False) -> None
+
time_functions(
+    functions: list[Callable],
+    inputs: Callable,
+    start: int,
+    double: int,
+    text: bool = True,
+    chart: bool = False,
+    value: bool = False,
+) -> None
 
@@ -1303,7 +1311,14 @@

-
time_cases(function: Callable, cases: list[Callable], start: int, double: int, text: bool = True, chart: bool = False) -> None
+
time_cases(
+    function: Callable,
+    cases: list[Callable],
+    start: int,
+    double: int,
+    text: bool = True,
+    chart: bool = False,
+) -> None
 
@@ -1452,7 +1467,14 @@

-
time_functions_int(functions: list[Callable], generator: Callable = int_value, start: int = 1, double: int = 10, text: bool = True, chart: bool = True) -> None
+
time_functions_int(
+    functions: list[Callable],
+    generator: Callable = int_value,
+    start: int = 1,
+    double: int = 10,
+    text: bool = True,
+    chart: bool = True,
+) -> None
 
@@ -1651,13 +1675,16 @@

Activate/deactivate the pytype linter.

When active, the linter checks each code cell that is executed for type errors.

    -
  • %pytype --disable ... on activates the linter but does not check the given errors - (see the list of errors)
  • -
  • %pytype on is equal to %pytype --disable name-error,import-error on
  • +
  • %pytype on ... activates the linter with the command options given after on
  • +
  • %pytype on is equal to %pytype on --disable name-error,import-error
  • %pytype off deactivates the linter
  • %pytype shows the current status of the linter
  • %pytype? shows this documentation and the command’s options
+

For a list of possible options ..., enter !pytype -h in a code cell. +Some options may not be appropriate when running pytype within a notebook.

+

The --disable option expects a list of +errors to ignore, without spaces.

@@ -1682,13 +1709,18 @@

When active, the linter checks each code cell that is executed against the selected code style rules.

    -
  • %ruff --select ... --ignore ... on activates the linter with the given rules - (see the list of rules)
  • -
  • %ruff on is equal to %ruff --select A,B,C90,D,E,W,F,N,PL --ignore D100,W292,F401,F821,D203,D213,D415 on
  • +
  • %ruff on ... activates the linter with any command options given after on + (see [ruff’s list of rules])
  • +
  • %ruff on is equal to %ruff on --select A,B,C90,D,E,W,F,N,PL --ignore D100,W292,F401,F821,D203,D213,D415
  • %ruff off deactivates the linter
  • %ruff shows the current status of the linter
  • -
  • %ruff? shows this documentation and the command’s options
  • +
  • %ruff? shows this documentation
+

The command %ruff on ... will run ruff check --output-format json ... on each cell. +For a list of the possible options ..., enter !ruff help check in a code cell. +Some options may not be appropriate when running Ruff within a notebook.

+

The --select and --ignore options expect a list +of rule codes, without spaces.

diff --git a/docs/search/search_index.json b/docs/search/search_index.json index a8991c9..374754b 100644 --- a/docs/search/search_index.json +++ b/docs/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Algorithmic Essays","text":"

These documents provide guidance to help you write, critique, and share algorithmic essays.

We define algorithmic essays as short reports, with code, that explain and compare alternative approaches to solving a computational problem.

If you\u2019re a student, writing algorithmic essays benefits you in various ways:

  • You reinforce your learning by explaining concepts to others and by exploring alternative solutions to the same problem.
  • You develop professional skills like problem-solving, reviewing code, working collaboratively, and writing readable, tested, documented code.
  • You learn how to use professional tools like code formatters, linters and type checkers, which improves your employability.
  • You produce an artefact (the essay) for your portfolio for prospective employers.

If you\u2019re an educator, you can copy our resources from the GitHub repository and adapt them to your course, due to our permissive licence (see below).

Example

We provide some example essays to illustrate what they are.

While many approaches to writing algorithmic essays are possible, we recommend using Jupyter notebooks, the most widely used medium for mixing text and executable code.

If you\u2019re a student on M269, our data structures and algorithms course, you can write an essay with your existing software and share it on the forums.

Get started (M269)

If you prefer a \u2018Google Docs\u2019-like environment for collaborative authoring and commenting on essays, we suggest Deepnote or Google\u2019s Colaboratory (Colab for short):

  • free account
  • no software installation necessary
  • you can share your essays publicly (or just with a few people) to easily
    • receive and give feedback
    • work collaboratively on the same essay.

(We have no affiliation, commercial or otherwise, with Deepnote or Google.)

Get started (Deepnote) Get started (Colab)

"},{"location":"#acknowledgements","title":"Acknowledgements","text":"

This project was partly funded by the UK\u2019s Council of Professors and Heads of Computing, under the Special Projects grant scheme, in May 2023.

"},{"location":"#licences","title":"Licences","text":"

The code and text in this repository are Copyright \u00a9 2023\u20132024 by The Open University, UK. The code is licensed under a BSD 3-clause licence. The text is licensed under a Creative Commons Attribution 4.0 International Licence.

"},{"location":"deepnote-background/","title":"Background","text":""},{"location":"deepnote-background/#organisation","title":"Organisation","text":"

Deepnote uses an organisational framework with three major parts: workspaces can contain multiple projects and each project can contain multiple notebooks and files.

The workspace is the highest level structure in Deepnote, and is designed to group related Projects and enable team collaboration. Every user can create their own workspaces where they can manage access, set permissions and oversee projects. A workspace can have multiple members each with their own access permissions, but by default they can see all projects in the workspace.

Projects are the next tier down from workspaces in terms of organisation and provide the main working environment as well as the integrated file system. When you duplicated our project, you duplicated all of the notebooks, files and the environment too.

Notebooks are akin to Jupyter notebook. They are interactive documents that combine executable code with richly formatted text and visualisations. Notebooks are where you will write your essays.

"},{"location":"deepnote-background/#deepnote-vs-classic-notebook","title":"Deepnote vs Classic Notebook","text":"

Notebooks in Deepnote have similar functionality to classic Jupyter notebooks in that they combine rich text, visualisations and executable code in a single document. But there are a few notable differences in the UI and functionality worth mentioning.

Firstly, Deepnote is a cloud-based Jupyter notebook platform: this means no local software installation is required to get started. It also means you can access your documents from anywhere with an internet connection. This is a double-edged sword, of course: if you lose your connection, you lose access to your notebooks.

In Deepnote the divisions within a notebook are referred to as \u201cblocks\u201d instead of \u201ccells\u201d, but we will continue to use the classic terminology. Deepnote retains the same structure of code and markdown cells as used in Jupyter notebook, but it also provides additional cell types. Besides cells for data science, Deepnote adds rich text cells. Unlike standard Jupyter notebooks, these cells offer a what you see is what you get (WYSIWYG) text editing experience, similar to applications such as MS Word and Google Docs. They include spellchecking and the usual formatting shortcuts like Ctrl + B for bold, Ctrl + I for italics, and so on. Rich text cells can be advantageous if you plan to use Deepnote exclusively as they can simplify the writing process. However, it is important to note that any formatting from rich text cells will not be preserved if you download your notebook. So if you plan to use your notebooks on other platforms it is advisable to stick to Markdown cells for writing text.

Finally, there is a key difference in the way Deepnote and Jupyter handle Markdown. Normally to create a line break in Markdown, either a double space followed by the Enter key, or a backslash followed by the Enter key is required. This is the approach followed by Jupyter Notebook and many other notebook interfaces. Deepnote, however, does it differently: simply pressing Enter creates a line break without the need for explicit characters like double space or backslash. This alternative approach is a deviation from the Markdown standard and affects how rendered Markdown looks when moving between platforms.

"},{"location":"deepnote-background/#collaboration","title":"Collaboration","text":"

Deepnote was designed with collaboration in mind and offers several features to do this which are not found on some other platforms.

Users in Deepnote can work together on projects simultaneously: any changes made to files and notebooks within the project can be seen instantaneously by both parties. Real time collaboration works best when you are also communicating in real time with your peers, say for example, using Zoom, Teams, Discord or WhatsApp.

Additionally Deepnote offers the option for asynchronous communication through comments. Comments can be left in a specific cell and are visible by anybody viewing the notebook. The first comment made in a cell opens a new thread, and anyone commenting in a thread receives email notifications after a new message is posted. Open threads can be resolved to hide them and save space; threads can be reopened if needed. Any open threads are displayed in a main comments panel on the right-hand side of a project.

Comments are one of the ways to give and receive feedback on your essays. See the feedback guide for more details.

"},{"location":"deepnote-background/#limitations","title":"Limitations","text":"

As noted at the start of the guidance, Deepnote was selected as the platform for the Learning Professional Skills with Algorithmic Essays project due to its simple interface, customisable environment, and features for collaborative working. However, it is important to acknowledge and assess its limitations.

As mentioned above, Deepnote deviates from Jupyter Notebook by having new types of cell such as rich text cells. It also handles Markdown in a different way from most other platforms. This has implications for how your notebooks will be rendered using different Jupyter interfaces. The same notebook could look different on other platforms compared to Deepnote; the length of text lines might be different and some of the formatting may be altered or lost.

In addition to this, when working on a cloud-based platform such as Deepnote, executing cells can sometimes feel slow, especially when the virtual machine has been idle for a while. Furthermore, an issue has been observed when a Markdown cell contains an excessive amount of text, which appears to slow down performance, potentially due to the autocomplete functionality.

As a final point, the way Deepnote stores notebooks within the environment must be mentioned. You can of course upload a notebook to the system and access it like any other file, but if you want to run the notebook, it must be moved to the NOTEBOOKS section. This then becomes a problem if you wish to access the notebook file again, say using the terminal. When a notebook is moved to this section it effectively takes the notebook out of the integrated file system and locates it to a separate database which the user no longer has access to.

"},{"location":"deepnote-how-to/","title":"Deepnote how-to guides","text":""},{"location":"deepnote-how-to/#account-operations","title":"Account operations","text":""},{"location":"deepnote-how-to/#sign-up-for-deepnote","title":"Sign up for Deepnote","text":"

This is explained in our Getting Started guide.

"},{"location":"deepnote-how-to/#log-in","title":"Log in","text":"

If you have logged out, to log back in you need to verify your email again:

  1. Go to the sign-in page.
  2. Enter your email. Click the CAPTCHA button. Click Continue with email.
  3. You will receive an email from Deepnote with a link. Click on it.
"},{"location":"deepnote-how-to/#workspace-operations","title":"Workspace operations","text":"

Before any of the following operations, you must change from project view to workspace view:

  1. Click on your workspace name in the top left corner of the screen.
  2. From the drop-down menu, select Back to workspace.

You will now see a list of the projects in your workspace: Some projects were automatically added by Deepnote when creating your workspace.

After you have completed the desired workspace operations, click in the side panel on the project you want to work on next.

"},{"location":"deepnote-how-to/#enable-sharing","title":"Enable sharing","text":"
  1. In the workspace view, click on Settings & members in the side panel.
  2. Click on the Project settings tab.
  3. Turn on the option Allow projects to be shared publicly.
"},{"location":"deepnote-how-to/#project-operations","title":"Project operations","text":""},{"location":"deepnote-how-to/#rename-duplicate-download-or-delete-a-notebook-or-file","title":"Rename, duplicate, download or delete a notebook or file","text":"
  1. In the side panel, hover over the name of the chosen notebook or file.
  2. Click on the three dots that appear.
  3. From the drop-down menu, select the desired operation.

For notebooks, the download operations are called Export as .ipynb and Export as .pdf. If your notebook contains other types of cells besides Markdown and code, the downloaded .ipynb file won\u2019t be rendered correctly on other Jupyter platforms.

"},{"location":"deepnote-how-to/#duplicate-our-project","title":"Duplicate our project","text":"

This is explained in our Getting Started guide.

"},{"location":"deepnote-how-to/#share-your-project","title":"Share your project","text":"

The following assumes you have enabled sharing for your workspace.

  1. Click the Share button in the top right corner of the screen.
  2. To the right of Anyone with a link to this project, click on the drop-down menu and select Comment.
  3. Click the highlighted blue link to this project, to copy the link.
  4. Share that link with your peers by email or by posting in your course\u2019s forum.
"},{"location":"deepnote-how-to/#create-a-new-notebook","title":"Create a new notebook","text":"
  1. Click on the + icon next to the Notebooks heading in the side panel.
  2. Enter a name for your new notebook, then press Enter.
"},{"location":"deepnote-how-to/#upload-a-notebook-or-file","title":"Upload a notebook or file","text":"

The simplest way is to drag the notebook or file from your desktop to the Notebooks or Files section in the side panel.

Alternatively, to upload a file:

  1. Click on the + icon next to the Files heading in the left panel.
  2. Select Upload file from the drop-down menu.
  3. In the file browser, navigate to the file you want to upload, then click Open.
"},{"location":"deepnote-how-to/#notebook-operations","title":"Notebook operations","text":"

To perform an action on a cell, do one of the following:

  • Click on the cell to select it: the outline becomes blue. Press the action\u2019s keyboard shortcut.
  • Hover over the cell. A pop-up menu appears in the top right corner of the cell. Click on the action\u2019s icon or click the three dots to get a menu of actions.
"},{"location":"deepnote-how-to/#run-one-or-all-cells","title":"Run one or all cells","text":"

Running a cell executes the code or formats the Markdown text.

To run one cell, do one of the following:

  • Press Ctrl + Enter (Mac: Cmd + Enter) if the cell is selected.
  • Click the triangle icon in the top right corner of the cell.

To run all cells, click Run notebook in the top right corner of the notebook.

Note

The first time you run code, it will take some time, because Deepnote must first start a server with the necessary software.

"},{"location":"deepnote-how-to/#add-a-cell","title":"Add a cell","text":"

To insert a cell between two existing cells:

  1. Hover the mouse between the two cells.
  2. Click on the line that appears between both cells and do one of the following:
    • To insert a code cell, press c followed by Enter.
    • To insert a Markdown cell, press m followed by Enter.

To append a cell, scroll down to the end of the notebook and do one of the following:

  • To add a code cell, click on the Code button.
  • To add a Markdown cell, click on the Text button and select Markdown.
"},{"location":"deepnote-how-to/#delete-a-cell","title":"Delete a cell","text":"

Do one of the following:

  • Press Ctrl + Shift + Backspace (Mac: Cmd + Shift + Backspace) if the cell is selected.
  • Click the bin icon in the top right corner of the cell.
"},{"location":"deepnote-how-to/#comment-on-a-cell","title":"Comment on a cell","text":"

Do one of the following:

  • Press Ctrl + Alt + C (Mac: Cmd + Alt + C) if the cell is selected.
  • Click the speech bubble icon in the top right corner of the cell.

After typing your comment, press the upwards blue arrow to finish.

Warning

You must be logged into your account to comment on notebooks shared with you. If you\u2019re not logged in, your comments are marked as \u2018anonymous user\u2019 and the essay\u2019s author won\u2019t see them.

"},{"location":"deepnote-how-to/#format-a-code-cell","title":"Format a code cell","text":"

This \u2018pretty prints\u2019 the code. Do one of the following:

  • Press Alt + Shift + F if the cell is selected.
  • Click the three dots in the top right corner of the cell and select Format code from the drop-down menu. (To avoid scrolling all the way down, type f in the search box of the menu.)

Note

Formatting takes 1-2 seconds and adds an empty line (which you may delete) to the end of the cell.

Warning

If you get a message \u2018parsing failed\u2019, then the code cell is not valid Python and can\u2019t be automatically formatted. This may happen if the cell has IPython commands starting with %.

"},{"location":"deepnote-reference/","title":"Reference","text":""},{"location":"deepnote-reference/#workspace-interface","title":"Workspace interface","text":"

The workspaces interface provides an overview of any projects in your Workspace. On the left-hand side panel you will find a navigation menu allowing you to quickly navigate to different sections of your workspace. Starting from the top, the sections are:

  • Integrations - This section allows you to set up and manage connections to data sources. This is mostly used for data science work.

  • Settings & members - Manage who has access to the workspace and its resources.

  • Recents - Projects listed in order of most recently opened.
  • Private projects - A list of private projects, which only you have access to. Other workspace members can\u2019t see private projects.
  • Published apps - This section shows any apps you have published. In Deepnote, an app is a notebook in which some blocks have been hidden to abstract away technical details. This may be useful to present your findings to stakeholders with non-technical backgrounds.
  • PROJECTS - A list of all projects within the workspace.
"},{"location":"deepnote-reference/#project-interface","title":"Project interface","text":"

A project\u2019s interface has similarities to the interface for your workspace. Starting from the top, the sections on the left-hand side panel are:

  • NOTEBOOKS - This section is where your notebooks live. If you want to actively work on your notebooks they must be added to this location.
  • INTEGRATIONS - This section allows you to use an integration defined for the workspace. Integrations are mainly used in data science.
  • FILES - Each project in Deepnote has an integrated file system which you can view and access in this section. You can create and upload files and folders here.
  • TERMINALS - Deepnote allows you to launch terminals from this section by clicking on the \u201c+\u201d icon. As you would expect you can access the local file system through the terminal to run scripts or complete other tasks. Note that you cannot access any notebooks located in the NOTEBOOKS section from a terminal, they are stored in a separate database and not considered part of the file system.
  • TABLE OF CONTENTS - This section will show the major headings of the current notebook you are working on so you can quickly navigate through your document by clicking on them.
  • ENVIRONMENT - The environment section shows a simple overview of the environment you are currently working in. There is an option to expand this section into a more detailed view by clicking on the cog symbol next to the ENVIRONMENT heading in the top right hand corner of the panel.
"},{"location":"deepnote-reference/#notebooks","title":"Notebooks","text":"

Notebooks in Deepnote have the same core functionality as Jupyter Notebooks: they combine executable code and text in the same document.

See Deepnote vs Classic Notebook for some differences between the two.

"},{"location":"deepnote-reference/#access-levels","title":"Access levels","text":"

Access levels are the range of permissions or capabilities assigned to a user in Deepnote. They differ between the contexts of workspaces and projects.

The access levels for projects are:

  • App User: Can use published app, but cannot view the project source code.
  • View: Can inspect the project, but cannot view or post comments nor execute or edit files.
  • Comment: Can post and view comments in addition to inspecting the project.
  • Execute: Can execute code in addition to viewing and commenting, but cannot change anything or use terminals.
  • Edit: Can use terminals, connect datasets, comment and edit files as well as view and execute.

The access levels for workspaces are:

  • Viewer: Viewers can see all projects and members of a workspace. They can leave comments in projects but can\u2019t make any changes. They can duplicate a project to another workspace as well as request additional access from the team\u2019s owner.
  • Contributor: Contributors can execute all notebooks within the workspace as well as change input block values. They cannot make changes to code.
  • Editor: Editors can create and edit workspace projects.

  • Admin: Admins have all access rights, including permission to manage workspace members.

"},{"location":"deepnote-reference/#cells","title":"Cells","text":"

Cells (called \u2018blocks\u2019 in Deepnote) are the divisions within each notebook. They are a distinct area where code or text can be added depending on the type of the cell. See our how-to guide for working with cells.

"},{"location":"deepnote-reference/#terminal","title":"Terminal","text":"

A terminal will give you a command line interface for your project and runs a bash shell.

Launching a Terminal in Deepnote allows you to run scripts or complete tasks where the GUI is not suitable.

See the Deepnote documentation on terminals for more information.

"},{"location":"deepnote-reference/#environment","title":"Environment","text":"

The environment refers to the setup and configuration that supports the execution of code within your project.

The code in each project runs on a virtual machine, which is an isolated computing environment with its own CPU, memory and storage resources. These specifications can be adjusted in a limited way if required and various software packages can be added to your environment to suit your needs.

When you copied our project, you also copied the environment.

See Deepnote\u2019s documentation on custom environments for more information.

"},{"location":"deepnote-reference/#real-time-collaboration","title":"Real-time collaboration","text":"

Real time collaboration refers to the capability of multiple users to work on the same documents in the same project at the same time. Any changes to documents can be seen by all users working on the project as and when they happen.

See Deepnote\u2019s documentation on real-time collaboration for more details.

"},{"location":"deepnote-reference/#asynchronous-collaboration","title":"Asynchronous collaboration","text":"

Asynchronous collaboration is a method of working where users do not have to be working at the same time. Users can contribute to projects and documents at their own pace to suit their own schedule.

The main tool for asynchronous collaboration in Deepnote is the comments system. Users can comment on code and text in the corresponding cells to communicate with peers.

"},{"location":"deepnote-reference/#command-palette","title":"Command palette","text":"

The command palette provides quick access to all of the files in a project and the most popular actions.

You can open and close the command palette by pressing Cmd + P on Mac or Ctrl + P on Windows.

"},{"location":"deepnote-reference/#members","title":"Members","text":"

A member is a Deepnote user associated with a particular workspace.

When a user is a member of a workspace, they typically have access to all the projects within that workspace, but the access permissions can be adjusted.

Projects do not have members, but you can give or be given access to a project with certain permissions. See Access levels for more information.

"},{"location":"deepnote-reference/#markdown-cheat-sheet","title":"Markdown cheat sheet","text":"Feature Syntax/Example Headers H1 Header # H1 Header H2 Header ## H2 Header H3 Header ### H3 Header H4 Header #### H4 Header H5 Header ##### H5 Header Code Inline Code `Code` Code Block ```Code block``` Formatting Italic _italic_ or *italic* Bold **bold** or __bold__ Strikethrough ~~strikethrough~~ Links External Link [Google](https://www.google.com) Section Link [Top](#top) Lists Bulleted List - List item Numbered List 1. List item Math Inline Math $x=1$ Math Block $$$x=1$$$ Other Quote > Quote Divider --- HTML <h1>Title</h1>"},{"location":"deepnote-reference/#keyboard-shortcuts","title":"Keyboard shortcuts","text":"

Deepnote has many keyboard shortcuts for quickly performing typical actions on cells and text.

General

MAC WINDOWS & LINUX ACTION \u2318 + P ctrl + P Show/Hide command palette

Block Actions

MAC WINDOWS & LINUX ACTION \u21e7 + \u21b5 shift + enter Run current block and move cursor to next block (creates a new cell if at the end of the notebook) \u2318 + \u21b5 ctrl + enter Run current block \u2318 + \u21e7 + . ctrl + shift + . Stop execution \u2318 + \u21e7 + H ctrl + shift + H Hide/Show block output \u2318 + \u21e7 + M ctrl + shift + M Toggle between code and Markdown block \u2318 + \u21e7 + \u232b ctrl + shift + backspace Delete block \u2325 + \u21e7 + \u2191 alt + shift + \u2191 Move block up \u2325 + \u21e7 + \u2193 alt + shift + \u2193 Move block down \u2318 + \u21e7 + D ctrl + shift + D Duplicate block \u2318 + J ctrl + J Add new code block below current one \u2318 + K ctrl + K Add new code block above current one \u2318 + Z ctrl + Z Undo \u2318 + \u21e7 + Z ctrl + shift + Z Redo

Code Editing

MAC WINDOWS & LINUX ACTION \u2318 + D ctrl + D Expand selection (multiple cursors) tab tab When caret is at the beginning of a line, add indent; otherwise, show autocomplete suggestions \u21e7 + tab shift + tab Decrease indent \u2318 + / ctrl + / Toggle line/selection comment \u2325 + \u2193 alt + \u2193 Move lines down \u2325 + \u2191 alt + \u2191 Move lines up

Terminal

MAC WINDOWS & LINUX ACTION \u2318 + C ctrl + shift + C Copy selected text \u2318 + V ctrl + shift + V Paste"},{"location":"deepnote/","title":"Deepnote","text":"

Deepnote is the cloud based Jupyter notebook platform we recommend for writing your essays.

The platform was chosen due to its simple interface, customisable environment, and features for collaborative working.

Warning

This Deepnote guide is limited in scope to the writing, sharing and critiquing of algorithmic essays. There are many features of the platform not covered here. See the official documentation for more extensive coverage.

You may find it helpful to watch the following introduction video.

This guide has three parts:

  • The How-to guide provides step by step instructions on how to complete specific tasks in Deepnote.
  • The Background section discusses and explains aspects of Deepnote in a longer form and sometimes broader context.
  • The Reference section is for quickly looking up information about key aspects of Deepnote.

Note

Use the side panel to navigate the Deepnote guidance.

"},{"location":"example-essays/","title":"Example essays","text":"

These examples illustrate different ways of structuring essays and different writing styles.

For some essays, there\u2019s a template that highlights the essay\u2019s structure and the purpose of each section. These templates help you start writing your own essay. Clicking on a button below will open a read-only version of the essay or template.

To get an editable version of a template to start writing your own essay, right-click on a download button. From the pop-up menu, choose \u2018Save file as\u2026\u2019 (or similar) to save the template in a folder of your choice.

If you\u2019re using Deepnote, you don\u2019t need to download anything because you will have the essay templates and examples once you copied our essay project, as explained in Getting started (Deepnote).

"},{"location":"example-essays/#sum-of-1-to-n","title":"Sum of 1 to n","text":"

This is a short and simple essay, suitable for those on introductory programming courses. The essay shows two ways of calculating 1 + 2 + \u2026 + n and compares their run-times. The essay follows a simple structure, in which each approach is outlined, implemented and tested before moving on to the next one.

Essay Template Template (download)

"},{"location":"example-essays/#jewels-and-stones","title":"Jewels and Stones","text":"

This is a longer essay, for those on data structures and algorithms courses. The problem is to count how many characters of a string occur in another string. The essay solves the problem in three ways, with linear search, a set, and a bag/multiset. The complexity of the three algorithms is analysed and compared to their run-time.

This essay follows a slightly different structure, in which each approach is outlined and its complexity analysed, before deciding which approaches are worth implementing.

Essay Template Template (download)

"},{"location":"example-essays/#two-sum-two-approaches","title":"Two Sum (two approaches)","text":"

This classic problem asks to find two numbers in a list that add up exactly to a given number. This essay solves the problem in two ways, with brute-force search (nested loops) and a map (Python dictionary).

Essay

"},{"location":"example-essays/#two-sum-three-approaches","title":"Two Sum (three approaches)","text":"

This is an extended version of the previous essay. It adds a third approach, that sorts the list of numbers.

Essay

"},{"location":"feedback/","title":"Feedback guide","text":"

Giving feedback on each other\u2019s essays is an important part of developing professional skills with algorithmic essays.

Receiving feedback helps improve your essay, while giving feedback helps develop your communication skills. Moreover, reading other people\u2019s essays is a great way to learn more about Python, algorithms, data structures, code style, etc.

Feedback is part of professional software development practices. Many companies and open source projects have formal code reviews, in which developers must submit their code for inspection by others. Code reviews help increase the quality of the code, share best practices among developers, and on-board new developers.

Many companies use pair programming, in which two developers work together on the same piece of code. While one writes the code, the other reviews it as it\u2019s written, pointing out mistakes and suggesting improvements. The two developers switch roles often during a pair programming session. With Deepnote and Colab, you and someone else can work simultaneously on the same notebook, while using Zoom, Microsoft Teams or some other app to chat.

In summary, by engaging in a feedback process for your and others\u2019 essays, you will develop valuable professional skills.

"},{"location":"feedback/#asking-for-feedback","title":"Asking for feedback","text":"

You can ask for feedback at any point during the production of your essay, not just when you have a complete draft. You will have to tell others what kind of feedback you want.

For example, if you\u2019re undecided between three problems, you may write a notebook that just describes the problems and asks others to choose one of them, by including something like:

Please help me choose a problem to tackle. Which of the previous problems would you like to see solved and why?

It helps the reader if the request for feedback stands out from the remaining text. You may prefer to use bold, italics, or HTML to format it differently.

You can also ask for feedback after choosing a problem and thinking of some algorithms, but before investing the time in implementing and testing them. Your request could be:

I\u2019m looking for feedback on the above algorithms. Are they clear? Have I missed other approaches to solving this problem?

Once your notebook is ready for feedback, you need to share it so that others can comment on it. You may invite comments from anyone or only from one or two \u2018essay buddies\u2019: they comment on your essay and you comment on theirs.

Info

For how to share your essay, see the corresponding instructions for Colab, Deepnote or M269.

"},{"location":"feedback/#giving-feedback","title":"Giving feedback","text":"

Once you have been invited to comment on an essay, read it as soon as you can, because the author is waiting for it to progress the essay to the next draft.

Feedback should be specific and constructive. Generic comments like \u201cThis is confusing\u201d or \u201cI don\u2019t understand this\u201d aren\u2019t helpful for the author. State what you\u2019re finding hard to grasp, like:

  • \u201cWhat is \u2018it\u2019 referring to in the second sentence?\u201d
  • \u201cThe term \u2018\u2026\u2019 in the last sentence hasn\u2019t been defined. Does it mean the same as \u2018\u2026\u2019?\u201d
  • \u201cWhat is variable \u2018\u2026\u2019 used for in the algorithm? Should it initially be the empty list?\u201d

Before adding a comment to a notebook cell, read the existing comments, to avoid repeating the same points others have made.

Comments that aren\u2019t about a specific text paragraph or code cell, but rather about the whole essay, e.g. its structure, should be attached to the first cell, with the essay\u2019s title, or to the last cell.

The author has put effort into their essay, and will appreciate encouraging feedback to keep polishing it. For example, if you commented on a previous version, praise the parts that improved.

"},{"location":"feedback/#acting-on-feedback","title":"Acting on feedback","text":"

Deepnote emails you every time you get a comment on your essay. You may wish to improve your essay as you get each piece of feedback, or you may wait some time, e.g. a week, to collect a variety of comments and then address them in one pass.

As you scroll down your essay, look for speech bubble icons. Click on them to see the comment thread associated with that cell. Once you modify the cell to address those comments, mark the thread as resolved.

Don\u2019t feel obliged to follow every suggestion you receive. The reviewers of your essay may present contradictory suggestions and some may take too much effort to address.

If you don\u2019t understand a reviewer\u2019s comment, add a comment yourself, asking them for clarification. Be specific, explaining what you don\u2019t understand. Alternatively, rephrase the reviewer\u2019s comment and ask them to confirm your interpretation, e.g. \u201cDo you mean I should rename the variable from \u2026 to \u2026?\u201d

"},{"location":"feedback/#crediting-feedback","title":"Crediting feedback","text":"

You should acknowledge who provided input, by adding a section at the end of your essay with something like this:

I thank my tutor Jane Doe and my fellow students John Smith and Carla Suarez for feedback that helped improve this essay.

Crediting others allows them to point to your essay to provide evidence, e.g. to prospective employers, of giving feedback others find useful. It\u2019s therefore best if you can be specific about each one\u2019s contribution, in particular if someone provided some of the content, beyond just commenting. Content contributions should be mentioned before feedback. For example:

I thank John Smith for analysing the complexity of the second approach. Jane Doe helped me improve the structure of this essay. Carla Suarez spotted several typos and suggested improvements to the code style.

It\u2019s easiest to keep track of contributions if you update the acknowledgements as you act upon feedback. You may wish to offer co-authorship of the essay to those who made substantial contributions.

The acknowledgements should also mention if your solutions are based on someone else\u2019s, unless you already said so when introducing the algorithms. For example:

The second algorithm was taken from the solution posted by LeetCode user soandso, but the code is my own. The third algorithm and implementation are based on user123\u2019s solution. I thank \u2026

When possible, the acknowledgement text should link directly to the original solutions.

If the code or tests were partly or wholly written by generative AI, say so, preferably indicating the prompts you used.

"},{"location":"getting-started-google-colab/","title":"Getting started (Google Colab)","text":"

Follow these steps to start writing essays on Google Colab within a few minutes.

"},{"location":"getting-started-google-colab/#copy-an-essay-template","title":"Copy an essay template","text":"

Important

To use Colab, you need a Google account. To create one, visit the Google Account sign in page and follow the instructions.

  1. Log in to your Google account
  2. Click one of these links to open a template in Colab:
    • introductory programming template
    • data structures and algorithms template
  3. In the template, click File->Save a copy in Drive to save a copy to your Google Drive.
  4. In your copy of the template, click File->Rename to rename the copy. Use a descriptive name relating to your essay.

Info

The first code cell of each template essay notebook installs the necessary software when running the notebook in Colab.

"},{"location":"getting-started-google-colab/#write-the-essay","title":"Write the essay","text":"

Now that you have saved and renamed a copy of the template, you can start writing your essay in Colab.

For how to use Jupyter notebooks in Colab, read through the Overview of Colaboratory Features.

For guidance on writing algorithmic essays, see our writing guide.

"},{"location":"getting-started-google-colab/#share-the-essay","title":"Share the essay","text":"

When you are ready to share the essay with others, do the following:

  1. Click the Share button in the top right corner of your essay. A small window will appear in the middle of the screen.
  2. In the General access section of the window, select Anyone with a link from the drop-down menu, and commenter as the role from the new drop-down menu on the right.
  3. Click the Copy link button to put the link in your clipboard.
  4. Click Done to close the window and share the link with your peers.
"},{"location":"getting-started-google-colab/#comment-on-essays","title":"Comment on essays","text":"

After sharing your essay, others will be able to comment on it. You can also make comments on essays as part of the feedback process. See our feedback guide for more information.

In Colab, comments are attached to a cell and are displayed next to the cell they refer to.

If you have edit or comment permissions, you can comment on a cell in one of three ways:

  1. Select a cell and click the comment button (speech bubble) in the toolbar above the top-right corner of the cell.
  2. Right-click a text cell and select \u2018Add a comment\u2019 from the pop-up menu.
  3. Press Ctrl+Alt+M to add a comment to the currently selected cell.

You can resolve and reply to comments, and you can target comments to specific collaborators by typing @[email address] (e.g., @user@domain.com). Addressed collaborators will be emailed.

The Comment button in the top-right corner of the page shows all comments attached to the notebook.

"},{"location":"getting-started-m269/","title":"Getting started (M269)","text":"

If you\u2019re an M269 student, follow these steps to start writing essays within a few minutes.

"},{"location":"getting-started-m269/#install-software-optional","title":"Install software (optional)","text":"

While you can write essays with your current M269 software, we recommend installing two more packages to help you test, check the style, and measure the run-time of code.

  1. Activate your M269 environment as usual:
    • Open a PowerShell (Windows) or terminal (Linux / macOS), and enter m269-23j.
    • The prompt should now be (m269-23j) ... and you should be in your M269 folder.
  2. To install the software, enter pip install algoesup ruff in the PowerShell / terminal.

    Note

    You also need the most recent version of allowed, which is available from the Resources tab of the M269 website.

"},{"location":"getting-started-m269/#copy-template","title":"Copy template","text":"

We have created templates to serve as starting points for your essay.

  1. Go to our example essays page.
  2. Choose a template and right-click on the corresponding download button.
  3. Select \u2018Download / Save file as\u2026\u2019 and save the template to your M269 folder, with a filename that includes your name (e.g. essay-Michael.ipynb) or that indicates the topic (e.g. balanced-brackets.ipynb). If you prefer, you can rename the file later, once you have settled on a topic.
"},{"location":"getting-started-m269/#write-the-essay","title":"Write the essay","text":"

With the essay template in your M269 folder, you can work on it in the same way you do with the book\u2019s chapters and your TMAs.

  1. Open a PowerShell or terminal.
  2. Enter m269-23j to activate your M269 environment and go to your M269 folder.
  3. Enter nb to open the Jupyter dashboard, from which you can open the essay.
"},{"location":"getting-started-m269/#share-the-essay","title":"Share the essay","text":"

Once you have a draft you want others to give feedback on, make a post in the VLE forum corresponding to the topic of your essay, and attach your notebook file.

For example, if your essay is about ordered or unordered collections, post it in the Weeks 1-10 forum, but if it also uses recursion or a sorting algorithm, post it in the Weeks 11\u201320 forum.

Once you have addressed others\u2019 feedback, post the final version of your essay, acknowledging the commenters.

"},{"location":"getting-started-m269/#publish-the-essay-optional","title":"Publish the essay (optional)","text":"

If you want to publish your essay outside the M269 forums, you have several options.

  • Ask us in the forum to add your essay to the algoesup GitHub repository, with a link from the example essays page. The copyright of your essay remains with you.
  • Create your own GitHub repository for your notebook file. (Learning how to use GitHub for software development is a valuable professional skill.)
  • Make your essay available via a cloud Jupyter environment, like Cocalc, Colab, Datalore and Deepnote.
"},{"location":"getting-started/","title":"Getting started","text":"

Follow these steps to start writing essays within a few minutes, without any software installation or configuration.

"},{"location":"getting-started/#create-a-deepnote-account","title":"Create a Deepnote account","text":"
  1. Open the Deepnote sign-up page.
  2. Enter your email address. Use your academic (rather than personal) email to get the free education plan.
  3. Check the CAPTCHA box and click Continue with email.
  4. Check your email for a sign-in link from Deepnote and click it.

    Note

    There are no passwords for Deepnote when signing up by email. If you explicitly log out of your Deepnote account, see our guide for how to log in.

  5. In Deepnote, answer the introductory questions, which may depend on the type of email you used to sign up.

    • If you\u2019re asked what you are working on, type Writing essays and click Continue.
    • If you\u2019re asked to name your workspace, which is where you will store your projects, give it a unique and descriptive name, e.g. \u201cYour Name\u2019s projects\u201d.
    • If you\u2019re asked to invite your teammates, click Continue.
    • If you\u2019re asked for your data sources, click Take me to Deepnote.

You should now be looking at an empty notebook that is part of the Welcome to Deepnote project within your workspace. You won\u2019t need that project for writing algorithmic essays, but you may wish to keep it, to later explore Deepnote\u2019s data science features.

For the moment, just proceed with the next steps.

"},{"location":"getting-started/#duplicate-our-project","title":"Duplicate our project","text":"

We created an essay project in our Deepnote workspace, to be copied to your workspace. The project has all necessary software pre-installed.

  1. Open our project.
  2. Click on the blue Duplicate button in the top right corner.
  3. Choose the name of your workspace from the drop-down menu.
  4. Click Duplicate project to finish the process.

    Note

    Do not check the box to make your project private: that would prevent sharing your essays later with others.

    After a few moments, you will see the project in your workspace, with the name Algorithmic Essays - Duplicate at the top of the screen.

  5. Click on the three dots in the top right corner.

  6. Select Rename project from the drop-down menu.
  7. In the text box that appears, type a name for your project, e.g. \u201cYour Name\u2019s essays\u201d.

You should now see some notebooks in the left panel.

"},{"location":"getting-started/#copy-a-template-and-edit-it","title":"Copy a template and edit it","text":"

We provide two templates as starting points for your essay. One template is simpler, with introductory programming in mind, the other is for data structures and algorithms courses. To write an essay, copy one of the templates and edit that copy.

  1. In the left panel, hover your mouse over the template you wish to copy.
  2. Click on the three dots that appear next to the template\u2019s name.
  3. In the pop-up menu that appears, select Duplicate.

This creates a new notebook, with the name of the template followed by \u2018-2\u2019, and opens it. You can now start editing your copy of the template. (Once you have decided on your essay\u2019s topic, you can rename the notebook.)

If you\u2019re familiar with the classic Jupyter interface, we recommend you first read about the differences with Deepnote.

For a video introduction to notebooks and Deepnote, see our Deepnote guide.

"},{"location":"library/","title":"Library","text":"

The algoesup library provides support for testing, timing and linting code.

"},{"location":"library/#testing","title":"Testing","text":"

Simplified testing for Python functions

"},{"location":"library/#algoesup.test.test","title":"test","text":"
test(function: Callable, test_table: list) -> None\n

Test the function with the test_table. Report failed tests.

Parameters:

Name Type Description Default function Callable

The function to be tested.

required test_table list

The list of tests. Each element of test_table is a list or tuple with: a string (the test case name); one or more values (the inputs to the function); the expected output value.

required"},{"location":"library/#timing","title":"Timing","text":"

Tools for measuring and plotting run-times

"},{"location":"library/#algoesup.time.time_functions","title":"time_functions","text":"
time_functions(functions: list[Callable], inputs: Callable, start: int, double: int, text: bool = True, chart: bool = False, value: bool = False) -> None\n

Print or plot the run-times of different functions for the same inputs.

time_functions prints or plots the run-times of a given list of functions, using an input generator. Inputs are generated based on a starting size and are doubled a specified number of times.

Parameters:

Name Type Description Default functions list[Callable]

A list of functions whose run-times will be measured. Must be 1 to 6 functions.

required inputs Callable

A function to generate inputs when given a specific size.

required start int

The starting size for the inputs. Must be positive.

required double int

The number of times to double the input size. Must be non-negative.

required text bool

If True, print the run-times in text format.

True chart bool

If True, plot the run-times using a chart.

False value bool

If True, the x-axis is labelled \u201cInput value\u201d, otherwise \u201cInput size\u201d.

False

Raises:

Type Description AssertionError

If input conditions are not satisfied.

"},{"location":"library/#algoesup.time.time_cases","title":"time_cases","text":"
time_cases(function: Callable, cases: list[Callable], start: int, double: int, text: bool = True, chart: bool = False) -> None\n

Print or plot the run-times of function for different input cases.

time_cases prints or plots the run-times of a single function using a list of different input generators. Inputs are generated based on a starting size and are doubled a specified number of times.

Parameters:

Name Type Description Default function Callable

A function whose run-times will be measured.

required cases list[Callable]

A list of 1 to 6 functions to generate inputs of different cases, e.g. best-, normal- and worst-case.

required start int

The starting size for the inputs. Must be positive.

required double int

The number of times to double the input size. Must be non-negative.

required text bool

If True, print the run-times in text format.

True chart bool

If True, plot the run-times using a chart.

False

Raises:

Type Description AssertionError

If input conditions are not satisfied.

"},{"location":"library/#algoesup.time.time_functions_int","title":"time_functions_int","text":"
time_functions_int(functions: list[Callable], generator: Callable = int_value, start: int = 1, double: int = 10, text: bool = True, chart: bool = True) -> None\n

Time functions that take a single integer as input.

time_functions_int uses time_functions to measure and display the run-times of a given list of functions that accept a single integer input. The integer inputs are generated starting from a specified value that defaults to 1, and are doubled a specified number of times that defaults to 10.

Parameters:

Name Type Description Default functions list[Callable]

A list of functions whose run-times will be measured. Each function must accept a single integer argument. Must be 1 to 6 functions.

required generator Callable

A function to generate integer inputs. Defaults to int_value, which returns a tuple containing the input integer.

int_value start int

The starting integer value for inputs. Defaults to 1. Must be positive.

1 double int

The number of times to double the input integer value. Defaults to 10. Must be non-negative.

10 text bool

If True, print the run-times in text format.

True chart bool

If True, plot the run-times using a chart.

True"},{"location":"library/#linting","title":"Linting","text":"

Linting tools for Jupyter Notebook environments

"},{"location":"library/#algoesup.magics.allowed","title":"allowed","text":"
allowed\n

Activate/deactivate the allowed linter.

When active, the linter checks each code cell that is executed for any Python constructs that are not listed in the given configuration file.

  • %allowed --config ... on activates the linter with the given configuration, which must be m269.json, tm112.json or one you defined
  • %allowed on is equal to %allowed --config m269.json on
  • %allowed off deactivates the linter
  • %allowed shows the current status of the linter
  • %allowed? shows this documentation and the command\u2019s options
"},{"location":"library/#algoesup.magics.pytype","title":"pytype","text":"
pytype\n

Activate/deactivate the pytype linter.

When active, the linter checks each code cell that is executed for type errors.

  • %pytype --disable ... on activates the linter but does not check the given errors (see the list of errors)
  • %pytype on is equal to %pytype --disable name-error,import-error on
  • %pytype off deactivates the linter
  • %pytype shows the current status of the linter
  • %pytype? shows this documentation and the command\u2019s options
"},{"location":"library/#algoesup.magics.ruff","title":"ruff","text":"
ruff\n

Activate/deactivate the Ruff linter.

When active, the linter checks each code cell that is executed against the selected code style rules.

  • %ruff --select ... --ignore ... on activates the linter with the given rules (see the list of rules)
  • %ruff on is equal to %ruff --select A,B,C90,D,E,W,F,N,PL --ignore D100,W292,F401,F821,D203,D213,D415 on
  • %ruff off deactivates the linter
  • %ruff shows the current status of the linter
  • %ruff? shows this documentation and the command\u2019s options
"},{"location":"writing/","title":"Writing guide","text":"

This document provides guidance on how to produce your essay.

Note

Although we wish to accommodate novice programmers in the future, the guide currently has data structures and algorithms students in mind.

An essay can have more than one author, although more than two is harder to manage. Deepnote and Colab make it easy to work collaboratively on a single notebook, at the same time or asynchronously, and leave comments to co-authors. You may wish to first pitch your essay idea to your peers, to recruit co-authors.

In the rest of this guide, \u2018you\u2019 and \u2018your\u2019 are both singular and plural pronouns, to refer simultaneously to a single author or multiple authors.

Note

You may wish to keep this guide open while going through your copy of our template.

"},{"location":"writing/#problem","title":"Problem","text":"

It\u2019s worth spending time on choosing an appropriate problem before putting effort into an essay about it. You may invent your own problem or select an existing one. For example, it may be a non-assessed exercise from your course, or it may relate to your hobby or work. If so, provide any information the reader needs to understand the problem. If the problem is from your work, get permission from your employer or client.

There are many websites with thousands of algorithmic problems to choose from. We have used Kattis and LeetCode in the past.

Some sites, like LeetCode, tag their problems with the data structure or algorithmic technique needed, like \u2018array\u2019 or \u2018sorting\u2019. This helps you find problems about a particular topic.

Some sites, like LeetCode, have official and user-provided solutions, but the latter may be terse (single-letter identifiers, no comments) or not fully analysed. Other sites, like the International Olympiad in Informatics, often have just solution hints or outlines. You may thus wish to write an essay that fully implements a solution outline or that improves and compares several user solutions. Either way would be useful to the user community of those sites.

It is often said that the best way to learn a topic is to have to explain it to others. You may thus wish to pick a problem on a topic you\u2019re not comfortable with, choose two existing solutions, and explain them in an essay.

If you\u2019re undecided, make a shortlist of 2\u20133 problems and ask your peers for their opinion.

"},{"location":"writing/#text","title":"Text","text":"

An essay presents two or more algorithmic solutions for a computational problem, and concludes which one is better, according to some criteria. Possible criteria include:

  • time and space complexity
  • empirical run-times and memory used
  • simplicity of the solution
  • ease of adapting the solution to similar problems.

The essay should thus have a clear narrative, going from the problem to the conclusion.

An algorithmic essay contains more text than code, and while code can and should have comments, the text carries most of the explanation. It\u2019s thus important for the text to be clear and error-free.

Deepnote notebooks can have rich-text cells (headings, paragraphs, bullet items, etc.) that, contrary to the Markdown cells, are spell-checked as you write the text and support keyboard shortcuts, like Ctrl + B to put the selected text in bold. Unless you want to keep your essays in Deepnote, we do not recommend using rich-text cells, as their formatting is lost when downloading the notebook to your computer.

Essays can be written in any style: it\u2019s a personal choice. For example, you can use \u2018we\u2019, \u2018I\u2019 or an impersonal form.

"},{"location":"writing/#structure","title":"Structure","text":"

An essay starts with a title that states the problem or the algorithmic technique to be used. Next, put your name(s) and the current date, which should be updated whenever you edit the essay.

Next, without any heading, comes the introduction. It should state what the essay is about. Normally an essay\u2019s aim is to solve a particular problem, but it may also illustrate a general technique, like space-time trade-offs or recursion, or highlight an issue, like the difference between complexity analysis and run-time performance.

The introduction should also state what you assume the reader to know, as no essay can explain everything from first principles. For example, tell the reader that they must know about binary trees to understand your essay.

Following the introduction, use section headings to structure your essay, for example:

  • Problem: this section describes the problem, with some examples.
  • Algorithms: this section outlines two or more algorithms that solve the problem and their complexity.
  • Implementations: this section implements and tests only the most promising algorithms.
  • Comparison: this section compares the implemented algorithms according to other criteria, e.g. their run-times.
  • Conclusion: this section summarises the findings and concludes which approach is best.

The algorithms and implementations sections may have subsections, one per algorithm.

An alternative structure implements each approach before evaluating all of them:

  • Problem: this section describes the problem, with some examples.
  • First approach: this section outlines an algorithm, implements it and tests it.
  • Second approach: this section presents another algorithm and its implementation.
  • \u2026: further sections, one per approach.
  • Evaluation: this section states the criteria to be used and evaluates each approach according to them.
  • Conclusion: this section summarises the findings and concludes which approach is best.

If the problem description is a single paragraph, you may include it in the introduction, rather than having a separate section. If you didn\u2019t invent the problem, indicate its source, e.g. by providing a link to a website or by writing something like \u201cThis is problem 4.5 in [book title] by [author].\u201d

"},{"location":"writing/#algorithms","title":"Algorithms","text":"

You should choose at least two sufficiently different algorithms that solve the problem, and describe each one succinctly, preferably before implementing it, to make the code easier to understand for the reader.

We recommend not describing algorithms that are only slight variants of each other, as this is usually of little interest, and including only two algorithms in your first draft.

If you\u2019re using solutions by others, e.g. by LeetCode users, acknowledge the original author and provide a link to their solution. If you have modified their solutions, state what you have changed and explain why.

You should include worst-case complexity analyses of the various solutions you propose, as this helps discard the inefficient ones that may not be worth implementing.

"},{"location":"writing/#code","title":"Code","text":"

Your code should be correct, simple, and as readable as possible. Unless the aim of your essay is to discuss advanced Python constructs, try to use only a basic subset of the language. This allows more people, including those with limited knowledge of Python, to understand your code. It also makes your code easier to port to other programming languages.

We recommend the following workflow, which is further explained in the following subsections.

  1. Write the tests for your algorithms.
  2. Implement the algorithms and run the tests.
  3. Typecheck your code as you run each cell.
  4. Format your code, cell by cell.
  5. Check the code style as you run each cell.

Writing the tests (step 1) before the code they test (step 2) is a cornerstone of test-driven development, a widely used practice. Thinking of the tests early in the process helps you better understand the problem and think of correct solutions.

Info

If you followed our \u2018getting started\u2019 instructions, the software mentioned in the next subsections to carry out the above workflow is already installed.

"},{"location":"writing/#testing","title":"Testing","text":"

You should write tests for each function, to have some assurance that it is correct. Tests that check the behaviour of a single function are called unit tests. The unit tests should cover normal cases and edge cases: extreme input values and inputs that lead to extreme output values.

For each input, the smallest possible value, e.g. zero or the empty list, is an edge case, and so is the largest possible value, if there is one for that input. If a function is doing a search for an item in a list, then edge cases would be the item being at the start, at the end, or not occurring at all. If the output is a list, then inputs that produce the empty list are edge cases too. In summary, try to think of the \u2018trickiest\u2019 inputs the algorithm has to cope with.

We provide a small library to support algorithmic essays: algoesup. It allows you to easily write and run unit tests. Here\u2019s an example. (The # fmt: off and # fmt: on lines will be explained later.)

from algoesup import test\n\n# function to be tested\ndef absolute_difference(x: int, y: int) -> int:\n    \"\"\"Return the absolute value of the difference between x and y.\"\"\"\n    return x - y  # deliberately wrong, should be abs(x - y)\n\n# fmt: off\n# unit tests in tabular form, one test per row\nunit_tests = [\n    # test case,  x,    y,    expected result\n    (\"x == y\",    1,    1,    0),\n    (\"x > y\",     10,   -1,   11),\n    (\"x < y\",     -1,   10,   11),\n]\n# fmt: on\n\n# run the function on all test inputs and compare the actual and expected outputs\ntest(absolute_difference, unit_tests)\n

Output

Testing absolute_difference\u2026 x < y FAILED: -11 instead of 11 Tests finished: 2 passed, 1 failed.

A unit test consists of the input values to pass to your function and the output value you\u2019re expecting. The library requires a short descriptive string for each unit test, so that it can indicate which tests failed. The library expects unit tests to be in tabular format: one row per test, and one column for the description, one column for each input, and one column for the expected output. In the example above, the test table is a list of tuples, but it could as well be a list of lists, a tuple of lists, or a tuple of tuples.

You should reuse the test table for all solutions, because they\u2019re about the same problem. Here\u2019s a correct function that passes all test cases.

def absolute_difference_without_abs(x: int, y: int) -> int:\n    \"\"\"Return the absolute value of the difference between x and y.\n\n    This solution doesn't use the built-in abs() function.\n    \"\"\"\n    if x > y:\n        return x - y\n    else:\n        return y - x\n\ntest(absolute_difference_without_abs, unit_tests) # same test table\n

Output

Testing absolute_difference_without_abs\u2026 Tests finished: 3 passed, 0 failed.

"},{"location":"writing/#type-checking","title":"Type checking","text":"

As the above examples show, your code should contain type hints like x: int and ... -> int to indicate the type of the input and of the output. They make your code easier to understand, and help type checkers detect any type mismatches, like passing a string instead of an integer.

The algoesup library also provides an extension for Jupyter notebooks, which you must load first.

%load_ext algoesup.magics\n
(Magics are special commands that can change the behaviour of running a code cell.) You can now turn on type checking as follows.
%pytype on\n

Output

pytype was activated

Words that start with % are special commands (\u2018magics\u2019) for IPython, the Python interpreter used by Jupyter notebooks. The %pytype command, provided by our library, activates Google\u2019s pytype type checker.

Once the type checker is activated, it checks each cell immediately after it\u2019s executed. In this way you can detect and fix errors as you write and run each code cell. Here\u2019s an example of what happens.

def double(x: int) -> int:\n    \"\"\"Return twice the value of x.\"\"\"\n    return x * 2\n\ndouble([4])\n

Output

[4, 4]

pytype found issues:

  • 5: Function double was called with the wrong arguments [wrong-arg-types]

The function is executed and produces an output because lists can also be \u2018multiplied\u2019 with an integer, but the type checker detects that line 5 should have passed integers, not lists of integers, to the function. Clicking on the error name in square brackets leads you to pytype\u2019s website, with more info.

When a type checker only processes one cell at a time, it is missing the wider context, like the previously defined functions. Therefore, pytype won\u2019t spot all type errors. However, some checking is better than no checking.

The type checker adds some seconds to the overall time to run each code cell. You may thus wish to initially turn off the type checking, with %pytype off, and only turn it on after all code is written and tested. You will have to run all cells of your notebook for the type checking to take place.

For a list of all the options for the %pytype command, see the library reference.

"},{"location":"writing/#formatting","title":"Formatting","text":"

Note

This subsection only applies to Deepnote.

Once you have written, tested and type checked all your code, you should format it so that it follows the Python community\u2019s code style. You will need to format each cell, as explained here.

If there\u2019s a block of code that you don\u2019t want the formatter to change, write # fmt: off on its own line before the block and write # fmt: on after the block, to temporarily switch off formatting for that block. This feature is especially useful for keeping the format of unit test tables, as shown in an earlier example.

The Deepnote formatter automatically enforces simple formatting conventions, like 4 spaces for indentation and 2 empty lines between functions, so you will see fewer warnings in the next stage.

"},{"location":"writing/#linting","title":"Linting","text":"

You should lint your code, which means to check it for style violations.

"},{"location":"writing/#code-style","title":"Code style","text":"

Our library supports ruff, the fastest Python linter. To turn it on, write the following after loading the algoesup.magics extension.

%ruff on\n

Output

ruff was activated

From now on, each cell is automatically linted after it\u2019s executed. Here\u2019s an example:

l = [1, 2, 3]\nif (not 5 in l) == True:\n    print(\"5 isn't in the list\")\n

Output

5 isn\u2019t in the list

ruff found issues:

  • 1: [E741] Ambiguous variable name: l
  • 2: [PLR2004] Magic value used in comparison, consider replacing 5 with a constant variable
  • 2: [E713] Test for membership should be not in. Suggested fix: Convert to not in
  • 2: [E712] Comparison to True should be cond is True or if cond:. Suggested fix: Replace with cond is True

Every message indicates the line of the problem.

  • The first message is trying to tell us that l can be misread for 1 (one).
  • The second message recommends using constants, like EXPECTED_VALUE, instead of literals like 5 that are meaningless to the reader.
  • The third message tells us that it\u2019s better to write 5 not in l.
  • The last message says that == True should be is True or simply omitted. The suggested fix is not appropriate for this if-statement: it should be if 5 not in l:.

As this code cell shows, ruff sometimes suggests how to fix the reported error, but you must consider whether the suggestion is appropriate.

If you don\u2019t understand an error message, like the first one, click on the error code in brackets, to get more information from ruff\u2019s website.

Like for type checking, linting one cell at a time means that the linter is unaware of the wider context of your code. For example, in notebooks, variables may be defined in one cell but used in a later cell. As the linter checks each cell separately, it would report an undefined variable in the later cell. We have disabled checks for undefined variables and other checks that would lead to irrelevant error messages in notebooks, which means that genuine undefined variables won\u2019t be flagged. But again, some linting is better than none.

If you get errors that you think are irrelevant, you can disable them with the --ignore option: see the library reference.

"},{"location":"writing/#language-subset","title":"Language subset","text":"

Our library also supports the allowed linter, created by ourselves. It checks whether your code only uses a certain subset of the Python language. This gives you some reassurance that your code will be understood by a wide audience.

By default, allowed checks against the Python subset used in our algorithms and data structures course. So, if you\u2019re an M269 student, to check that your essay is easily understood by your peers in terms of Python constructs, just add the following after loading the extension:

%allowed on\n

Output

allowed was activated

Henceforth, after a cell is executed, the allowed linter will list any constructs, modules or built-in types we haven\u2019t taught, like this:

from math import pi, sin\n\nprint(f\"\u03c0 is approximately {pi:.5f}.\")\n

Output

\u03c0 is approximately 3.14159.

allowed found issues:

  • 1: sin
  • 3: f-string

We haven\u2019t taught the math.sin() function nor f-strings, and allowed reports these.

Any line that ends with the comment # allowed is ignored. This is useful when you don\u2019t want the linter to flag a construct that you explain in your essay. For example, adding the comment after print(...) would not report the f-string. Note that the comment makes the tool skip the whole line: if it has several constructs that weren\u2019t taught, none of them is reported.

The allowed linter also includes the configuration for TM112, our introductory Computing course, in case you want to use even fewer constructs in your essay. To use that configuration, write %allowed --config tm112.json on. For a list of all the options for the %allowed command, see the library reference.

You can configure the linter with a JSON file that lists the allowed constructs. In Deepnote, rename the allowed.json JSON configuration in the Files section of your project, and adapt it to your course. See the allowed website for instructions.

"},{"location":"writing/#performance-analysis","title":"Performance analysis","text":"

Complexity analysis gives an indication of how the run-times will grow as the inputs grow, but it can\u2019t predict the exact run-times nor which algorithm is in practice fastest.

Our library helps measure and plot the run-times of one function on different kinds of input, or of different functions on the same inputs.

For example, let\u2019s suppose our essay is about sorting algorithms and we have implemented selection sort.

def selection_sort(values: list[int]) -> list[int]:\n    \"\"\"Return a copy of the values, in ascending order.\"\"\"\n    result = values[:]\n    for current in range(len(result) - 1):\n        # select the smallest element in result[current:] ...\n        smallest = current\n        for index in range(current + 1, len(result)):\n            if result[index] < result[smallest]:\n                smallest = index\n        # ... and swap it with the current element\n        result[current], result[smallest] = result[smallest], result[current]\n    return result\n

"},{"location":"writing/#generating-inputs","title":"Generating inputs","text":"

To measure the run-times of sorting algorithms on increasingly large lists, we must implement functions that generate such lists. For example, we can write a function that generates lists that are already in ascending order, which is a best case for many sorting algorithms, and a function that generates lists that are in descending order, which is a worst case for some sorting algorithms.

The library expects such input-generating functions to take a non-negative integer n, and to produce a tuple of input values, with total size n. Why a tuple? Although our sorting algorithm takes a single input (a list of integers), many algorithms take more than one input. Thus the input-generating functions must generate a tuple of inputs, in the same order as expected by the algorithm.

def ascending(n: int) -> tuple[list[int]]:\n    \"\"\"Return a list of n integers in ascending order.\"\"\"\n    return (list(range(1, n + 1)),)  # trailing comma to make it a tuple\n\ndef descending(n: int) -> tuple[list[int]]:\n    \"\"\"Return a list of n integers in descending order.\"\"\"\n    return (list(range(n, 0, -1)),)\n
We should of course test these functions, to make sure they produce the expected lists, but we will skip that in this explanation because we\u2019re focusing on how to measure run-times.

"},{"location":"writing/#comparing-cases","title":"Comparing cases","text":"

To measure the run-times of a function f on best, average and worst case inputs, use library function time_cases(f, [case1, case2, ...], s, d). The second argument can be a list (or tuple) of up to 6 input-generating functions. The time_cases function works as follows.

  1. Call case1(s) to generate inputs of size s for f.
  2. Run function f on the generated inputs and measure its run-time.
  3. Do the two previous steps with each of the functions case2, ....
  4. Set s to double its value and go back to step 1.

The whole process stops when s has been doubled d times. If d is zero, the run-times are only measured for size s.

Here\u2019s how we could measure the run-times for selection sort on ascending and descending lists.

from algoesup import time_cases\n\ntime_cases(selection_sort, [ascending, descending], start=100, double=4)\n

Output

Run-times for selection_sort\n\nInput size       ascending      descending\n       100           168.2           173.2 \u00b5s\n       200           643.2           660.6 \u00b5s\n       400          2716.7          2817.9 \u00b5s\n       800         11072.4         11407.3 \u00b5s\n      1600         44285.3         45512.7 \u00b5s\n

Running selection sort on lists from 100 to 1600 integers takes about 170 microseconds to 45 milliseconds. To measure such small time spans precisely, function f (here, selection_sort) is called multiple times on the same input, within a loop, and the total time is divided by the number of iterations, to obtain a better estimate of the time taken by a single call to f. The whole process is repeated 3 times, because the run-times will vary due to other processes running on the computer. The lowest of the 3 run-times is reported.

Because function f is called multiple times, it is very important that f does not modify its inputs. For example, if selection_sort sorted the list in-place, instead of returning a new list, then the first call would put the numbers in ascending order, and the subsequent calls would just try to sort an already sorted list, swapping no numbers. We would obtain almost exactly the same times for ascending and descending input lists, instead of always larger times for descending lists, as shown above.

When executing a code like the previous one, be patient while waiting for the results. Even though each call may just take a few milliseconds or less, the code cell will take several seconds or even minutes to execute, because the function is called many times to make the measurements more robust.

"},{"location":"writing/#comparing-functions","title":"Comparing functions","text":"

Our library also allows you to compare different algorithms for the same input case. For that, use time_functions([f1, f2, ...], case, s, d), which does the following:

  1. Call case(s) to generate inputs of total size s.
  2. Call each function f1, f2, etc. on the generated inputs and measure their run-times.
  3. Double the value of s and go to step 1, unless s has been doubled d times.

The run-times are measured as for time_cases: take the best of 3 run-times, each obtained by calling the function within a loop and dividing the total time by the number of iterations.

Here\u2019s a comparison of the built-in sorted function against selection sort, on descending lists.

from algoesup import time_functions\n\ntime_functions([selection_sort, sorted], descending, start=100, double=4)\n

Output

Inputs generated by descending\n\nInput size  selection_sort          sorted\n       100           172.8             0.5 \u00b5s\n       200           660.7             0.8 \u00b5s\n       400          2795.7             1.6 \u00b5s\n       800         11534.0             3.1 \u00b5s\n      1600         45470.0             5.9 \u00b5s\n

As expected, the built-in sorting implementation is much, much faster.

"},{"location":"writing/#charting-run-times","title":"Charting run-times","text":"

If you add argument chart=True to time_cases or time_functions, then you will see a line chart of the run-times, in addition to the exact run-times. If you only want to see the chart, then add arguments text=False, chart=True.

time_cases(sorted, [ascending, descending], 100, 4, text=False, chart=True)\n

Output

time_functions([sorted, selection_sort], ascending, 100, 4, chart=True)\n

Output

Inputs generated by ascending\n\nInput size          sorted  selection_sort\n       100           482.1        168025.5 ns\n       200           808.3        646093.8 ns\n       400          1496.9       2720369.2 ns\n       800          2850.4      11090135.4 ns\n      1600          5553.0      44372758.4 ns\n

The 1e7 above the y-axis means that the run-times must be multiplied by 10\u2077, i.e. 10 million.

Note that when calling time_functions([selection_sort, sorted], ...) the run-times were reported in microseconds, but when calling time_functions([sorted, selection_sort], ...) they were in nanoseconds. The reason is that the library chooses the time unit based on the first run-time measured. If there\u2019s a big time difference between the fastest and slowest cases or algorithms, you may wish for the first function in the list to be the slowest one, to report it with small values in a \u2018large\u2019 time unit, instead of very large values in a \u2018small\u2019 time unit. So, in time_functions([f1, f2, ...], case, ...) the slowest function should be f1, and in time_cases(f, [case1, case2, ...], ...) the worst case should be case1.

"},{"location":"writing/#interpreting-run-times","title":"Interpreting run-times","text":"

If, as the input size doubles, the run-times\u2026

  • \u2026remain the same, then the function has constant complexity.
  • \u2026also double, then the function has linear complexity.
  • \u2026quadruple, then the function has quadratic complexity.
  • \u2026increase by a fixed amount, then the function has logarithmic complexity.

Looking at the run-times reported in the previous subsections, we can see that sorted is linear because the run-times about double when the input size doubles, whereas selection sort is quadratic because the run-times increase about 4-fold when the input size doubles.

Remember that run-times vary every time you execute a cell because the computer is executing other processes. This may lead to odd behaviour here and there. For example, we have noted that sorted is occasionally faster for descending lists, which is counter-intuitive because it does have to reverse them.

If you can\u2019t see any trend in the run-times, or they aren\u2019t what you expect, one possible cause is that the input sizes are too small. Increase start and run the code cell again.

If after increasing the start size several times you still don\u2019t get the run-times you expect from your complexity analysis, then there might be other explanations:

  • your complexity analysis is wrong
  • your implemented algorithm modifies its input
  • your input-generating functions are not generating best or worst cases.

For an example of the latter, see the Jewels and Stones essay.

"},{"location":"writing/#final-check","title":"Final check","text":"

Whether it\u2019s your essay\u2019s first draft or final version, before you share it with others, you should restart the kernel and run all cells, so that you have a \u2018clean\u2019 version. Then, after a break, read your essay with \u2018fresh eyes\u2019 from start to end and fix any typos or missing explanations you find.

Look at the table of contents of your notebook and check that your section headings are at the right level.

Info

In Deepnote, the table of contents is on the left sidebar; in Colab, you must click the bullet list icon in the left sidebar.

Finally, let others comment on your essay and help you produce a better version. See our feedback guide for details.

"},{"location":"writing/#further-reading","title":"Further reading","text":"

If you\u2019re interested and have the time, here are further details on some of the above.

  • Strunk and White\u2019s The Elements of Style is a classic. The examples are dated but the advice is good.
  • The websites of allowed, pytype and ruff.
  • A summary of Python\u2019s type hints provided by the mypy project (another type checker).
  • The Python code style and docstring conventions.
  • The formatting style enforced by black, which we suspect is the formatter used by Deepnote. Deepnote ignores the # fmt: skip directive to not format a single line.
"},{"location":"essays/example-1-to-n/","title":"Sum of 1 to n","text":"

This short notebook provides a very simple example of an algorithmic essay and shows the support that our library provides.

The problem to be solved is to compute the sum of the integers, 1, 2, ..., n, for some given n. Two solutions will be presented, both only using basic Python.

The first thing to do is to import the necessary functions from the algorithmic essays support library. The functions will be explained later, when they're used.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff In\u00a0[1]: Copied!
# import functions to test the code and to measure the execution time\nfrom algoesup import test, time_functions_int\n
# import functions to test the code and to measure the execution time from algoesup import test, time_functions_int

The library also includes two commands to turn on the tools that will check the code as it's executed. There won't be any messages in this notebook because the code passes all checks.

In\u00a0[2]: Copied!
%load_ext algoesup.magics\n# check the code for style violations\n%ruff on\n# check that only the subset of Python taught in our introductory course TM112 is used\n%allowed --config tm112.json on\n
%load_ext algoesup.magics # check the code for style violations %ruff on # check that only the subset of Python taught in our introductory course TM112 is used %allowed --config tm112.json on
ruff was activated\nallowed was activated\n
In\u00a0[3]: Copied!
tests = [\n    # case            n,    sum\n    [\"no integers\",  -1,      0],\n    [\"one integer\",   1,      1],\n    [\"n is even\",     4,     10],  # 1 + 2 + 3 + 4 = 10\n    [\"n is odd\",      7,     28],  # 1 + 2 + 3 + 4 + 5 + 6 + 7 = 28\n]\n
tests = [ # case n, sum [\"no integers\", -1, 0], [\"one integer\", 1, 1], [\"n is even\", 4, 10], # 1 + 2 + 3 + 4 = 10 [\"n is odd\", 7, 28], # 1 + 2 + 3 + 4 + 5 + 6 + 7 = 28 ] In\u00a0[4]: Copied!
def sum_with_loop(n):\n    \"\"\"Return 1 + 2 + ... + n, using a loop.\"\"\"\n    total = 0\n    for number in range(1, n + 1):\n        total = total + number\n    return total\n
def sum_with_loop(n): \"\"\"Return 1 + 2 + ... + n, using a loop.\"\"\" total = 0 for number in range(1, n + 1): total = total + number return total

The next step is to check the code with the imported test function, which takes two arguments: the function to be tested and a test table. Every test case in the test table is checked and if the actual output isn't the expected output, the test is reported as having failed.

In\u00a0[5]: Copied!
test(sum_with_loop, tests)\n
test(sum_with_loop, tests)
Testing sum_with_loop...\nTests finished: 4 passed, 0 failed.\n

Algorithm 1 passes all tests. Since the algorithm does exactly what the problem asks for, the only test that could fail is for n < 1. However, in that case the loop is skipped and the returned total is zero, as desired.

In\u00a0[6]: Copied!
def sum_with_formula(n):\n    \"\"\"Return 1 + 2 + ... + n, using a formula.\"\"\"\n    return n * (n + 1) // 2\n
def sum_with_formula(n): \"\"\"Return 1 + 2 + ... + n, using a formula.\"\"\" return n * (n + 1) // 2

This function is solving the same problem as the previous one, so it must be tested with the exact same tests.

In\u00a0[7]: Copied!
test(sum_with_formula, tests)\n
test(sum_with_formula, tests)
Testing sum_with_formula...\nTests finished: 4 passed, 0 failed.\n

The code passes the tests, and yet the algorithm is wrong! The formula is only meant for n \u2265 1. It just happens that if n = 0 or n = -1 then n \u00b7 (n+1) / 2 = 0, as desired, but for n < -1 the result is non-zero. An additional test helps confirm this.

In\u00a0[8]: Copied!
tests.append([\"n < -1\", -5, 0])  # testing with n = -5\n\ntest(sum_with_formula, tests)\n
tests.append([\"n < -1\", -5, 0]) # testing with n = -5 test(sum_with_formula, tests)
Testing sum_with_formula...\nn < -1 FAILED: 10 instead of 0\nTests finished: 4 passed, 1 failed.\n

The result is -5 \u00b7 (-5 + 1) / 2 = -5 \u00b7 -4 / 2 = 10 instead of zero. The algorithm must be modified.

Algorithm 2 (corrected): If n < 1, return 0, otherwise return n \u00b7 (n+1) / 2.

In\u00a0[9]: Copied!
def sum_with_formula(n):\n    \"\"\"Return 1 + 2 + ... + n, using a formula.\"\"\"\n    if n < 1:\n        return 0\n    else:\n        return n * (n + 1) // 2\n\ntest(sum_with_formula, tests)\n
def sum_with_formula(n): \"\"\"Return 1 + 2 + ... + n, using a formula.\"\"\" if n < 1: return 0 else: return n * (n + 1) // 2 test(sum_with_formula, tests)
Testing sum_with_formula...\nTests finished: 5 passed, 0 failed.\n

Now the additional test also passes.

As a new test was added, the first algorithm must be tested again.

In\u00a0[10]: Copied!
test(sum_with_loop, tests)\n
test(sum_with_loop, tests)
Testing sum_with_loop...\nTests finished: 5 passed, 0 failed.\n
In\u00a0[11]: Copied!
time_functions_int([sum_with_loop, sum_with_formula])\n
time_functions_int([sum_with_loop, sum_with_formula])
Inputs generated by int_value\n\nInput value   sum_with_loop sum_with_formul \n          1           151.1            87.3 ns\n          2           178.9            87.5 ns\n          4           209.2            87.6 ns\n          8           278.3            88.1 ns\n         16           401.5            94.6 ns\n         32           701.0            99.0 ns\n         64          1359.7            99.2 ns\n        128          2610.7            99.2 ns\n        256          5144.2           102.8 ns\n        512         11148.8           102.8 ns\n       1024         23216.0           102.7 ns

Computing the sum with the formula always takes about the same time, around 90 to 100 nanoseconds (that's what the 'ns' means). However, the run-times for the sum with the loop grow as n gets larger. If the value of n doubles, the number of iterations also doubles, so the run-time also roughly doubles, as can be seen above the chart. As n increases, the iterative sum gets slower and slower. At n = 1024, using a loop is about 200 times slower than using the formula!

"},{"location":"essays/example-1-to-n/#sum-of-1-to-n","title":"Sum of 1 to n\u00b6","text":"

Michel Wermelinger, 22 January 2024, last updated 16 March 2024

"},{"location":"essays/example-1-to-n/#tests","title":"Tests\u00b6","text":"

Before thinking of a solution, it's best to write down some tests, as they help our understanding of the problem. Each test is the input integer n and the corresponding expected integer output 1 + 2 + ... + n.

The tests should include edge cases: inputs that are extreme values or that lead to extreme outputs. For this problem, edge cases are when there are no integers to add up (n < 1) or when there's only one (n = 1).

Tests should also include normal cases. When the input is an integer, tests typically include odd and even values.

The test function that was imported in the previous cell expects the tests to be written as a table, e.g. as a list of lists. The table must have one row per test case. Each row has a column with a string describing the case, one column per input, and a final column with the expected output.

"},{"location":"essays/example-1-to-n/#solutions","title":"Solutions\u00b6","text":"

The next step is to think of possible algorithms that solve the problem.

"},{"location":"essays/example-1-to-n/#using-a-loop","title":"Using a loop\u00b6","text":"

Probably the most obvious algorithm is to explicitly calculate the sum, by iterating over the integers from 1 to n.

Algorithm 1: Set the total to zero. For each value from 1 to n, add the value to the total. Return the total.

The algorithm is implemented as follows.

"},{"location":"essays/example-1-to-n/#using-a-formula","title":"Using a formula\u00b6","text":"

It's possible to compute the sum directly, using a well-known formula, taught in many introductory math courses:

1 + 2 + 3 + ... + (n - 2) + (n - 1) + n = (1 + n) + (2 + n - 1) + (3 + n - 2) + ... = (n + 1) \u00b7 n / 2.

The formula is based on 'pairing up' the first with the last number, the second with the next to last, and so on. Each pair adds up to n + 1, and the number of pairs is half of n. The algorithm is simply the formula:

Algorithm 2: Return n \u00b7 (n+1) / 2.

"},{"location":"essays/example-1-to-n/#performance","title":"Performance\u00b6","text":"

The final, optional, step is to compare the performance of both solutions to see which is fastest. For this problem, the second algorithm is expected to be faster than the first one, as it computes the sum directly, without iterating over n integers.

The time_functions_int function, which was imported in the first cell, takes a list of functions that have a single integer n as input, and measures their run-times for n = 1, 2, 4, 8, ..., 1024. Doubling the input allows us to see any trends in how run-times grow with just eleven executions of the two functions. Running the functions with n = 1, 2, 3, 4, 5, ..., 1024 would take much longer and not produce any additional insight.

"},{"location":"essays/example-1-to-n/#concluding-remarks","title":"Concluding remarks\u00b6","text":"

This essay presented two solutions to a simple problem: computing the sum 1 + ... + n, for any integer n. The sum should be zero if n < 1.

The first solution computes the sum iteratively, while the second computes it directly, with a formula. The second is always faster, even for n = 1. Moreover, as n increases, the first approach becomes slower whereas the second always takes about the same time.

This essay illustrates the need for thinking about different algorithms for the same problem, as one approach may be much more efficient than others.

The essay also showed that passing all tests doesn't mean that the code is correct. As the various algorithms are developed, new tests may have to be added.

"},{"location":"essays/example-jewels/","title":"Jewels and Stones","text":"

In this example algorithmic essay I will tackle LeetCode problem Jewels and Stones. The inputs are two strings jewels and stones, where each character represents a type of stone, e.g. A for amethyst, D for diamond, b for basalt and ? for an unknown stone. The jewels string contains the characters that represent jewels, without repetition. The desired output is the number of stones that are jewels. For example, if jewels is \"AD\" and stones is \"b?AbDAb\" then the output is 3 because three of the stones are jewels (two amethysts and one diamond). To be clear, which stones and jewels the characters represent doesn't really matter: the output is simply how many characters of the second string occur in the first string.

This essay will illustrate a space-time trade-off: by using an additional data structure, we get a faster algorithm. I assume you're familiar with sets, bags (multisets) and basic complexity analysis. This essay uses Big-Theta notation \u0398(...), but if you're not familiar with it, you can read it as Big-Oh notation O(...) for the purposes of the analyses done below. Sorting, binary search, and log-linear and logarithmic complexities are mentioned in Section 2.3, but you can skip it, as it won't affect the understanding of the rest of the essay.

Before starting to solve the problem, I import the necessary functions from algoesup, the algorithmic essays support library, and turn on the tools that will check my code for style and type violations (with Ruff and pytype), and for constructs I didn't teach (with allowed).

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff pytype\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff pytype In\u00a0[1]: Copied!
from algoesup import test, time_cases, time_functions\n
from algoesup import test, time_cases, time_functions In\u00a0[2]: Copied!
%load_ext algoesup.magics\n%ruff on\n%pytype on\n%allowed on\n
%load_ext algoesup.magics %ruff on %pytype on %allowed on
ruff was activated\npytype was activated\nallowed was activated\n
In\u00a0[3]: Copied!
# don't allow code formatters to change this table\n# fmt: off\ntests = [\n    # case                  jewels  stones  output\n    (\"no jewels\",           \"\",     \"abc\",      0),\n    (\"no stones\",           \"DA\",   \"\",         0),\n    (\"both empty\",          \"\",     \"\",         0),\n    (\"none is a jewel\",     \"DA\",   \"dada?\",    0),\n    (\"equal strings\",       \"DA\",   \"DA\",       2),\n    (\"all same jewel\",      \"DA\",   \"AAAA\",     4),\n    (\"some jewels missing\", \"DARJ\", \"RRD\",      3),\n    (\"some not jewels\",     \"RAD\",  \"bRADAb\",   4),\n]\n# allow code formatters to change the following cells\n# fmt: on\n
# don't allow code formatters to change this table # fmt: off tests = [ # case jewels stones output (\"no jewels\", \"\", \"abc\", 0), (\"no stones\", \"DA\", \"\", 0), (\"both empty\", \"\", \"\", 0), (\"none is a jewel\", \"DA\", \"dada?\", 0), (\"equal strings\", \"DA\", \"DA\", 2), (\"all same jewel\", \"DA\", \"AAAA\", 4), (\"some jewels missing\", \"DARJ\", \"RRD\", 3), (\"some not jewels\", \"RAD\", \"bRADAb\", 4), ] # allow code formatters to change the following cells # fmt: on In\u00a0[4]: Copied!
def count_in_string(jewels: str, stones: str) -> int:\n    \"\"\"Return the number of characters in `stones` that are in `jewels`.\n\n    Use Algorithm 2: for each stone, check if it's a jewel.\n    Complexity: best \u0398(len(stones)); worst \u0398(len(stones) * len(jewels))\n    \"\"\"\n    counter = 0\n    for stone in stones:\n        if stone in jewels:\n            counter = counter + 1\n    return counter\n\n\n# test(function, test_table) runs `function` on each test in `test_table`\n# and prints the tests that fail (actual output isn't the expected output).\ntest(count_in_string, tests)\n
def count_in_string(jewels: str, stones: str) -> int: \"\"\"Return the number of characters in `stones` that are in `jewels`. Use Algorithm 2: for each stone, check if it's a jewel. Complexity: best \u0398(len(stones)); worst \u0398(len(stones) * len(jewels)) \"\"\" counter = 0 for stone in stones: if stone in jewels: counter = counter + 1 return counter # test(function, test_table) runs `function` on each test in `test_table` # and prints the tests that fail (actual output isn't the expected output). test(count_in_string, tests)
Testing count_in_string...\nTests finished: 8 passed, 0 failed.\n

The implementation of Algorithm 2 passes all tests. Next, Algorithm 3 can be implemented with Python's Counter class.

In\u00a0[5]: Copied!
from collections import Counter\n\n\ndef count_in_bag(jewels: str, stones: str) -> int:\n    \"\"\"Return the number of characters in `stones` that are in `jewels`.\n\n    Use Algorithm 3: put stones in a bag; add the frequencies of those that are jewels.\n    Complexity: \u0398(len(stones) + len(jewels))\n    \"\"\"\n    bag = Counter(stones)\n    counter = 0\n    for jewel in jewels:\n        counter = counter + bag[jewel]\n    return counter\n\n\ntest(count_in_bag, tests)\n
from collections import Counter def count_in_bag(jewels: str, stones: str) -> int: \"\"\"Return the number of characters in `stones` that are in `jewels`. Use Algorithm 3: put stones in a bag; add the frequencies of those that are jewels. Complexity: \u0398(len(stones) + len(jewels)) \"\"\" bag = Counter(stones) counter = 0 for jewel in jewels: counter = counter + bag[jewel] return counter test(count_in_bag, tests)
Testing count_in_bag...\nTests finished: 8 passed, 0 failed.\n

Finally, Algorithm 4 can be implemented with Python's built-in set type.

In\u00a0[6]: Copied!
def count_in_set(jewels: str, stones: str) -> int:\n    \"\"\"Return the number of characters in `stones` that are in `jewels`.\n\n    Use Algorithm 4: put jewels in a set; count the stones that are in the set.\n    Complexity: \u0398(len(jewels) + len(stones))\n    \"\"\"\n    jewel_set = set(jewels)\n    counter = 0\n    for stone in stones:\n        if stone in jewel_set:\n            counter = counter + 1\n    return counter\n\n\ntest(count_in_set, tests)\n
def count_in_set(jewels: str, stones: str) -> int: \"\"\"Return the number of characters in `stones` that are in `jewels`. Use Algorithm 4: put jewels in a set; count the stones that are in the set. Complexity: \u0398(len(jewels) + len(stones)) \"\"\" jewel_set = set(jewels) counter = 0 for stone in stones: if stone in jewel_set: counter = counter + 1 return counter test(count_in_set, tests)
Testing count_in_set...\nTests finished: 8 passed, 0 failed.\n
In\u00a0[7]: Copied!
def best_case(size: int) -> tuple[str, str]:\n    \"\"\"Return a best-case input (pair of strings) of the given size.\n\n    Output: (jewels, stones) with\n    - len(jewels) + len(stones) == size, with each string half the size\n    - jewels has no duplicate characters\n    - all stones are jewels[0]\n    \"\"\"\n    j = size // 2\n    s = size - j\n    # create a string with j different characters (Unicode 32 onwards)\n    jewels = \"\"\n    for code in range(32, 32 + j):\n        jewels = jewels + chr(code)\n    # create a string with s spaces (Unicode 32), the first character in jewels\n    stones = \" \" * s\n    return (jewels, stones)\n
def best_case(size: int) -> tuple[str, str]: \"\"\"Return a best-case input (pair of strings) of the given size. Output: (jewels, stones) with - len(jewels) + len(stones) == size, with each string half the size - jewels has no duplicate characters - all stones are jewels[0] \"\"\" j = size // 2 s = size - j # create a string with j different characters (Unicode 32 onwards) jewels = \"\" for code in range(32, 32 + j): jewels = jewels + chr(code) # create a string with s spaces (Unicode 32), the first character in jewels stones = \" \" * s return (jewels, stones)

allowed found issues:

  • 14: chr()

Note that the allowed tool reports that I haven't taught my students the chr function. That's why I explained it before presenting the code.

Moving on, I write some tests, to check that best_case is generating the right strings.

In\u00a0[8]: Copied!
# fmt: off\ntest(best_case, [\n    # case,           size, (jewels, stones)\n    (\"empty strings\", 0,    (\"\",     \"\")),\n    (\"no jewels\",     1,    (\"\",     \" \")),\n    (\"odd size\",      5,    (' !',   \"   \")),\n])\n# fmt: on\n
# fmt: off test(best_case, [ # case, size, (jewels, stones) (\"empty strings\", 0, (\"\", \"\")), (\"no jewels\", 1, (\"\", \" \")), (\"odd size\", 5, (' !', \" \")), ]) # fmt: on
Testing best_case...\nTests finished: 3 passed, 0 failed.\n

To generate a worst case, I have two options: no stone is a jewel or all stones are the last jewel (Unicode 31 + j). I choose the first one. (Spoiler alert: I made the wrong choice. Can you see why?)

In\u00a0[9]: Copied!
def worst_case(size: int) -> tuple[str, str]:\n    \"\"\"Return a worst-case input (pair of strings) of the given size.\n\n    Output: (jewels, stones) with\n    - len(jewels) + len(stones) == size, with each string half the size\n    - jewels has no duplicate characters\n    - no stone is a jewel\n    \"\"\"\n    j = size // 2\n    s = size - j\n    # create a string with j different characters (Unicode 32 onwards)\n    jewels = \"\"\n    for code in range(32, 32 + j):\n        jewels = jewels + chr(code)  # allowed\n    # create a string with s different characters (Unicode 32+j onwards)\n    stones = \"\"\n    for code in range(32 + j, 32 + j + s):\n        stones = stones + chr(code)  # allowed\n    return (jewels, stones)\n\n\n# fmt: off\ntest(worst_case, [\n    # case,           size, (jewels,    stones)\n    (\"empty strings\", 0,    (\"\",        \"\")),\n    (\"no jewels\",     1,    (\"\",        \" \")),\n    (\"odd size\",      11,   (' !\"#$',   \"%&'()*\")),  # 5 jewels, 6 stones\n])\n# fmt: on\n
def worst_case(size: int) -> tuple[str, str]: \"\"\"Return a worst-case input (pair of strings) of the given size. Output: (jewels, stones) with - len(jewels) + len(stones) == size, with each string half the size - jewels has no duplicate characters - no stone is a jewel \"\"\" j = size // 2 s = size - j # create a string with j different characters (Unicode 32 onwards) jewels = \"\" for code in range(32, 32 + j): jewels = jewels + chr(code) # allowed # create a string with s different characters (Unicode 32+j onwards) stones = \"\" for code in range(32 + j, 32 + j + s): stones = stones + chr(code) # allowed return (jewels, stones) # fmt: off test(worst_case, [ # case, size, (jewels, stones) (\"empty strings\", 0, (\"\", \"\")), (\"no jewels\", 1, (\"\", \" \")), (\"odd size\", 11, (' !\"#$', \"%&'()*\")), # 5 jewels, 6 stones ]) # fmt: on
Testing worst_case...\nTests finished: 3 passed, 0 failed.\n

The # allowed comment in lines 14 and 18 tells the tool that chr() is allowed, because it was explained, and hence should not be reported.

Finally I generate random typical cases in which not all stones are jewels and not all jewels occur in the stones.

In\u00a0[10]: Copied!
import random\n\n\ndef normal_case(size: int) -> tuple[str, str]:\n    \"\"\"Return a typical input (pair of strings) of the given size.\n\n    Output: (jewels, stones) with\n    - len(jewels) + len(stones) == size, with each string half the size\n    - jewels has no duplicate characters\n    - stones has some jewels and some non-jewels, if len(jewels) > 1\n    - not all jewels occur in stones, if len(jewels) > 1\n    \"\"\"\n    j = size // 2\n    s = size - j\n    # create a string with j different characters (Unicode 32 onwards)\n    jewels = \"\"\n    for code in range(32, 32 + j):\n        jewels = jewels + chr(code)  # allowed\n    # create a string with s random characters from Unicode 33 to 33 + 2j\n    stones = \"\"\n    for _ in range(s):\n        stones = stones + chr(random.randint(33, 33 + 2 * j))  # allowed\n    return (jewels, stones)\n\n\n# Can't test with a test table, because the output is random.\n(jewels, stones) = normal_case(20)\ncounter = count_in_string(jewels, stones)\nprint(\"Some stones are jewels:\", counter > 0)\nprint(\"Some stones aren't jewels:\", counter < len(stones))\n
import random def normal_case(size: int) -> tuple[str, str]: \"\"\"Return a typical input (pair of strings) of the given size. Output: (jewels, stones) with - len(jewels) + len(stones) == size, with each string half the size - jewels has no duplicate characters - stones has some jewels and some non-jewels, if len(jewels) > 1 - not all jewels occur in stones, if len(jewels) > 1 \"\"\" j = size // 2 s = size - j # create a string with j different characters (Unicode 32 onwards) jewels = \"\" for code in range(32, 32 + j): jewels = jewels + chr(code) # allowed # create a string with s random characters from Unicode 33 to 33 + 2j stones = \"\" for _ in range(s): stones = stones + chr(random.randint(33, 33 + 2 * j)) # allowed return (jewels, stones) # Can't test with a test table, because the output is random. (jewels, stones) = normal_case(20) counter = count_in_string(jewels, stones) print(\"Some stones are jewels:\", counter > 0) print(\"Some stones aren't jewels:\", counter < len(stones))
Some stones are jewels: True\nSome stones aren't jewels: True\n

The loop variable's name is just an underscore (line 21) to avoid a warning that the loop variable isn't used in the loop's body.

In\u00a0[11]: Copied!
cases = [best_case, normal_case, worst_case]\ntime_cases(count_in_string, cases, start=10, double=4)\n
cases = [best_case, normal_case, worst_case] time_cases(count_in_string, cases, start=10, double=4)
Run-times for count_in_string\n\nInput size       best_case     normal_case      worst_case \n        10           202.2           161.6           155.0 ns\n        20           332.7           346.1           296.7 ns\n        40           589.1           625.3           420.5 ns\n        80          1084.0          1085.0           768.1 ns\n       160          2080.8          2242.0          1536.6 ns

The results are baffling: the worst case is the fastest!

The linear search is done with the in operator, which is much faster than implementing the search in Python. Due to the very small input sizes, with at most 80 jewels and 80 stones, it doesn't really matter whether the search stops at the first jewel or goes through all the jewels.

Since the search takes about the same time in the best and worst cases, what makes the difference to the overall run-time is how often the counter is updated. In the best case (all stones are the first jewel) the counter is always incremented, whereas in the worst case (no stone is a jewel) the counter is never incremented. Hence, the worst case is faster than the best case.

The real issue is that I'm not generating worst cases. I'm not constructing inputs that make Algorithm 2 do the most work. A worst case should make the linear search go through all the jewels for each stone and then increment the counter for that stone. The worst case is actually when all stones are the last jewel.

I should go back, modify the worst_case function so that stones = chr(31 + j) * s, and run the previous cell again. Instead, I will run count_in_string on larger inputs. That will avoid the search taking about the same time whether a stone is the first jewel or not a jewel. This time I also ask for a graphical output.

In\u00a0[12]: Copied!
time_cases(count_in_string, cases, start=1000, double=4, chart=True)\n
time_cases(count_in_string, cases, start=1000, double=4, chart=True)
Run-times for count_in_string\n\nInput size       best_case     normal_case      worst_case \n      1000            14.5            34.8            33.1 \u00b5s\n      2000            30.2           109.4           113.9 \u00b5s\n      4000            61.6           327.2           378.0 \u00b5s\n      8000           123.6          1098.9          1339.7 \u00b5s\n     16000           248.4          3954.2          5003.5 \u00b5s

The best-case run-times double as the input size doubles, confirming the linear complexity. The worst-case run-times about quadruple as the input size doubles, confirming the quadratic complexity.

In\u00a0[13]: Copied!
algorithms = [count_in_string, count_in_set, count_in_bag]\ntime_functions(algorithms, normal_case, 1000, 4, chart=True)\n
algorithms = [count_in_string, count_in_set, count_in_bag] time_functions(algorithms, normal_case, 1000, 4, chart=True)
Inputs generated by normal_case\n\nInput size count_in_string    count_in_set    count_in_bag \n      1000            34.4            36.4            65.6 \u00b5s\n      2000           111.3            88.1           147.2 \u00b5s\n      4000           342.8           189.3           306.0 \u00b5s\n      8000          1115.9           406.8           641.9 \u00b5s\n     16000          4015.2           849.7          1302.0 \u00b5s

As expected, using a set or bag pays off as the input size increases: as there are more jewels and stones, the linear search takes longer, but looking up a stone in a set of jewels or a jewel in a bag of stones takes constant time. However, for small inputs, a linear search is fast and hence the extra time to create the additional data structure doesn't pay off.

Using a set of jewels is about twice as fast as using a bag of stones, maybe because set is built-in and implemented in C, a very fast language, whereas Counter is part of the standard library implemented in Python, which is much slower. This example shows that algorithms with the same complexity can perform differently in practice, because the complexity tells us how the run-times grow, but not how fast or slow they are.

"},{"location":"essays/example-jewels/#jewels-and-stones","title":"Jewels and Stones\u00b6","text":"

Michel Wermelinger, 23 January 2024, last updated 13 March 2024

"},{"location":"essays/example-jewels/#1-tests","title":"1 Tests\u00b6","text":"

Before thinking of any solution, I write down some tests. Each test is two input strings and the expected integer output. I need to think of edge cases: inputs that are extreme values or that lead to extreme outputs.

For this problem, one edge case is that none of the stones is a jewel, which can happen in several ways: there are no stones; there are no jewels; there are stones and jewels but no character in stones occurs in jewels. Note that the problem statement doesn't prevent the strings from being empty, so we must assume they may be.

Another edge case is that all stones are jewels, which again can happen in several ways: the two input strings are the same; all stones are the same jewel; each stone is a jewel but not all jewels are among the stones.

Besides the edge cases I must also consider normal cases, where the stones include jewels and non-jewels and where the stones don't appear in the same order as the jewels.

I write the tests succinctly as a table, with one row per test, and one column with a string describing the test, one column per input, and a final column with the expected output. Later, I will use a function from algoesup to run all the tests and report those where the actual outputs differ from the expected ones.

"},{"location":"essays/example-jewels/#2-algorithms","title":"2 Algorithms\u00b6","text":"

Next I think of possible algorithms and their complexity, to decide which ones are worth implementing.

"},{"location":"essays/example-jewels/#21-without-additional-data","title":"2.1 Without additional data\u00b6","text":"

I first attempt to solve the problem directly on the input strings. One approach that comes to mind is to count how often each jewel occurs among the stones.

Algorithm 1: Set a counter to zero. For each jewel, iterate over the stones. If a stone is the same as the current jewel, increment the counter. After going through all jewels, return the counter.

If j is the number of jewels and s the number of stones, then this algorithm always has complexity j \u00d7 \u0398(s) = \u0398(j \u00d7 s) because it does at most two constant-time operations, checking equality and incrementing the counter, for each jewel and stone.

The first approach searches each jewel among the stones. The symmetric approach is to search each stone among the jewels:

Algorithm 2: Set a counter to zero. For each stone, do a linear search for it among the jewels. If it is found, increment the counter. After going through all stones, return the counter.

In the best case, each stone is the first jewel, and the search takes constant time. The best-case complexity is s \u00d7 \u0398(1) = \u0398(s). In the worst case, the search goes through all the jewels, because the stone isn't a jewel or is the last jewel. The worst-case complexity is s \u00d7 \u0398(j) = \u0398(s \u00d7 j).

"},{"location":"essays/example-jewels/#22-with-additional-data","title":"2.2 With additional data\u00b6","text":"

My next attempts consider pre-processing the inputs to make the searches faster.

Algorithm 1 goes through the stones j times, each time counting how often a jewel occurs. It's more efficient to go through the stones only once, counting how often each stone occurs, and then add the counts of those that are jewels. We need to know the frequency of each stone and the natural data type for that is the bag (or multiset).

Algorithm 3: Put all stones in a bag. Initialise a counter to zero. For each jewel, add to the counter the frequency of that jewel in the bag. Return the value of the counter.

If the bag type is implemented efficiently, both adding an item and obtaining its frequency take constant time. The complexity of Algorithm 3 is thus s \u00d7 \u0398(1) + j \u00d7 \u0398(1) = \u0398(s + j).

Algorithm 2 checks if each stone is a jewel. Is there an abstract data type (ADT) that provides such an operation? Yes, the set ADT allows checking if an item is a member of a set, which leads to...

Algorithm 4: Put all jewels in a set. Initialise a counter to zero. For each stone, if it is a member of the set, increment the counter. Return the value of the counter.

If the set ADT is implemented efficiently, both adding an item and checking membership take constant time. The complexity of Algorithm 4 is thus j \u00d7 \u0398(1) + s \u00d7 \u0398(1) = \u0398(j + s).

"},{"location":"essays/example-jewels/#23-with-sorting","title":"2.3 With sorting\u00b6","text":"

The order of jewels and stones in the input strings doesn't affect the output (the number of stones that are jewels). I can thus sort the jewels, the stones, or both, to use logarithmic binary search instead of linear search.

However, sorting takes linear time in the best case and log-linear or quadratic time in the worst case. I already have algorithms that are linear in the total size of the input, so sorting wouldn't be more efficient. I therefore do not further pursue this approach.

"},{"location":"essays/example-jewels/#24-summary","title":"2.4 Summary\u00b6","text":"

Algorithms 1 and 2 don't use additional memory, but have complexity \u0398(j \u00d7 s), while Algorithms 3 and 4 have better complexity \u0398(j + s), but have the run-time and memory overhead of an additional data structure. In practice, Algorithms 3 and 4 might be slower than Algorithms 1 and 2, so it's best to implement all of them.

However, Algorithm 1 always takes \u0398(j \u00d7 s) whereas Algorithm 2 only does so in the worst case. I therefore won't implement Algorithm 1.

Algorithm 4 creates a set with j jewels, whereas Algorithm 3 creates a bag with s stones. Typically, we expect inputs to have more stones than jewels (j < s), so Algorithm 4 is likely to be faster. Nevertheless, I'll implement Algorithm 3 too, for comparison.

"},{"location":"essays/example-jewels/#3-code","title":"3 Code\u00b6","text":"

The next function implements Algorithm 2, using Python's in operator to do the linear search.

"},{"location":"essays/example-jewels/#4-performance","title":"4 Performance\u00b6","text":"

Finally, I will measure the run-times of the above functions with the algoesup library. The library expects us to define functions that construct inputs for a given total size.

"},{"location":"essays/example-jewels/#41-generating-inputs","title":"4.1 Generating inputs\u00b6","text":"

This problem has two inputs, so I must decide how to divide the total size between the two strings. I'll divide it equally: s = j. (For an odd total size, s = j + 1.) In that way, the expected run-times will be linear or quadratic in s: \u0398(s + j) = \u0398(2s) = \u0398(s) or \u0398(s \u00d7 j) = \u0398(s\u00b2). This makes it easy to check them empirically.

I write a function to generate a best-case input: each stone is the first jewel. To generate the input strings, I use Python's built-in chr function to obtain a character, given its Unicode number. The first 32 characters (codes 0 to 31) are mostly unprintable control characters, so I start at code 32 (space character).

"},{"location":"essays/example-jewels/#42-best-normal-and-worst-run-times","title":"4.2 Best, normal and worst run-times\u00b6","text":"

Algorithms 3 and 4 always have the same complexity, but not so for Algorithm 2. I can measure its run-times on best, typical and worst cases, using the library's time_cases. I start with an input size of 10 and double it four times to 20, 40, 80 and 160.

"},{"location":"essays/example-jewels/#43-fastest-and-slowest-algorithm","title":"4.3 Fastest and slowest algorithm\u00b6","text":"

The library provides time_functions to measure the run-times of up to 6 functions on the same inputs. I run the three implemented algorithms on typical inputs, with the same sizes as before.

"},{"location":"essays/example-jewels/#5-concluding-remarks","title":"5 Concluding remarks\u00b6","text":"

This essay explored a simple problem: how many of the characters in a string occur in another string? There are at least 7 algorithms: 2 don't require any additional data structure, 2 use a set or a bag to achieve constant-time searches, and 3 others sort one or both inputs. Three of the first four algorithms were implemented in Python and their run-times measured. Using a set is the best option: the complexity is the lowest possible (linear in the total size of the inputs), and the run-times are better than using a bag.

"},{"location":"essays/example-two-sum-2/","title":"Two Sum (two solutions)","text":"

This simple algorithmic essay aims to solve the classic Two Sum problem from LeetCode.

Readers should have an intermediate understanding of Python to understand this essay.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff In\u00a0[1]: Copied!
from algoesup import test, time_functions\n
from algoesup import test, time_functions In\u00a0[2]: Copied!
%load_ext algoesup.magics\n%ruff on\n%allowed on\n
%load_ext algoesup.magics %ruff on %allowed on
ruff was activated\nallowed was activated\n
In\u00a0[3]: Copied!
two_sum_tests = [\n  # [\"description\", nums, target, expected_output],\n    [\"minimum size for nums\", [1, 2], 3, (0, 1)],\n    [\"non-adjacent indices\", [1, 4, 9, 7], 8, (0, 3)],\n    [\"first two elements\", [5, 7, 1, 2, 8], 12, (0, 1)],\n    [\"last two elements\", [1, 3, 5, 7, 8], 15, (3, 4)],\n    [\"repeated elements\", [6, 2, 3, 2], 4, (1, 3)],\n    [\"max and min range\", [-109, 109, 0], 0, (0, 1)],\n    [\"lowest target value\", [-50, 1, -59], -109, (0, 2)],\n    [\"highest target value\", [50, 1, 59], 109, (0, 2)],\n]\n
two_sum_tests = [ # [\"description\", nums, target, expected_output], [\"minimum size for nums\", [1, 2], 3, (0, 1)], [\"non-adjacent indices\", [1, 4, 9, 7], 8, (0, 3)], [\"first two elements\", [5, 7, 1, 2, 8], 12, (0, 1)], [\"last two elements\", [1, 3, 5, 7, 8], 15, (3, 4)], [\"repeated elements\", [6, 2, 3, 2], 4, (1, 3)], [\"max and min range\", [-109, 109, 0], 0, (0, 1)], [\"lowest target value\", [-50, 1, -59], -109, (0, 2)], [\"highest target value\", [50, 1, 59], 109, (0, 2)], ] In\u00a0[4]: Copied!
def two_sum_bf(nums, target):\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\"\"\"\n    for index_1 in range(len(nums)):\n        for index_2 in range(len(nums)):\n            if index_1 != index_2 and nums[index_1] + nums[index_2] == target:\n                return index_1, index_2\n\ntest(two_sum_bf, two_sum_tests)\n
def two_sum_bf(nums, target): \"\"\"Given a list of integers return the indices of the pair that sums to target.\"\"\" for index_1 in range(len(nums)): for index_2 in range(len(nums)): if index_1 != index_2 and nums[index_1] + nums[index_2] == target: return index_1, index_2 test(two_sum_bf, two_sum_tests)
Testing two_sum_bf...\nTests finished: 8 passed, 0 failed.\n

Next up is the Mapping algorithm implemented using Python's dict.

In\u00a0[5]: Copied!
def two_sum_map(nums, target):\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\"\"\"\n    differences = {}\n    for index in range(len(nums)):\n        difference = target - nums[index]\n        if nums[index] in differences:\n            return differences[nums[index]], index\n        differences[difference] = index\n\ntest(two_sum_map, two_sum_tests)\n
def two_sum_map(nums, target): \"\"\"Given a list of integers return the indices of the pair that sums to target.\"\"\" differences = {} for index in range(len(nums)): difference = target - nums[index] if nums[index] in differences: return differences[nums[index]], index differences[difference] = index test(two_sum_map, two_sum_tests)
Testing two_sum_map...\nTests finished: 8 passed, 0 failed.\n
In\u00a0[6]: Copied!
def worst(size):\n    \"\"\"Given a size, generate a worst-case problem instance for two sum.\"\"\"\n    nums = [0] * (size - 2) + [1, 1]\n    target = 2\n    return (nums, target)\n
def worst(size): \"\"\"Given a size, generate a worst-case problem instance for two sum.\"\"\" nums = [0] * (size - 2) + [1, 1] target = 2 return (nums, target) In\u00a0[7]: Copied!
solutions = [two_sum_bf, two_sum_map]\ntime_functions(solutions, worst, start=100, double=4, chart=True)\n
solutions = [two_sum_bf, two_sum_map] time_functions(solutions, worst, start=100, double=4, chart=True)
Inputs generated by worst\n\nInput size      two_sum_bf     two_sum_map \n       100           450.6             5.6 \u00b5s\n       200          1794.4            10.8 \u00b5s\n       400          7407.3            22.4 \u00b5s\n       800         30596.3            45.6 \u00b5s\n      1600        124085.9            92.1 \u00b5s

The numbers printed before the chart represent the run-times of our solutions in microseconds (\u00b5s) for increasing input sizes.

On the chart, the data points for two_sum_bf almost instantly eclipse that of two_sum_map. It looks as if the run-times for two_sum_map are not growing at all, but we know by looking at the numbers above that this is not the case.

Let us see if we can modify the inputs of time_functions for a better visual representation.

In\u00a0[8]: Copied!
solutions = [two_sum_bf, two_sum_map]\ntime_functions(solutions, worst, start=1, double=4, text=False, chart=True)\n
solutions = [two_sum_bf, two_sum_map] time_functions(solutions, worst, start=1, double=4, text=False, chart=True)

We changed the initial input size to 1, and the trend of the run-times is a little clearer now. The Brute force algorithm's run-times still accelerate off into the stratosphere, but we can see the separation and trend of the Mapping algorithm a little better.

"},{"location":"essays/example-two-sum-2/#two-sum-two-solutions","title":"Two Sum (two solutions)\u00b6","text":"

Michael Snowden, 24 January 2024, last updated 16 March 2024

"},{"location":"essays/example-two-sum-2/#1-problem","title":"1 Problem\u00b6","text":"

Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.

  • $-109 \\leq$ nums[i] $\\leq 109$
  • $-109 \\leq$ target $\\leq 109$
  • Only one valid answer exists.
"},{"location":"essays/example-two-sum-2/#2-algorithms","title":"2 Algorithms\u00b6","text":"

With our problem defined, the next step is to think of ways to solve it. This section presents two approaches to solving Two Sum: brute force, and mapping.

"},{"location":"essays/example-two-sum-2/#21-brute-force","title":"2.1 Brute force\u00b6","text":"

Generally speaking, a brute force algorithm tries all possibilities, and selects a correct one. For this problem, the possibilities are all sums that can be obtained by pairing each number in nums with every other number, and the correct pair is selected when the sum matches target.

Brute force algorithm: An outer loop iterates through each number in nums, then for each number, an inner loop iterates nums again. For each pair of numbers, if their indices are different and their sum matches target, return their indices.

"},{"location":"essays/example-two-sum-2/#22-mapping","title":"2.2 Mapping\u00b6","text":"

In the Brute force algorithm, we checked each pair of numbers in nums to see if the resulting sum was equal to target. Since we are already checking every number in the list, why not store some piece of information from each number that will help us find our matching pair?

For every number in nums, we can map the difference between it and the target (target - number) to its corresponding index using a hashtable. This allows us to check the hashtable for matching numbers much faster.

Mapping algorithm: For each number in nums, if it's in the hashmap, return its index and the index mapped to it. Otherwise, calculate the difference (target - number) and map it to the corresponding index of number.

"},{"location":"essays/example-two-sum-2/#3-code","title":"3 Code\u00b6","text":"

In this section we will implement and test the algorithms.

"},{"location":"essays/example-two-sum-2/#31-testing","title":"3.1 Testing\u00b6","text":"

We start off by writing some tests.

To test the above solutions, we need to consider edge cases and other important functional tests. We should include tests for the minimum input size, and any extreme values that can be present. When integers are part of the input, and there are no restrictions, negative numbers and zero should be added to the tests.

"},{"location":"essays/example-two-sum-2/#32-implementations","title":"3.2 Implementations\u00b6","text":"

The next cell implements the Brute force algorithm using nested for loops

"},{"location":"essays/example-two-sum-2/#4-performance","title":"4 Performance\u00b6","text":"

In this section we measure the run-times of our solutions under certain conditions and discuss the results.

"},{"location":"essays/example-two-sum-2/#41-generating-inputs","title":"4.1 Generating inputs\u00b6","text":"

Since time_functions from the algoesup library requires code to generate inputs, we shall write that first.

It is often useful to measure the run-times of a solution when it is doing the most work; this is called the worst-case. We want to generate inputs that will take our solution the most time to complete, and this happens when the two numbers that sum to target are the last two.

"},{"location":"essays/example-two-sum-2/#42-run-times-for-each-solution","title":"4.2 Run-times for each solution\u00b6","text":"

We now compare worst-case runtimes for both solutions. The input has an initial size of 100, and is doubled 4 times; the run times are measured for the initial size, then each time it is doubled.

"},{"location":"essays/example-two-sum-2/#5-conclusion","title":"5 Conclusion\u00b6","text":"

We started this essay with the definition of the Two Sum problem. Next, we outlined two algorithms: brute force, and mapping. After that, we implemented and tested our solutions using Python, and in the penultimate section we used empirical testing and discussed the results. Now we must decide which of our algorithms is best.

The Brute force algorithm is not very efficient when it comes to run-times. When the size of the input increases the run-times increase by a large amount. The one redeeming aspect of this algorithm is the efficient memory usage which is achieved by not using any additional data structures like dictionaries.

In contrast, the Mapping algorithm is reasonably efficient in terms of run times, but this is achieved by using extra memory in the form of the dictionary. In the final analysis, the slow run-times of the brute force algorithm cannot be ignored. The small trade of memory for faster run-times is worth it in this instance. We therefore conclude the mapping algorithm is best.

"},{"location":"essays/example-two-sum-3/","title":"Two sum (three solutions)","text":"

In this extended algorithmic essay we aim to solve the classic Two Sum problem from LeetCode. We are going to explore, analyse, and compare a selection of approaches with the end goal of finding a clear and efficient solution.

We assume the reader has an intermediate understanding of Python, including aspects like importing modules, using loops, and applying conditionals. Furthermore, Big-Oh notation is used to analyse the complexity of our solutions and we refer to terms such as binary search and brute force.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff pytype\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff pytype In\u00a0[1]: Copied!
from algoesup import test, time_functions, time_cases\n
from algoesup import test, time_functions, time_cases In\u00a0[2]: Copied!
%load_ext algoesup.magics\n%ruff on\n%allowed on\n
%load_ext algoesup.magics %ruff on %allowed on
ruff was activated\nallowed was activated\n
In\u00a0[3]: Copied!
two_sum_tests = [\n  # [\"description\", nums, target, expected_output]\n    [\"minimum size for nums\", [1, 2], 3, (0, 1)],\n    [\"non-adjacent indices\", [1, 4, 9, 7], 8, (0, 3)],\n    [\"first two elements\", [5, 7, 1, 2, 8], 12, (0, 1)],\n    [\"last two elements\", [1, 3, 5, 7, 8], 15, (3, 4)],\n    [\"repeated elements\", [6, 2, 3, 2], 4, (1, 3)],\n    [\"max and min range\", [-109, 109, 0], 0, (0, 1)],\n    [\"lowest target value\", [-50, 1, -59], -109, (0, 2)],\n    [\"highest target value\", [50, 1, 59], 109, (0, 2)],\n]\n
two_sum_tests = [ # [\"description\", nums, target, expected_output] [\"minimum size for nums\", [1, 2], 3, (0, 1)], [\"non-adjacent indices\", [1, 4, 9, 7], 8, (0, 3)], [\"first two elements\", [5, 7, 1, 2, 8], 12, (0, 1)], [\"last two elements\", [1, 3, 5, 7, 8], 15, (3, 4)], [\"repeated elements\", [6, 2, 3, 2], 4, (1, 3)], [\"max and min range\", [-109, 109, 0], 0, (0, 1)], [\"lowest target value\", [-50, 1, -59], -109, (0, 2)], [\"highest target value\", [50, 1, 59], 109, (0, 2)], ] In\u00a0[4]: Copied!
def two_sum_bf(nums: list, target: int) -> tuple[int, int]:\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\n\n    Preconditions:\n        len(nums) >= 2\n        -109 <= nums[i] <= 109\n        -109 <= target <= 109\n        Exactly one pair a and b in nums has a + b = target\n    \"\"\"\n    for index_1 in range(len(nums)):\n        for index_2 in range(len(nums)):\n            if index_1 != index_2 and nums[index_1] + nums[index_2] == target:\n                return index_1, index_2\n\ntest(two_sum_bf, two_sum_tests)\n
def two_sum_bf(nums: list, target: int) -> tuple[int, int]: \"\"\"Given a list of integers return the indices of the pair that sums to target. Preconditions: len(nums) >= 2 -109 <= nums[i] <= 109 -109 <= target <= 109 Exactly one pair a and b in nums has a + b = target \"\"\" for index_1 in range(len(nums)): for index_2 in range(len(nums)): if index_1 != index_2 and nums[index_1] + nums[index_2] == target: return index_1, index_2 test(two_sum_bf, two_sum_tests)
Testing two_sum_bf...\nTests finished: 8 passed, 0 failed.\n

Next up is the approach that uses sorting.

In\u00a0[5]: Copied!
def two_sum_sort(nums: list, target: int) -> tuple[int, int]:\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\n\n    Preconditions:\n        len(nums) >= 2\n        -109 <= nums[i] <= 109\n        -109 <= target <= 109\n        Exactly one pair a and b in nums has a + b = target\n    \"\"\"\n    pairs = []\n    for index in range(len(nums)):\n        pairs.append((nums[index], index))\n    pairs.sort()\n    start = 0\n    end = len(nums) - 1\n    while start < end:\n        current_sum = pairs[start][0] + pairs[end][0]\n        if current_sum == target:\n            # return the indices in ascending order for reliable testing\n            lower_index = min(pairs[start][1], pairs[end][1])\n            upper_index = max(pairs[start][1], pairs[end][1])\n            indices = (lower_index, upper_index)\n            return indices\n        if current_sum < target:\n            start = start + 1\n        else:\n            end = end - 1\n\ntest(two_sum_sort, two_sum_tests)\n
def two_sum_sort(nums: list, target: int) -> tuple[int, int]: \"\"\"Given a list of integers return the indices of the pair that sums to target. Preconditions: len(nums) >= 2 -109 <= nums[i] <= 109 -109 <= target <= 109 Exactly one pair a and b in nums has a + b = target \"\"\" pairs = [] for index in range(len(nums)): pairs.append((nums[index], index)) pairs.sort() start = 0 end = len(nums) - 1 while start < end: current_sum = pairs[start][0] + pairs[end][0] if current_sum == target: # return the indices in ascending order for reliable testing lower_index = min(pairs[start][1], pairs[end][1]) upper_index = max(pairs[start][1], pairs[end][1]) indices = (lower_index, upper_index) return indices if current_sum < target: start = start + 1 else: end = end - 1 test(two_sum_sort, two_sum_tests)
Testing two_sum_sort...\nTests finished: 8 passed, 0 failed.\n

Finally, the mapping algorithm is implemented using Python's dict.

In\u00a0[6]: Copied!
def two_sum_map(nums: list, target: int) -> tuple[int, int]:\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\n\n    Preconditions:\n        len(nums) >= 2\n        -109 <= nums[i] <= 109\n        -109 <= target <= 109\n        Exactly one pair a and b in nums has a + b = target\n    \"\"\"\n    differences: dict[int, int] = {} # allowed\n    for index in range(len(nums)):\n        difference = target - nums[index]\n        if nums[index] in differences:\n            return differences[nums[index]], index\n        differences[difference] = index\n\ntest(two_sum_map, two_sum_tests)\n
def two_sum_map(nums: list, target: int) -> tuple[int, int]: \"\"\"Given a list of integers return the indices of the pair that sums to target. Preconditions: len(nums) >= 2 -109 <= nums[i] <= 109 -109 <= target <= 109 Exactly one pair a and b in nums has a + b = target \"\"\" differences: dict[int, int] = {} # allowed for index in range(len(nums)): difference = target - nums[index] if nums[index] in differences: return differences[nums[index]], index differences[difference] = index test(two_sum_map, two_sum_tests)
Testing two_sum_map...\nTests finished: 8 passed, 0 failed.\n

The brute force algorithm comes out on top in terms of simplicity, it is just a case of checking every pair of numbers. The double pointer approach seems like the most convoluted with the mapping differences algorithm somewhere in the middle of the two.

In\u00a0[7]: Copied!
def best(size: int) -> tuple[list[int], int]:\n    \"\"\"Given a size, generate a best case instance for Two Sum.\n\n    Preconditions: size >= 2\n    \"\"\"\n    nums = [1, 1] + [0] * (size - 2)\n    target = 2\n    return (nums, target)\n\ndef normal(size: int) -> tuple[list[int], int]:\n    \"\"\"Given a size, generate a normal case instance for Two Sum.\n\n    Preconditions: size >= 2\n    \"\"\"\n    nums = [0] * size\n    nums[size // 2 - 1:size // 2 + 1] = [1, 1]\n    target = 2\n    return (nums, target)\n\ndef worst(size: int) -> tuple[list[int], int]:\n    \"\"\"Given a size, generate a worst case instance for Two Sum.\n\n    Preconditions: size >= 2\n    \"\"\"\n    nums = [0] * (size - 2) + [1, 1]\n    target = 2\n    return (nums, target)\n
def best(size: int) -> tuple[list[int], int]: \"\"\"Given a size, generate a best case instance for Two Sum. Preconditions: size >= 2 \"\"\" nums = [1, 1] + [0] * (size - 2) target = 2 return (nums, target) def normal(size: int) -> tuple[list[int], int]: \"\"\"Given a size, generate a normal case instance for Two Sum. Preconditions: size >= 2 \"\"\" nums = [0] * size nums[size // 2 - 1:size // 2 + 1] = [1, 1] target = 2 return (nums, target) def worst(size: int) -> tuple[list[int], int]: \"\"\"Given a size, generate a worst case instance for Two Sum. Preconditions: size >= 2 \"\"\" nums = [0] * (size - 2) + [1, 1] target = 2 return (nums, target)

First let us see the run-times of two_sum_bf for best, normal and worst-case instances. Note the input size starts at 100 and is doubled 4 times reaching 1600 for the last data point.

In\u00a0[8]: Copied!
input_generators = [worst, normal, best]\ntime_cases(two_sum_bf, input_generators, start=100, double=4, chart=True)\n
input_generators = [worst, normal, best] time_cases(two_sum_bf, input_generators, start=100, double=4, chart=True)
Run-times for two_sum_bf\n\nInput size           worst          normal            best \n       100           456.1           225.7             0.3 \u00b5s\n       200          1825.0           901.7             0.3 \u00b5s\n       400          7434.7          3720.2             0.3 \u00b5s\n       800         30573.2         15343.2             0.3 \u00b5s\n      1600        124160.9         62221.4             0.3 \u00b5s

We can see from the chart and run-times above, that our analysis of quadratic time complexity for the worst-case seems to line up with the data. As we double the input size, the run-times quadruple. For the best case, the run-times generally stay the same for increasing inputs suggesting constant time complexity. The normal case is somewhere in the middle of the two.

Now let us do the same for two_sum_map.

In\u00a0[9]: Copied!
input_generators = [worst, normal, best]\ntime_cases(two_sum_map, input_generators, start=100, double=4, chart=True)\n
input_generators = [worst, normal, best] time_cases(two_sum_map, input_generators, start=100, double=4, chart=True)
Run-times for two_sum_map\n\nInput size           worst          normal            best \n       100             5.6             3.0             0.3 \u00b5s\n       200            11.0             5.7             0.3 \u00b5s\n       400            22.6            11.1             0.3 \u00b5s\n       800            46.6            22.6             0.3 \u00b5s\n      1600            92.9            46.1             0.3 \u00b5s

The first thing to note is the dramatic reduction in size of the run-times. The scales on the y-axis differ by orders of magnitude. Also, the plot for our worst-case on this chart has a much straighter line with run-times doubling in proportion with input size. This aligns with our prediction of linear time complexity.

In\u00a0[10]: Copied!
solutions = [two_sum_bf, two_sum_sort, two_sum_map]\ntime_functions(solutions, worst, start=100, double=4, chart=True)\n
solutions = [two_sum_bf, two_sum_sort, two_sum_map] time_functions(solutions, worst, start=100, double=4, chart=True)
Inputs generated by worst\n\nInput size      two_sum_bf    two_sum_sort     two_sum_map \n       100           454.0            14.4             5.6 \u00b5s\n       200          1805.4            28.4            11.0 \u00b5s\n       400          7469.5            57.6            22.6 \u00b5s\n       800         30776.4           116.9            45.9 \u00b5s\n      1600        124944.8           237.3            93.0 \u00b5s

The run-times for two_sum_bf almost instantly eclipse that of two_sum_sort and two_sum_map. On the chart it looks as if the run-times for two_sum_sort and two_sum_map are not growing at all, but we know by looking at the run-times above that this is not the case. Let us see if we can adjust the inputs of time_functions so the growth rates of the fastest two functions have a better visual representation in the chart.

In\u00a0[11]: Copied!
solutions = [two_sum_bf, two_sum_sort, two_sum_map]\ntime_functions(solutions, worst, start=1, double=4, text=False, chart=True)\n
solutions = [two_sum_bf, two_sum_sort, two_sum_map] time_functions(solutions, worst, start=1, double=4, text=False, chart=True)

The point at which the growth rates start to diverge is much clearer now. The brute force approach's run-times still accelerate off into the stratosphere, but we can see the separation and trend of the sorting and mapping algorithms.

"},{"location":"essays/example-two-sum-3/#two-sum-three-solutions","title":"Two sum (three solutions)\u00b6","text":"

Michael Snowden, 25 January 2024, last updated 16 March 2024

"},{"location":"essays/example-two-sum-3/#1-problem","title":"1 Problem\u00b6","text":"

To effectively solve Two Sum, it is crucial we thoroughly understand the problem. We need to identify the inputs, outputs and the relationship between them.

Leetcode provides the following problem description.

\"Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.\"

  • $-109 \\leq$ nums[i] $\\leq 109$
  • $-109 \\leq$ target $\\leq 109$
  • Only one valid answer exists.

We can extract some important information from their description, namely the pre- and post-conditions.

Preconditions:

  • $-109 \\leq$ nums[i] $\\leq 109$
  • $-109 \\leq$ target $\\leq 109$
  • Exactly one pair a and b in nums has a + b = target

Postconditions:

Let indices be the output, then:

  • len(indices) = 2;
  • nums[indices[0]] + nums[indices[1]] = target

The preconditions must be satisfied for our algorithms to be defined, and the postconditions must be satisfied for our algorithms to be correct.

"},{"location":"essays/example-two-sum-3/#2-algorithms","title":"2 Algorithms\u00b6","text":"

With our problem defined, the next step is to think of ways to solve it. This section presents three distinct approaches to solving Two sum: brute force, sorting and mapping.

"},{"location":"essays/example-two-sum-3/#21-brute-force","title":"2.1 Brute force\u00b6","text":"

Generally speaking, a brute force algorithm tries all possibilities, and selects a correct one. For this problem, the possibilities are all sums that can be obtained by pairing each number in nums with every other number, and the correct pair is identified if the sum matches target. We are checking all possible sums, so we are sure to find our indices if they exist. Looking back at the preconditions, we can see that each problem instance must have exactly one pair that sums to target. Hence this approach is guaranteed to find a solution, as long as our preconditions are met.

Getting any working solution regardless of efficiency can be an important first step. Sometimes we need to solve a problem quickly, and more importantly it gets us thinking through it, which can often lead to additional solutions.

Brute force algorithm: An outer loop iterates through each number in nums, then for each number, an inner loop iterates nums again. For each pair of numbers, if their indices are different and their sum matches target, return their indices.

1. for each index_1 from 0 to len(nums)-1:\n    1. for each index_2 from 0 to len(nums)-1:\n        1. if index_1 != index_2 and nums[index_1] + nums[index_2] == target:\n            1. let indices be (index_1, index_2)\n            2. stop\n

Let n = len(nums), then this algorithm has two nested for loops that do n iterations each. The operations performed within the inner loop are constant time, meaning this solution will do at most n $\\times$ n $\\times$ O(1) steps. Thus, the worst-case time complexity is O(n $^2$). In the best-case, the first and second numbers in nums sum to target. No matter the size of nums, the run-times would not increase. Therefore, the best-case time complexity would be O(1).

"},{"location":"essays/example-two-sum-3/#22-sorting","title":"2.2 Sorting\u00b6","text":"

For many computational problems a good question to ask is: will sorting the inputs simplify the problem and lead to a more efficient solution? In this case, the answer is yes, we can exploit the properties of a sorted input in a similar way to binary search. Rather than focusing on the middle of the sequence and reducing the search space by half, we keep track of the two ends with position variables and reduce the search space by one each time. This kind of approach is commonly referred to as a \"double pointer algorithm\" named after the two position variables.

Before we move on to a formal description of the algorithm, we need to consider a crucial aspect of the Two Sum problem: it requires indices to be returned. This has implications for our solution: direct sorting of nums is not possible because the original index positions would be altered. Thus, any additional data structures we use must keep track of the corresponding indices from elements of nums. Keeping this in mind, here is the description of our algorithm.

With sorting algorithm: Create a pair (number, index) for each number in nums. Add each pair to a list pairs, then sort the list into ascending order based on the numbers. Initialise two variables start and end to be 0 and len(nums) - 1 respectively. While start $\\neq$ end sum the numbers in pairs corresponding to the indices start and end. If the sum is less than target, move start to the right by incrementing its value by one. If the sum is greater than target, move end to the left by decrementing its value by one. If the sum matches target then return the indices of both numbers.

The logic of this strategy is as follows. The sum of the numbers at positions start and end in our pairs list will have one of the following three cases: the sum can be equal to, greater than or less than target. If the sum is equal to target, then we have found our solution and can return the indices. If the sum is less than target, we need to increase the value of our sum; the only way to do this is by moving start to the right. Remember we have sorted the list, so all values to the right are greater. If our sum is greater than target we need to decrease the value of our sum, and the only way to do that is by moving end to the left.

1. let pairs be an empty list\n2. for each index from 0 to len(nums) - 1:\n    1. let pair be (nums[index], index)\n    2. append pair to pairs\n3. let pairs be sorted by value at first index\n4. let start = 0\n5. let end = len(nums) - 1\n6. while start != end:\n    1. let pair_sum = pairs[start][0] + pairs[end][0]\n    2. if pair_sum = target:\n        1. let indices be (pairs[start][1], pairs[end][1])\n        2. stop\n    3. otherwise if pair_sum > target:\n        1. let end = end - 1\n    4. otherwise:\n        1. let start = start + 1\n

The important parts of this algorithm with respect to analysing time complexity are: the for loop at step number two, the sorting operation at step number three and the while loop at step number six.

Let n = len(nums), then the for loop always does n iterations, and we will assume the sorting operation has worst-case complexity of O(n log(n)) and best-case of O(n), that just leaves the while loop. The while loop will do at most n iterations in a scenario where one of the variables start or end stays in place and the other is incremented until they are next to each other.

It is clear now that the sorting operation will dominate this approach when it comes to time complexity. Therefore, this algorithm has an overall worst-case time complexity of O(n log(n)) and a best-case of O(n).

"},{"location":"essays/example-two-sum-3/#23-mapping","title":"2.3 Mapping\u00b6","text":"

In the previous algorithm we paired each number in nums with its index out of necessity. We wanted to sort nums without losing the original pairing of number to index. This action of pairing numbers to indices is a useful idea; what if instead of pairing a number directly to its index, we paired the difference between our number and the target (i.e. target - number) to its index? If we did that, then finding our pair would be a case of checking if the current number is in the pairs list.

This is a good start, but we still have a problem: the lookup operation takes linear time for a list. We need an alternative data structure, one with much more efficient lookup times. If fast lookup times are required, then we should always consider a hashtable. This data structure is known informally by many different names such as dictionary, hashmap, map and associative array. A key property of this data structure is the lookup operation has constant time complexity in the average case.

For every number in nums, we can map the difference between it and the target (target - number) to its corresponding index using a hashtable. This allows us to check the hashmap for matching numbers in constant time.

Mapping algorithm: For each number in nums, if it's in the hashmap, return its index and the index mapped to it. Otherwise, calculate the difference (target - number) and map it to the corresponding index of number.

1. let differences be an empty dictionary\n2. for index from 0 to len(nums) - 1:\n    1. if nums[index] in differences:\n        1. let indices be (differences[nums[index]], index)\n        2. stop\n    2. otherwise:\n        1. let difference = target - nums[index]\n        2. let differences[difference] = index\n

Let n = len(nums), then this algorithm has a single loop that does n iterations. Because we are using a hashmap, all the operations performed in the loop are done in constant time. Thus, our mapping algorithm has O(n) time complexity in the worst-case. Similar to the brute force approach, if the correctly summing numbers are in the first two positions of nums, then the run-times will be unaffected by increasing input sizes, giving a best-case complexity of O(1).

"},{"location":"essays/example-two-sum-3/#24-summary","title":"2.4 Summary\u00b6","text":"

Many times a brute force approach is a good starting point; it is a simple strategy that is easy to implement. Moreover, this strategy is efficient in terms of its memory usage when compared to the other algorithms; it does not use additional data structures. However, this approach has an undesirable O(n $^2$) worst-case time complexity. Every time we double the input size, the run-times increase fourfold.

Our next approach used sorting to endow our list with properties useful for searching. This algorithm is perhaps the most convoluted and maybe harder to think through relative to the others. Furthermore, it requires additional memory compared to the brute force approach. The benefits of the strategy are the O(n log(n)) worst-case time complexity which improves considerably on the brute force algorithm.

The third solution made a single pass through nums and used a hashtable to map differences to indexes. While not as simple as the brute force algorithm, this approach is not hard to follow nor understand; everything is carried out in a single loop. On the other hand, this approach has the additional memory overhead of the hashtable itself, which needs to be taken into account. The main advantage with this approach is the O(n) time complexity for the worst-case, making it the most efficient when it comes to scaling run-times with input size.

When considering all three approaches, and taking into account aspects of efficiency as well as readability, the mapping algorithm seems to come out on top. It makes that classic space-time trade-off, i.e. it sacrifices some memory efficiency for time efficiency, but the simplicity of the approach combined with the efficient time complexity makes it a worthwhile exchange.

"},{"location":"essays/example-two-sum-3/#3-code","title":"3 Code\u00b6","text":"

In this section we will implement the algorithms. We shall do so using a basic subset of Python in the hope of making our code as language agnostic as possible.

Throughout this section we will make use of code quality tools such as linters and type checkers to help us meet the standards expected for clean, readable and error-free code.

"},{"location":"essays/example-two-sum-3/#31-preparation-and-imports","title":"3.1 Preparation and imports\u00b6","text":"

The next two cells set up the automatic type checking, linting and construct checking for our code cells. We also import some of the functions we will use to test, time and generate instances for our solutions.

If one or more of the styling or type checking ideals are violated, the warnings will be printed alongside the corresponding line number underneath the offending cell.

"},{"location":"essays/example-two-sum-3/#32-testing","title":"3.2 Testing\u00b6","text":"

Before we start implementing our algorithms, we write some tests. The test() function from the algoesup library is a simple way to test for correctness. It takes a function and a test table then reports any failed tests.

To test the algorithms, we need to consider edge cases and other important functional tests. Edge cases often occur at the extreme ends of the spectrum of allowed inputs or outputs, they should ideally test unexpected conditions that might reveal bugs in the code. For the Two Sum problem, we should test the minimum size for nums and also the extremes of the values that can be present. We should include negative numbers and zero in our tests because integers are present in the inputs.

The cell below contains our test table, note the descriptions of each case in the first column, and how the boundary cases, negative numbers and zero are all present in the table.

"},{"location":"essays/example-two-sum-3/#33-implementations","title":"3.3 Implementations\u00b6","text":"

The next cell implements the brute force algorithm using nested for loops and a conditional to check for the correct pair. Note how this conditional looks similar to one of the postconditions; this is a good sign.

"},{"location":"essays/example-two-sum-3/#4-performance","title":"4 Performance\u00b6","text":"

In this section we will measure the run-times of our solutions under various conditions to see if our analysis matches the results.

"},{"location":"essays/example-two-sum-3/#41-generating-inputs","title":"4.1 generating inputs\u00b6","text":"

time_functions and time_cases from the algoesup library require a function that generates problem instances of a given size. We want to be able to generate instances that correspond to best, normal and worst cases for the solutions where appropriate.

The best-, normal- and worst-case scenarios might not always be the same for each algorithm. For example, the best case for two_sum_bf and two_sum_map would be when the first two numbers encountered sum to target, but this is not the case for two_sum_sort, where the best case would be dependent on the sorting algorithm.

Since two_sum_bf and two_sum_map share the same best- and worst-case scenarios, we shall focus on those for our input generators. For the normal case, the matching numbers will be in the middle two positions of nums.

"},{"location":"essays/example-two-sum-3/#42-best-normal-and-worst-case-run-times","title":"4.2 Best, normal and worst case run-times\u00b6","text":""},{"location":"essays/example-two-sum-3/#43-run-times-for-each-solution","title":"4.3 Run-times for each solution\u00b6","text":"

Let us now compare the worst-case run-times for all three solutions side by side.

"},{"location":"essays/example-two-sum-3/#5-conclusion","title":"5 Conclusion\u00b6","text":"

We started this essay by defining the problem. We came up with three algorithms that used different approaches: brute force, sorting and mapping, then analysed the time complexity of each one. Next, we implemented and tested our solutions using Python, and in the penultimate section used empirical testing to see if our analysis matched the results. Now we must decide which of our algorithms is best.

The brute force approach, unsurprisingly, is not very efficient when it comes to run-times. We suspected this would be the case, then the empirical testing confirmed it. Its only positive attributes were its simplicity and efficient memory usage.

We are now left with a choice between the sorting and mapping approaches and I think there is a clear winner between the two. The mapping approach is more efficient in its worst-case complexity with O(n) compared to O(n log(n)) of the sorting, and on the surface seems simpler and easier to implement. Moreover, the mapping approach has the potential to be more memory efficient. For example, the sorting approach always has an auxiliary data structure the same size as nums, whereas the size of the dictionary will grow dynamically, only becoming the same size as nums in the worst case. Therefore, we must conclude the mapping algorithm is best.

"},{"location":"essays/template-data-structures/","title":"Your essay's title","text":"

This algorithmic essay template is for students of data structures and algorithms (DSA) courses. Throughout the template, there are links to relevant sections of our Writing Guide. Replace any text in italics by yours and delete this paragraph.

The introduction explains what the essay is about, the problem you are solving, and what you assume the reader to know. See our guidance on choosing a problem, writing the text and structuring the essay. This template follows the first structure in the guide. You don't need to change the following code cells.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff pytype\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff pytype In\u00a0[\u00a0]: Copied!
from algoesup import test, time_cases, time_functions\n
from algoesup import test, time_cases, time_functions

For information on what the following commands do, see our guide's sections on type checking and linting and remove this paragraph.

In\u00a0[\u00a0]: Copied!
%load_ext algoesup.magics\n# check the code's style\n%ruff on\n# check the data types\n%pytype on\n# optional: flag the Python constructs not taught in M269, our DSA course\n%allowed on\n
%load_ext algoesup.magics # check the code's style %ruff on # check the data types %pytype on # optional: flag the Python constructs not taught in M269, our DSA course %allowed on In\u00a0[\u00a0]: Copied!
tests = [\n    # Each line is a list or tuple of the form:\n    # (description, input1, input2, ..., expected_output),\n]\n
tests = [ # Each line is a list or tuple of the form: # (description, input1, input2, ..., expected_output), ] In\u00a0[\u00a0]: Copied!
# Replace solution_one with a more descriptive name.\ndef solution_one():\n    # Implement your solution here\n    pass\n\ntest(solution_one, tests)\n
# Replace solution_one with a more descriptive name. def solution_one(): # Implement your solution here pass test(solution_one, tests) In\u00a0[\u00a0]: Copied!
# Replace solution_two with a more descriptive name.\ndef solution_two():\n    # Implement your solution here\n    pass\n\ntest(solution_two, tests)\n
# Replace solution_two with a more descriptive name. def solution_two(): # Implement your solution here pass test(solution_two, tests) In\u00a0[\u00a0]: Copied!
# Replace solution_n with a more descriptive name.\ndef solution_n():\n    # Implement your solution here\n    pass\n\ntest(solution_n, tests)\n
# Replace solution_n with a more descriptive name. def solution_n(): # Implement your solution here pass test(solution_n, tests) In\u00a0[\u00a0]: Copied!
def best_case(size: int) -> tuple[...]:\n    # Implement your best-case input generator here.\n    pass\n
def best_case(size: int) -> tuple[...]: # Implement your best-case input generator here. pass In\u00a0[\u00a0]: Copied!
def worst_case(size: int) -> tuple[...]:\n    # Implement your worst-case input generator here.\n    pass\n
def worst_case(size: int) -> tuple[...]: # Implement your worst-case input generator here. pass In\u00a0[\u00a0]: Copied!
def normal_case(size: int) -> tuple[...]:\n    # Implement your normal-case input generator here.\n    pass\n
def normal_case(size: int) -> tuple[...]: # Implement your normal-case input generator here. pass In\u00a0[\u00a0]: Copied!
cases = [best_case, normal_case, worst_case]\n# Change solution_n to the name of your solution.\ntime_cases(solution_n, cases, start=10, double=4)\n
cases = [best_case, normal_case, worst_case] # Change solution_n to the name of your solution. time_cases(solution_n, cases, start=10, double=4)

Analyse the results. See the interpreting run-times section of our guide.

In\u00a0[\u00a0]: Copied!
# Change solution_one, solution_two, and solution_n to the names of your solutions.\nalgorithms = [solution_one, solution_two, solution_n]\n# Replace normal_case with best_case or worst_case, if you wish.\ntime_functions(algorithms, normal_case, 1000, 4, chart=True)\n
# Change solution_one, solution_two, and solution_n to the names of your solutions. algorithms = [solution_one, solution_two, solution_n] # Replace normal_case with best_case or worst_case, if you wish. time_functions(algorithms, normal_case, 1000, 4, chart=True)

Analyse the results. See the interpreting run-times section of our guide.

"},{"location":"essays/template-data-structures/#your-essays-title","title":"Your essay's title\u00b6","text":"

Your (and any co-author's) name, current date

"},{"location":"essays/template-data-structures/#1-tests","title":"1 Tests\u00b6","text":"

This section describes and defines the tests you will use to check your solutions. See the testing section of our guide.

"},{"location":"essays/template-data-structures/#2-algorithms","title":"2 Algorithms\u00b6","text":"

This section outlines some algorithms that solve the problem. See the algorithms section of our guide.

"},{"location":"essays/template-data-structures/#21-first-algorithm-name","title":"2.1 First algorithm name\u00b6","text":"

Describe your first strategy or approach.

Algorithm 1: Briefly describe your first algorithm.

Analyse at least the worst-case time complexity of your first algorithm.

"},{"location":"essays/template-data-structures/#22-second-algorithm-name","title":"2.2 Second algorithm name\u00b6","text":"

Describe your second strategy or approach.

Algorithm 2: Briefly describe your second algorithm.

Analyse at least the worst-case time complexity of your second algorithm.

"},{"location":"essays/template-data-structures/#2n-nth-algorithm-name","title":"2.n nth algorithm name\u00b6","text":"

Describe your nth strategy or approach.

Algorithm n: Briefly describe your nth algorithm.

Analyse at least the worst-case time complexity of your nth algorithm.

"},{"location":"essays/template-data-structures/#2n1-summary","title":"2.n+1 Summary\u00b6","text":"

This section compares the previously outlined algorithms to inform implementation decisions.

"},{"location":"essays/template-data-structures/#3-code","title":"3 Code\u00b6","text":"

This section implements and tests only the most promising algorithms. See the code section of our guide.

"},{"location":"essays/template-data-structures/#4-performance","title":"4 Performance\u00b6","text":"

This section measures and compares the run-times of your implementations, so that you can check them against your earlier complexity analysis.

"},{"location":"essays/template-data-structures/#41-generating-inputs","title":"4.1 Generating inputs\u00b6","text":"

Briefly describe your strategy and reasoning for generating the inputs.

"},{"location":"essays/template-data-structures/#42-best-normal-and-worst-run-times","title":"4.2 Best, normal and worst run-times\u00b6","text":"

State which solution(s) will be timed with best-, normal- or worst-case inputs. See the comparing cases and charting run-times sections of our guide.

"},{"location":"essays/template-data-structures/#43-fastest-and-slowest-algorithm","title":"4.3 Fastest and slowest algorithm\u00b6","text":"

Compare the run times of all your solutions for the same case. See the comparing functions and charting run-times sections of our guide.

"},{"location":"essays/template-data-structures/#5-concluding-remarks","title":"5 Concluding remarks\u00b6","text":"

Summarise your findings and conclude which solution is best.

After completing a draft of your essay, do a final check and then see the feedback guide on how to ask for, give, and handle comments.

"},{"location":"essays/template-data-structures/#6-acknowledgements","title":"6 Acknowledgements\u00b6","text":"

Credit those who helped you create the essay. See the crediting feedback section of our guide.

"},{"location":"essays/template-intro-programming/","title":"Your essay's title","text":"

This algorithmic essay template is for students of introductory programming courses. Throughout the template, there are links to relevant sections of our Writing Guide. Replace any text in italics by yours and delete this paragraph.

The introduction explains what the essay is about, the problem you are solving, and what you assume the reader to know. See our guidance on choosing a problem, writing the text and structuring the essay. This template follows the second structure in the guide. You don't need to change the following code cells.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff In\u00a0[\u00a0]: Copied!
# import functions to test the code and to measure the execution time\nfrom algoesup import test, time_functions_int\n
# import functions to test the code and to measure the execution time from algoesup import test, time_functions_int

For information on what the following commands do, see our guide's section on linting and remove this paragraph.

In\u00a0[\u00a0]: Copied!
%load_ext algoesup.magics\n# check the code's style\n%ruff on\n# optional: flag the Python constructs not taught in TM112, our introductory course\n%allowed --config tm112.json on\n
%load_ext algoesup.magics # check the code's style %ruff on # optional: flag the Python constructs not taught in TM112, our introductory course %allowed --config tm112.json on In\u00a0[\u00a0]: Copied!
tests = [\n    # Each line is a list or tuple of the form:\n    # (description, input1, input2, ..., expected_output),\n]\n
tests = [ # Each line is a list or tuple of the form: # (description, input1, input2, ..., expected_output), ] In\u00a0[\u00a0]: Copied!
def solution_one():\n    # Implement your solution here\n    pass\n\ntest(solution_one, tests)\n
def solution_one(): # Implement your solution here pass test(solution_one, tests) In\u00a0[\u00a0]: Copied!
def solution_two():\n    # Implement your solution here\n    pass\n\ntest(solution_two, tests)\n
def solution_two(): # Implement your solution here pass test(solution_two, tests) In\u00a0[\u00a0]: Copied!
# The next line assumes your functions have a single integer as input.\ntime_functions_int([solution_one, solution_two])\n
# The next line assumes your functions have a single integer as input. time_functions_int([solution_one, solution_two])

Analyse the results.

"},{"location":"essays/template-intro-programming/#your-essays-title","title":"Your essay's title\u00b6","text":"

Your (and any co-author's) name, current date

"},{"location":"essays/template-intro-programming/#1-tests","title":"1 Tests\u00b6","text":"

This section describes and defines the tests you will use to check your solutions. See the testing section of our guide.

"},{"location":"essays/template-intro-programming/#2-solutions","title":"2 Solutions\u00b6","text":""},{"location":"essays/template-intro-programming/#21-first-solution-name","title":"2.1 First solution name\u00b6","text":"

Describe your first strategy or approach, then implement and test it.

"},{"location":"essays/template-intro-programming/#22-second-solution-name","title":"2.2 Second solution name\u00b6","text":"

Describe your second strategy or approach, then implement and test it.

"},{"location":"essays/template-intro-programming/#3-performance","title":"3 Performance\u00b6","text":"

This optional section compares the performance of your solutions to see which is fastest.

"},{"location":"essays/template-intro-programming/#4-concluding-remarks","title":"4 Concluding remarks\u00b6","text":"

Summarise your findings and conclude which solution is best.

After completing a draft of your essay, do a final check and then see the feedback guide on how to ask for, give, and handle comments.

"},{"location":"essays/template-intro-programming/#5-acknowledgements","title":"5 Acknowledgements\u00b6","text":"

Credit those who helped you create the essay. See the crediting feedback section of our guide.

"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Algorithmic Essays","text":"

These documents provide guidance to help you write, critique, and share algorithmic essays.

We define algorithmic essays as short reports, with code, that explain and compare alternative approaches to solving a computational problem.

If you\u2019re a student, writing algorithmic essays benefits you in various ways:

  • You reinforce your learning by explaining concepts to others and by exploring alternative solutions to the same problem.
  • You develop professional skills like problem-solving, reviewing code, working collaboratively, and writing readable, tested, documented code.
  • You learn how to use professional tools like code formatters, linters and type checkers, which improves your employability.
  • You produce an artefact (the essay) for your portfolio for prospective employers.

If you\u2019re an educator, you can copy our resources from the GitHub repository and adapt them to your course, due to our permissive licence (see below).

Example

We provide some example essays to illustrate what they are.

While many approaches to writing algorithmic essays are possible, we recommend using Jupyter notebooks, the most widely used medium for mixing text and executable code.

If you\u2019re a student on M269, our data structures and algorithms course, you can write an essay with your existing software and share it on the forums.

Get started (M269)

If you prefer a \u2018Google Docs\u2019-like environment for collaborative authoring and commenting on essays, we suggest Deepnote or Google\u2019s Colaboratory (Colab for short):

  • free account
  • no software installation necessary
  • you can share your essays publicly (or just with a few people) to easily
    • receive and give feedback
    • work collaboratively on the same essay.

(We have no affiliation, commercial or otherwise, with Deepnote or Google.)

Get started (Deepnote) Get started (Colab)

"},{"location":"#acknowledgements","title":"Acknowledgements","text":"

This project was partly funded by the UK\u2019s Council of Professors and Heads of Computing, under the Special Projects grant scheme, in May 2023.

"},{"location":"#licences","title":"Licences","text":"

The code and text in this repository are Copyright \u00a9 2023\u20132024 by The Open University, UK. The code is licensed under a BSD 3-clause licence. The text is licensed under a Creative Commons Attribution 4.0 International Licence.

"},{"location":"deepnote-background/","title":"Background","text":""},{"location":"deepnote-background/#organisation","title":"Organisation","text":"

Deepnote uses an organisational framework with three major parts: workspaces can contain multiple projects and each project can contain multiple notebooks and files.

The workspace is the highest level structure in Deepnote, and is designed to group related Projects and enable team collaboration. Every user can create their own workspaces where they can manage access, set permissions and oversee projects. A workspace can have multiple members each with their own access permissions, but by default they can see all projects in the workspace.

Projects are the next tier down from workspaces in terms of organisation and provide the main working environment as well as the integrated file system. When you duplicated our project, you duplicated all of the notebooks, files and the environment too.

Notebooks are akin to Jupyter notebooks. They are interactive documents that combine executable code with richly formatted text and visualisations. Notebooks are where you will write your essays.

"},{"location":"deepnote-background/#deepnote-vs-classic-notebook","title":"Deepnote vs Classic Notebook","text":"

Notebooks in Deepnote have similar functionality to classic Jupyter notebooks in that they combine rich text, visualisations and executable code in a single document. But there are a few notable differences in the UI and functionality worth mentioning.

Firstly, Deepnote is a cloud-based Jupyter notebook platform: this means no local software installation is required to get started. It also means you can access your documents from anywhere with an internet connection. This is a double-edged sword, of course: if you lose your connection you lose access to your notebooks.

In Deepnote the divisions within a notebook are referred to as \u201cblocks\u201d instead of \u201ccells\u201d, but we will continue to use the classic terminology. Deepnote retains the same structure of code and Markdown cells as used in Jupyter Notebook, but it also provides additional cell types. Besides cells for data science, Deepnote adds rich text cells. Unlike standard Jupyter notebooks, these cells offer a what you see is what you get (WYSIWYG) text editing experience, similar to applications such as MS Word and Google Docs. They include spellchecking and the usual formatting shortcuts like Ctrl + B for bold, Ctrl + I for italics, and so on. Rich text cells can be advantageous if you plan to use Deepnote exclusively as they can simplify the writing process. However, it is important to note that any formatting from rich text cells will not be preserved if you download your notebook. So if you plan to use your notebooks on other platforms it is advisable to stick to Markdown cells for writing text.

Finally, there is a key difference in the way Deepnote and Jupyter handle Markdown. Normally to create a line break in Markdown, either a double space followed by the Enter key, or a backslash followed by the Enter key is required. This is the approach followed by Jupyter Notebook and many other notebook interfaces. Deepnote, however, does it differently: simply pressing Enter creates a line break without the need for explicit characters like double space or backslash. This alternative approach is a deviation from the Markdown standard and affects how rendered Markdown looks when moving between platforms.

"},{"location":"deepnote-background/#collaboration","title":"Collaboration","text":"

Deepnote was designed with collaboration in mind and offers several features to do this which are not found on some other platforms.

Users in Deepnote can work together on projects simultaneously: any changes made to files and notebooks within the project can be seen instantaneously by both parties. Real time collaboration works best when you are also communicating in real time with your peers, say for example, using Zoom, Teams, Discord or WhatsApp.

Additionally, Deepnote offers the option for asynchronous communication through comments. Comments can be left in a specific cell and are visible to anybody viewing the notebook. The first comment made in a cell opens a new thread, and anyone commenting in a thread receives email notifications after a new message is posted. Open threads can be resolved to hide them and save space; threads can be reopened if needed. Any open threads are displayed in a main comments panel on the right-hand side of a project.

Comments are one of the ways to give and receive feedback on your essays. See the feedback guide for more details.

"},{"location":"deepnote-background/#limitations","title":"Limitations","text":"

As noted at the start of the guidance, Deepnote was selected as the platform for the Learning Professional Skills with Algorithmic Essays project due to its simple interface, customisable environment, and features for collaborative working. However, it is important to acknowledge and assess its limitations.

As mentioned above, Deepnote deviates from Jupyter Notebook by having new types of cell such as rich text cells. It also handles Markdown in a different way from most other platforms. This has implications for how your notebooks will be rendered using different Jupyter interfaces. The same notebook could look different on other platforms compared to Deepnote; the length of text lines might be different and some of the formatting may be altered or lost.

In addition to this, when working on a cloud-based platform such as Deepnote, executing cells can sometimes feel slow, especially when the virtual machine has been idle for a while. Furthermore, an issue has been observed when a Markdown cell contains an excessive amount of text, which appears to slow down performance, potentially due to the autocomplete functionality.

As a final point, the way Deepnote stores notebooks within the environment must be mentioned. You can of course upload a notebook to the system and access it like any other file, but if you want to run the notebook, it must be moved to the NOTEBOOKS section. This then becomes a problem if you wish to access the notebook file again, say using the terminal. When a notebook is moved to this section it is effectively taken out of the integrated file system and relocated to a separate database which the user no longer has access to.

"},{"location":"deepnote-how-to/","title":"Deepnote how-to guides","text":""},{"location":"deepnote-how-to/#account-operations","title":"Account operations","text":""},{"location":"deepnote-how-to/#sign-up-for-deepnote","title":"Sign up for Deepnote","text":"

This is explained in our Getting Started guide.

"},{"location":"deepnote-how-to/#log-in","title":"Log in","text":"

If you have logged out, to log back in you need to verify your email again:

  1. Go to the sign-in page.
  2. Enter your email. Click the CAPTCHA button. Click Continue with email.
  3. You will receive an email from Deepnote with a link. Click on it.
"},{"location":"deepnote-how-to/#workspace-operations","title":"Workspace operations","text":"

Before any of the following operations, you must change from project view to workspace view:

  1. Click on your workspace name in the top left corner of the screen.
  2. From the drop-down menu, select Back to workspace.

You will now see a list of the projects in your workspace. Some projects were automatically added by Deepnote when creating your workspace.

After you have completed the desired workspace operations, click in the side panel on the project you want to work on next.

"},{"location":"deepnote-how-to/#enable-sharing","title":"Enable sharing","text":"
  1. In the workspace view, click on Settings & members in the side panel.
  2. Click on the Project settings tab.
  3. Turn on the option Allow projects to be shared publicly.
"},{"location":"deepnote-how-to/#project-operations","title":"Project operations","text":""},{"location":"deepnote-how-to/#rename-duplicate-download-or-delete-a-notebook-or-file","title":"Rename, duplicate, download or delete a notebook or file","text":"
  1. In the side panel, hover over the name of the chosen notebook or file.
  2. Click on the three dots that appear.
  3. From the drop-down menu, select the desired operation.

For notebooks, the download operations are called Export as .ipynb and Export as .pdf. If your notebook contains other types of cells besides Markdown and code, the downloaded .ipynb file won\u2019t be rendered correctly on other Jupyter platforms.

"},{"location":"deepnote-how-to/#duplicate-our-project","title":"Duplicate our project","text":"

This is explained in our Getting Started guide.

"},{"location":"deepnote-how-to/#share-your-project","title":"Share your project","text":"

The following assumes you have enabled sharing for your workspace.

  1. Click the Share button in the top right corner of the screen.
  2. To the right of Anyone with a link to this project, click on the drop-down menu and select Comment.
  3. Click the highlighted blue link to this project, to copy the link.
  4. Share that link with your peers by email or by posting in your course\u2019s forum.
"},{"location":"deepnote-how-to/#create-a-new-notebook","title":"Create a new notebook","text":"
  1. Click on the + icon next to the Notebooks heading in the side panel.
  2. Enter a name for your new notebook, then press Enter.
"},{"location":"deepnote-how-to/#upload-a-notebook-or-file","title":"Upload a notebook or file","text":"

The simplest way is to drag the notebook or file from your desktop to the Notebooks or Files section in the side panel.

Alternatively, to upload a file:

  1. Click on the + icon next to the Files heading in the left panel.
  2. Select Upload file from the drop-down menu.
  3. In the file browser, navigate to the file you want to upload, then click Open.
"},{"location":"deepnote-how-to/#notebook-operations","title":"Notebook operations","text":"

To perform an action on a cell, do one of the following:

  • Click on the cell to select it: the outline becomes blue. Press the action\u2019s keyboard shortcut.
  • Hover over the cell. A pop-up menu appears in the top right corner of the cell. Click on the action\u2019s icon or click the three dots to get a menu of actions.
"},{"location":"deepnote-how-to/#run-one-or-all-cells","title":"Run one or all cells","text":"

Running a cell executes the code or formats the Markdown text.

To run one cell, do one of the following:

  • Press Ctrl + Enter (Mac: Cmd + Enter) if the cell is selected.
  • Click the triangle icon in the top right corner of the cell.

To run all cells, click Run notebook in the top right corner of the notebook.

Note

The first time you run code, it will take some time, because Deepnote must first start a server with the necessary software.

"},{"location":"deepnote-how-to/#add-a-cell","title":"Add a cell","text":"

To insert a cell between two existing cells:

  1. Hover the mouse between the two cells.
  2. Click on the line that appears between both cells and do one of the following:
    • To insert a code cell, press c followed by Enter.
    • To insert a Markdown cell, press m followed by Enter.

To append a cell, scroll down to the end of the notebook and do one of the following:

  • To add a code cell, click on the Code button.
  • To add a Markdown cell, click on the Text button and select Markdown.
"},{"location":"deepnote-how-to/#delete-a-cell","title":"Delete a cell","text":"

Do one of the following:

  • Press Ctrl + Shift + Backspace (Mac: Cmd + Shift + Backspace) if the cell is selected.
  • Click the bin icon in the top right corner of the cell.
"},{"location":"deepnote-how-to/#comment-on-a-cell","title":"Comment on a cell","text":"

Do one of the following:

  • Press Ctrl + Alt + C (Mac: Cmd + Alt + C) if the cell is selected.
  • Click the speech bubble icon in the top right corner of the cell.

After typing your comment, press the upwards blue arrow to finish.

Warning

You must be logged into your account to comment on notebooks shared with you. If you\u2019re not logged in, your comments are marked as \u2018anonymous user\u2019 and the essay\u2019s author won\u2019t see them.

"},{"location":"deepnote-how-to/#format-a-code-cell","title":"Format a code cell","text":"

This \u2018pretty prints\u2019 the code. Do one of the following:

  • Press Alt + Shift + F if the cell is selected.
  • Click the three dots in the top right corner of the cell and select Format code from the drop-down menu. (To avoid scrolling all the way down, type f in the search box of the menu.)

Note

Formatting takes 1-2 seconds and adds an empty line (which you may delete) to the end of the cell.

Warning

If you get a message \u2018parsing failed\u2019, then the code cell is not valid Python and can\u2019t be automatically formatted. This may happen if the cell has IPython commands starting with %.

"},{"location":"deepnote-reference/","title":"Reference","text":""},{"location":"deepnote-reference/#workspace-interface","title":"Workspace interface","text":"

The workspace interface provides an overview of any projects in your workspace. On the left-hand side panel you will find a navigation menu allowing you to quickly navigate to different sections of your workspace. Starting from the top, the sections are:

  • Integrations - This section allows you to set up and manage connections to data sources. This is mostly used for data science work.

  • Settings & members - Manage who has access to the workspace and its resources.

  • Recents - Projects listed in order of most recently opened.
  • Private projects - A list of private projects, which only you have access to. Other workspace members can\u2019t see private projects.
  • Published apps - This section shows any apps you have published. In Deepnote, an app is a notebook in which some blocks have been hidden to abstract away technical details. This may be useful to present your findings to stakeholders with non-technical backgrounds.
  • PROJECTS - A list of all projects within the workspace.
"},{"location":"deepnote-reference/#project-interface","title":"Project interface","text":"

A project\u2019s interface has similarities to the interface for your workspace. Starting from the top, the sections on the left-hand side panel are:

  • NOTEBOOKS - This section is where your notebooks live. If you want to actively work on your notebooks they must be added to this location.
  • INTEGRATIONS - This section allows you to use an integration defined for the workspace. Integrations are mainly used in data science.
  • FILES - Each project in Deepnote has an integrated file system which you can view and access in this section. You can create and upload files and folders here.
  • TERMINALS - Deepnote allows you to launch terminals from this section by clicking on the \u201c+\u201d icon. As you would expect, you can access the local file system through the terminal to run scripts or complete other tasks. Note that you cannot access any notebooks located in the NOTEBOOKS section from a terminal: they are stored in a separate database and not considered part of the file system.
  • TABLE OF CONTENTS - This section will show the major headings of the current notebook you are working on so you can quickly navigate through your document by clicking on them.
  • ENVIRONMENT - The environment section shows a simple overview of the environment you are currently working in. There is an option to expand this section into a more detailed view by clicking on the cog symbol next to the ENVIRONMENT heading in the top right hand corner of the panel.
"},{"location":"deepnote-reference/#notebooks","title":"Notebooks","text":"

Notebooks in Deepnote have the same core functionality as Jupyter Notebooks: they combine executable code and text in the same document.

See Deepnote vs Classic Notebook for some differences between the two.

"},{"location":"deepnote-reference/#access-levels","title":"Access levels","text":"

Access levels are the range of permissions or capabilities assigned to a user in Deepnote. They differ between the contexts of workspaces and projects.

The access levels for projects are:

  • App User: Can use published app, but cannot view the project source code.
  • View: Can inspect the project, but cannot view or post comments nor execute or edit files.
  • Comment: Can post and view comments in addition to inspecting the project.
  • Execute: Can execute code in addition to viewing and commenting, but cannot change anything or use terminals.
  • Edit: Can use terminals, connect datasets, comment and edit files as well as view and execute.

The access levels for workspaces are:

  • Viewer: Viewers can see all projects and members of a workspace. They can leave comments in projects but can\u2019t make any changes. They can duplicate a project to another workspace as well as request additional access from the team\u2019s owner.
  • Contributor: Contributors can execute all notebooks within the workspace as well as change input block values. They cannot make changes to code.
  • Editor: Editors can create and edit workspace projects.

  • Admin: Admins have all access rights, including permission to manage workspace members.

"},{"location":"deepnote-reference/#cells","title":"Cells","text":"

Cells (called \u2018blocks\u2019 in Deepnote) are the divisions within each notebook. They are a distinct area where code or text can be added depending on the type of the cell. See our how-to guide for working with cells.

"},{"location":"deepnote-reference/#terminal","title":"Terminal","text":"

A terminal will give you a command line interface for your project and runs a bash shell.

Launching a Terminal in Deepnote allows you to run scripts or complete tasks where the GUI is not suitable.

See the Deepnote documentation on terminals for more information.

"},{"location":"deepnote-reference/#environment","title":"Environment","text":"

The environment refers to the setup and configuration that supports the execution of code within your project.

The code in each project runs on a virtual machine which is an isolated computing environment with its own CPU, memory and storage resources. These specifications can be adjusted in a limited way if required and various software packages can be added to your environment to suit your needs.

When you copied our project, you also copied the environment.

See Deepnote\u2019s documentation on custom environments for more information.

"},{"location":"deepnote-reference/#real-time-collaboration","title":"Real-time collaboration","text":"

Real-time collaboration refers to the capability of multiple users to work on the same documents in the same project at the same time. Any changes to documents can be seen by all users working on the project as and when they happen.

See Deepnote\u2019s documentation on real-time collaboration for more details.

"},{"location":"deepnote-reference/#asynchronous-collaboration","title":"Asynchronous collaboration","text":"

Asynchronous collaboration is a method of working where users do not have to be working at the same time. Users can contribute to projects and documents at their own pace to suit their own schedule.

The main tool for asynchronous collaboration in Deepnote is the comments system. Users can comment on code and text in the corresponding cells to communicate with peers.

"},{"location":"deepnote-reference/#command-palette","title":"Command palette","text":"

The command palette provides quick access to all of the files in a project and the most popular actions.

You can open and close the command palette by pressing Cmd + P on Mac or Ctrl + P on Windows.

"},{"location":"deepnote-reference/#members","title":"Members","text":"

A member is a Deepnote user associated with a particular workspace.

When a user is a member of a workspace, they typically have access to all the projects within that workspace, but the access permissions can be adjusted.

Projects do not have members, but you can give or be given access to a project with certain permissions. See Access levels for more information.

"},{"location":"deepnote-reference/#markdown-cheat-sheet","title":"Markdown cheat sheet","text":"Feature Syntax/Example Headers H1 Header # H1 Header H2 Header ## H2 Header H3 Header ### H3 Header H4 Header #### H4 Header H5 Header ##### H5 Header Code Inline Code `Code` Code Block ```Code block``` Formatting Italic _italic_ or *italic* Bold **bold** or __bold__ Strikethrough ~~strikethrough~~ Links External Link [Google](https://www.google.com) Section Link [Top](#top) Lists Bulleted List - List item Numbered List 1. List item Math Inline Math $x=1$ Math Block $$$x=1$$$ Other Quote > Quote Divider --- HTML <h1>Title</h1>"},{"location":"deepnote-reference/#keyboard-shortcuts","title":"Keyboard shortcuts","text":"

Deepnote has many keyboard shortcuts for quickly performing typical actions on cells and text.

General

MAC WINDOWS & LINUX ACTION \u2318 + P ctrl + P Show/Hide command palette

Block Actions

MAC WINDOWS & LINUX ACTION \u21e7 + \u21b5 shift + enter Run current block and move cursor to next block (creates a new cell if at the end of the notebook) \u2318 + \u21b5 ctrl + enter Run current block \u2318 + \u21e7 + . ctrl + shift + . Stop execution \u2318 + \u21e7 + H ctrl + shift + H Hide/Show block output \u2318 + \u21e7 + M ctrl + shift + M Toggle between code and Markdown block \u2318 + \u21e7 + \u232b ctrl + shift + backspace Delete block \u2325 + \u21e7 + \u2191 alt + shift + \u2191 Move block up \u2325 + \u21e7 + \u2193 alt + shift + \u2193 Move block down \u2318 + \u21e7 + D ctrl + shift + D Duplicate block \u2318 + J ctrl + J Add new code block below current one \u2318 + K ctrl + K Add new code block above current one \u2318 + Z ctrl + Z Undo \u2318 + \u21e7 + Z ctrl + shift + Z Redo

Code Editing

MAC WINDOWS & LINUX ACTION \u2318 + D ctrl + D Expand selection (multiple cursors) tab tab When caret is at the beginning of a line, add indent; otherwise, show autocomplete suggestions \u21e7 + tab shift + tab Decrease indent \u2318 + / ctrl + / Toggle line/selection comment \u2325 + \u2193 alt + \u2193 Move lines down \u2325 + \u2191 alt + \u2191 Move lines up

Terminal

MAC WINDOWS & LINUX ACTION \u2318 + C ctrl + shift + C Copy selected text \u2318 + V ctrl + shift + V Paste"},{"location":"deepnote/","title":"Deepnote","text":"

Deepnote is the cloud based Jupyter notebook platform we recommend for writing your essays.

The platform was chosen due to its simple interface, customisable environment, and features for collaborative working.

Warning

This Deepnote guide is limited in scope to the writing, sharing and critiquing of algorithmic essays. There are many features of the platform not covered here. See the official documentation for more extensive coverage.

You may find it helpful to watch the following introduction video.

This guide has three parts:

  • The How-to guide provides step by step instructions on how to complete specific tasks in Deepnote.
  • The Background section discusses and explains aspects of Deepnote in a longer form and sometimes broader context.
  • The Reference section is for quickly looking up information about key aspects of Deepnote.

Note

Use the side panel to navigate the Deepnote guidance.

"},{"location":"example-essays/","title":"Example essays","text":"

These examples illustrate different ways of structuring essays and different writing styles.

For some essays, there\u2019s a template that highlights the essay\u2019s structure and the purpose of each section. These templates help you start writing your own essay. Clicking on a button below will open a read-only version of the essay or template.

To get an editable version of a template to start writing your own essay, right-click on a download button. From the pop-up menu, choose \u2018Save file as\u2026\u2019 (or similar) to save the template in a folder of your choice.

If you\u2019re using Deepnote, you don\u2019t need to download anything because you will have the essay templates and examples once you have copied our essay project, as explained in Getting started (Deepnote).

"},{"location":"example-essays/#sum-of-1-to-n","title":"Sum of 1 to n","text":"

This is a short and simple essay, suitable for those on introductory programming courses. The essay shows two ways of calculating 1 + 2 + \u2026 + n and compares their run-times. The essay follows a simple structure, in which each approach is outlined, implemented and tested before moving on to the next one.

Essay Template Template (download)

"},{"location":"example-essays/#jewels-and-stones","title":"Jewels and Stones","text":"

This is a longer essay, for those on data structures and algorithms courses. The problem is to count how many characters of a string occur in another string. The essay solves the problem in three ways, with linear search, a set, and a bag/multiset. The complexity of the three algorithms is analysed and compared to their run-time.

This essay follows a slightly different structure, in which each approach is outlined and its complexity analysed, before deciding which approaches are worth implementing.

Essay Template Template (download)

"},{"location":"example-essays/#two-sum-two-approaches","title":"Two Sum (two approaches)","text":"

This classic problem asks to find two numbers in a list that add up exactly to a given number. This essay solves the problem in two ways, with brute-force search (nested loops) and a map (Python dictionary).

Essay

"},{"location":"example-essays/#two-sum-three-approaches","title":"Two Sum (three approaches)","text":"

This is an extended version of the previous essay. It adds a third approach, that sorts the list of numbers.

Essay

"},{"location":"feedback/","title":"Feedback guide","text":"

Giving feedback on each other\u2019s essays is an important part of developing professional skills with algorithmic essays.

Receiving feedback helps improve your essay, while giving feedback helps develop your communication skills. Moreover, reading other people\u2019s essays is a great way to learn more about Python, algorithms, data structures, code style, etc.

Feedback is part of professional software development practices. Many companies and open source projects have formal code reviews, in which developers must submit their code for inspection by others. Code reviews help increase the quality of the code, share best practices among developers, and on-board new developers.

Many companies use pair programming, in which two developers work together on the same piece of code. While one writes the code, the other reviews it as it\u2019s written, pointing out mistakes and suggesting improvements. The two developers switch roles often during a pair programming session. With Deepnote and Colab, you and someone else can work simultaneously on the same notebook, while using Zoom, Microsoft Teams or some other app to chat.

In summary, by engaging in a feedback process for your and others\u2019 essays, you will develop valuable professional skills.

"},{"location":"feedback/#asking-for-feedback","title":"Asking for feedback","text":"

You can ask for feedback at any point during the production of your essay, not just when you have a complete draft. You will have to tell others what kind of feedback you want.

For example, if you\u2019re undecided between three problems, you may write a notebook that just describes the problems and asks others to choose one of them, by including something like:

Please help me choose a problem to tackle. Which of the previous problems would you like to see solved and why?

It helps the reader if the request for feedback stands out from the remaining text. You may prefer to use bold, italics, or HTML to format it differently.

You can also ask for feedback after choosing a problem and thinking of some algorithms, but before investing the time in implementing and testing them. Your request could be:

I\u2019m looking for feedback on the above algorithms. Are they clear? Have I missed other approaches to solving this problem?

Once your notebook is ready for feedback, you need to share it so that others can comment on it. You may invite comments from anyone or only from one or two \u2018essay buddies\u2019: they comment on your essay and you comment on theirs.

Info

For how to share your essay, see the corresponding instructions for Colab, Deepnote or M269.

"},{"location":"feedback/#giving-feedback","title":"Giving feedback","text":"

Once you have been invited to comment on an essay, read it as soon as you can, because the author is waiting for it to progress the essay to the next draft.

Feedback should be specific and constructive. Generic comments like \u201cThis is confusing\u201d or \u201cI don\u2019t understand this\u201d aren\u2019t helpful for the author. State what you\u2019re finding hard to grasp, like:

  • \u201cWhat is \u2018it\u2019 referring to in the second sentence?\u201d
  • \u201cThe term \u2018\u2026\u2019 in the last sentence hasn\u2019t been defined. Does it mean the same as \u2018\u2026\u2019?\u201d
  • \u201cWhat is variable \u2018\u2026\u2019 used for in the algorithm? Should it initially be the empty list?\u201d

Before adding a comment to a notebook cell, read the existing comments, to avoid repeating the same points others have made.

Comments that aren\u2019t about a specific text paragraph or code cell, but rather about the whole essay, e.g. its structure, should be attached to the first cell, with the essay\u2019s title, or to the last cell.

The author has put effort into their essay, and will appreciate encouraging feedback to keep polishing it. For example, if you commented on a previous version, praise the parts that improved.

"},{"location":"feedback/#acting-on-feedback","title":"Acting on feedback","text":"

Deepnote emails you every time you get a comment on your essay. You may wish to improve your essay as you get each piece of feedback, or you may wait some time, e.g. a week, to collect a variety of comments and then address them in one pass.

As you scroll down your essay, look for speech bubble icons. Click on them to see the comment thread associated to that cell. Once you modify the cell to address those comments, mark the thread as resolved.

Don\u2019t feel obliged to follow every suggestion you receive. The reviewers of your essay may present contradictory suggestions and some may take too much effort to address.

If you don\u2019t understand a reviewer\u2019s comment, add a comment yourself, asking them for clarification. Be specific, explaining what you don\u2019t understand. Alternatively, rephrase the reviewer\u2019s comment and ask them to confirm your interpretation, e.g. \u201cDo you mean I should rename the variable from \u2026 to \u2026?\u201d

"},{"location":"feedback/#crediting-feedback","title":"Crediting feedback","text":"

You should acknowledge who provided input, by adding a section at the end of your essay with something like this:

I thank my tutor Jane Doe and my fellow students John Smith and Carla Suarez for feedback that helped improve this essay.

Crediting others allows them to point to your essay to provide evidence, e.g. to prospective employers, of giving feedback others find useful. It\u2019s therefore best if you can be specific about each one\u2019s contribution, in particular if someone provided some of the content, beyond just commenting. Content contributions should be mentioned before feedback. For example:

I thank John Smith for analysing the complexity of the second approach. Jane Doe helped me improve the structure of this essay. Carla Suarez spotted several typos and suggested improvements to the code style.

It\u2019s easiest to keep track of contributions if you update the acknowledgements as you act upon feedback. You may wish to offer co-authorship of the essay to those who made substantial contributions.

The acknowledgments should also mention if your solutions are based on someone else\u2019s, unless you already said so when introducing the algorithms. For example:

The second algorithm was taken from the solution posted by LeetCode user soandso, but the code is my own. The third algorithm and implementation are based on user123\u2019s solution. I thank \u2026

When possible, the acknowledgement text should link directly to the original solutions.

If the code or tests were partly or wholly written by generative AI, say so, preferably indicating the prompts you used.

"},{"location":"getting-started-google-colab/","title":"Getting started (Google Colab)","text":"

Follow these steps to start writing essays on Google Colab within a few minutes.

"},{"location":"getting-started-google-colab/#copy-an-essay-template","title":"Copy an essay template","text":"

Important

To use Colab, you need a Google account. To create one, visit the Google Account sign in page and follow the instructions.

  1. Log in to your Google account
  2. Click one of these links to open a template in Colab:
    • introductory programming template
    • data structures and algorithms template
  3. In the template, click File->Save a copy in Drive to save a copy to your Google Drive.
  4. In your copy of the template, click File->Rename to rename the copy. Use a descriptive name relating to your essay.

Info

The first code cell of each template essay notebook installs the necessary software when running the notebook in Colab.

"},{"location":"getting-started-google-colab/#write-the-essay","title":"Write the essay","text":"

Now that you have saved and renamed a copy of the template, you can start writing your essay in Colab.

For how to use Jupyter notebooks in Colab, read through the Overview of Colaboratory Features.

For guidance on writing algorithmic essays, see our writing guide.

"},{"location":"getting-started-google-colab/#share-the-essay","title":"Share the essay","text":"

When you are ready to share the essay with others, do the following:

  1. Click the Share button in the top right corner of your essay. A small window will appear in the middle of the screen.
  2. In the General access section of the window, select Anyone with a link from the drop-down menu, and commenter as the role from the new drop-down menu on the right.
  3. Click the Copy link button to put the link in your clipboard.
  4. Click Done to close the window and share the link with your peers.
"},{"location":"getting-started-google-colab/#comment-on-essays","title":"Comment on essays","text":"

After sharing your essay, others will be able to comment on it. You can also make comments on essays as part of the feedback process. See our feedback guide for more information.

In Colab, comments are attached to a cell and are displayed next to the cell they refer to.

If you have edit or comment permissions, you can comment on a cell in one of three ways:

  1. Select a cell and click the comment button (speech bubble) in the toolbar above the top-right corner of the cell.
  2. Right-click a text cell and select \u2018Add a comment\u2019 from the pop-up menu.
  3. Press Ctrl+Alt+M to add a comment to the currently selected cell.

You can resolve and reply to comments, and you can target comments to specific collaborators by typing @[email address] (e.g., @user@domain.com). Addressed collaborators will be emailed.

The Comment button in the top-right corner of the page shows all comments attached to the notebook.

"},{"location":"getting-started-m269/","title":"Getting started (M269)","text":"

If you\u2019re an M269 student, follow these steps to start writing essays within a few minutes.

"},{"location":"getting-started-m269/#install-software-optional","title":"Install software (optional)","text":"

While you can write essays with your current M269 software, we recommend installing two more packages to help you test, check the style, and measure the run-time of code.

  1. Activate your M269 environment as usual:
    • Open a PowerShell (Windows) or terminal (Linux / macOS), and enter m269-23j.
    • The prompt should now be (m269-23j) ... and you should be in your M269 folder.
  2. To install the software, enter pip install algoesup ruff in the PowerShell / terminal.

    Note

    You also need the most recent version of allowed, which is available from the Resources tab of the M269 website.

"},{"location":"getting-started-m269/#copy-template","title":"Copy template","text":"

We have created templates to serve as starting points for your essay.

  1. Go to our example essays page.
  2. Choose a template and right-click on the corresponding download button.
  3. Select \u2018Download / Save file as\u2026\u2019 and save the template to your M269 folder, with a filename that includes your name (e.g. essay-Michael.ipynb) or that indicates the topic (e.g. balanced-brackets.ipynb). If you prefer, you can rename the file later, when you have settled on a topic.
"},{"location":"getting-started-m269/#write-the-essay","title":"Write the essay","text":"

With the essay template in your M269 folder, you can work on it in the same way you do with the book\u2019s chapters and your TMAs.

  1. Open a PowerShell or terminal.
  2. Enter m269-23j to activate your M269 environment and go to your M269 folder.
  3. Enter nb to open the Jupyter dashboard, from which you can open the essay.
"},{"location":"getting-started-m269/#share-the-essay","title":"Share the essay","text":"

Once you have a draft you want others to give feedback on, make a post in the VLE forum corresponding to the topic of your essay, and attach your notebook file.

For example, if your essay is about ordered or unordered collections, post it in the Weeks 1-10 forum, but if it also uses recursion or a sorting algorithm, post it in the Weeks 11\u201320 forum.

Once you have addressed others\u2019 feedback, post the final version of your essay, acknowledging the commenters.

"},{"location":"getting-started-m269/#publish-the-essay-optional","title":"Publish the essay (optional)","text":"

If you want to publish your essay outside the M269 forums, you have several options.

  • Ask us in the forum to add your essay to the algoesup GitHub repository, with a link from the example essays page. The copyright of your essay remains with you.
  • Create your own GitHub repository for your notebook file. (Learning how to use GitHub for software development is a valuable professional skill.)
  • Make your essay available via a cloud Jupyter environment, like Cocalc, Colab, Datalore and Deepnote.
"},{"location":"getting-started/","title":"Getting started","text":"

Follow these steps to start writing essays within a few minutes, without any software installation or configuration.

"},{"location":"getting-started/#create-a-deepnote-account","title":"Create a Deepnote account","text":"
  1. Open the Deepnote sign-up page.
  2. Enter your email address. Use your academic (rather than personal) email to get the free education plan.
  3. Check the CAPTCHA box and click Continue with email.
  4. Check your email for a sign-in link from Deepnote and click it.

    Note

    There are no passwords for Deepnote when signing up by email. If you explicitly log out of your Deepnote account, see our guide for how to log in.

  5. In Deepnote, answer the introductory questions, which may depend on the type of email you used to sign up.

    • If you\u2019re asked what you are working on, type Writing essays and click Continue.
    • If you\u2019re asked to name your workspace, which is where you will store your projects, give it a unique and descriptive name, e.g. \u201cYour Name\u2019s projects\u201d.
    • If you\u2019re asked to invite your teammates, click Continue.
    • If you\u2019re asked for your data sources, click Take me to Deepnote.

You should now be looking at an empty notebook that is part of the Welcome to Deepnote project within your workspace. You won\u2019t need that project for writing algorithmic essays, but you may wish to keep it, to later explore Deepnote\u2019s data science features.

For the moment, just proceed with the next steps.

"},{"location":"getting-started/#duplicate-our-project","title":"Duplicate our project","text":"

We created an essay project in our Deepnote workspace, to be copied to your workspace. The project has all necessary software pre-installed.

  1. Open our project.
  2. Click on the blue Duplicate button in the top right corner.
  3. Choose the name of your workspace from the drop-down menu.
  4. Click Duplicate project to finish the process.

    Note

    Do not check the box to make your project private: that would prevent sharing your essays later with others.

    After a few moments, you will see the project in your workspace, with the name Algorithmic Essays - Duplicate at the top of the screen.

  5. Click on the three dots in the top right corner.

  6. Select Rename project from the drop-down menu.
  7. In the text box that appears, type a name for your project, e.g. \u201cYour Name\u2019s essays\u201d.

You should now see some notebooks in the left panel.

"},{"location":"getting-started/#copy-a-template-and-edit-it","title":"Copy a template and edit it","text":"

We provide two templates as starting points for your essay. One template is simpler, with introductory programming in mind, the other is for data structures and algorithms courses. To write an essay, copy one of the templates and edit that copy.

  1. In the left panel, hover your mouse over the template you wish to copy.
  2. Click on the three dots that appear next to the template\u2019s name.
  3. In the pop-up menu that appears, select Duplicate.

This creates a new notebook, with the name of the template followed by \u2018-2\u2019, and opens it. You can now start editing your copy of the template. (Once you have decided on your essay\u2019s topic, you can rename the notebook.)

If you\u2019re familiar with the classic Jupyter interface, we recommend you first read about the differences with Deepnote.

For a video introduction to notebooks and Deepnote, see our Deepnote guide.

"},{"location":"library/","title":"Library","text":"

The algoesup library provides support for testing, timing and linting code.

"},{"location":"library/#testing","title":"Testing","text":"

Simplified testing for Python functions

"},{"location":"library/#algoesup.test.test","title":"test","text":"
test(function: Callable, test_table: list) -> None\n

Test the function with the test_table. Report failed tests.

Parameters:

Name Type Description Default function Callable

The function to be tested.

required test_table list

The list of tests. Each element of test_table is a list or tuple with: a string (the test case name); one or more values (the inputs to the function); the expected output value.

required"},{"location":"library/#timing","title":"Timing","text":"

Tools for measuring and plotting run-times

"},{"location":"library/#algoesup.time.time_functions","title":"time_functions","text":"
time_functions(\n    functions: list[Callable],\n    inputs: Callable,\n    start: int,\n    double: int,\n    text: bool = True,\n    chart: bool = False,\n    value: bool = False,\n) -> None\n

Print or plot the run-times of different functions for the same inputs.

time_functions prints or plots the run-times given a list of functions and an input generator. Inputs are generated based on a starting size and are doubled a specified number of times.

Parameters:

Name Type Description Default functions list[Callable]

A list of functions whose run-times will be measured. Must be 1 to 6 functions.

required inputs Callable

A function to generate inputs when given a specific size.

required start int

The starting size for the inputs. Must be positive.

required double int

The number of times to double the input size. Must be non-negative.

required text bool

If True, print the run-times in text format.

True chart bool

If True, plot the run-times using a chart.

False value bool

If True, the x-axis is labelled \u201cInput value\u201d, otherwise \u201cInput size\u201d.

False

Raises:

Type Description AssertionError

If input conditions are not satisfied.

"},{"location":"library/#algoesup.time.time_cases","title":"time_cases","text":"
time_cases(\n    function: Callable,\n    cases: list[Callable],\n    start: int,\n    double: int,\n    text: bool = True,\n    chart: bool = False,\n) -> None\n

Print or plot the run-times of function for different input cases.

time_cases prints or plots the run-times of a single function using a list of different input generators. Inputs are generated based on a starting size and are doubled a specified number of times.

Parameters:

Name Type Description Default function Callable

A function whose run-times will be measured.

required cases list[Callable]

A list of 1 to 6 functions to generate inputs of different cases, e.g. best-, normal- and worst-case.

required start int

The starting size for the inputs. Must be positive.

required double int

The number of times to double the input size. Must be non-negative.

required text bool

If True, print the run-times in text format.

True chart bool

If True, plot the run-times using a chart.

False

Raises:

Type Description AssertionError

If input conditions are not satisfied.

"},{"location":"library/#algoesup.time.time_functions_int","title":"time_functions_int","text":"
time_functions_int(\n    functions: list[Callable],\n    generator: Callable = int_value,\n    start: int = 1,\n    double: int = 10,\n    text: bool = True,\n    chart: bool = True,\n) -> None\n

Time functions that take a single integer as input.

time_functions_int uses time_functions to measure and display the run-times of a given list of functions that accept a single integer input. The integer inputs are generated starting from a specified value that defaults to 1, and are doubled a specified number of times that defaults to 10.

Parameters:

Name Type Description Default functions list[Callable]

A list of functions whose run-times will be measured. Each function must accept a single integer argument. Must be 1 to 6 functions.

required generator Callable

A function to generate integer inputs. Defaults to int_value, which returns a tuple containing the input integer.

int_value start int

The starting integer value for inputs. Defaults to 1. Must be positive.

1 double int

The number of times to double the input integer value. Defaults to 10. Must be non-negative.

10 text bool

If True, print the run-times in text format.

True chart bool

If True, plot the run-times using a chart.

True"},{"location":"library/#linting","title":"Linting","text":"

Linting tools for Jupyter Notebook environments

"},{"location":"library/#algoesup.magics.allowed","title":"allowed","text":"
allowed\n

Activate/deactivate the allowed linter.

When active, the linter checks each code cell that is executed for any Python constructs that are not listed in the given configuration file.

  • %allowed on ... activates the linter with any command options given after on
  • %allowed on is equal to %allowed on --config m269.json
  • %allowed off deactivates the linter
  • %allowed shows the current status of the linter
  • %allowed? shows this documentation and the command\u2019s options

For a list of possible options ..., enter !allowed -h in a code cell. Some options may not be appropriate when running allowed within a notebook.

The --config option expects m269.json, tm112.json or the name of a JSON file with your own configuration.

"},{"location":"library/#algoesup.magics.pytype","title":"pytype","text":"
pytype\n

Activate/deactivate the pytype linter.

When active, the linter checks each code cell that is executed for type errors.

  • %pytype on ... activates the linter with the command options given after on
  • %pytype on is equal to %pytype on --disable name-error,import-error
  • %pytype off deactivates the linter
  • %pytype shows the current status of the linter
  • %pytype? shows this documentation and the command\u2019s options

For a list of possible options ..., enter !pytype -h in a code cell. Some options may not be appropriate when running pytype within a notebook.

The --disable option expects a list of errors to ignore, without spaces.

"},{"location":"library/#algoesup.magics.ruff","title":"ruff","text":"
ruff\n

Activate/deactivate the Ruff linter.

When active, the linter checks each code cell that is executed against the selected code style rules.

  • %ruff on ... activates the linter with any command options given after on (see ruff\u2019s list of rules)
  • %ruff on is equal to %ruff on --select A,B,C90,D,E,W,F,N,PL --ignore D100,W292,F401,F821,D203,D213,D415
  • %ruff off deactivates the linter
  • %ruff shows the current status of the linter
  • %ruff? shows this documentation

The command %ruff on ... will run ruff check --output-format json ... on each cell. For a list of the possible options ..., enter !ruff help check in a code cell. Some options may not be appropriate when running Ruff within a notebook.

The --select and --ignore options expect a list of rule codes, without spaces.

"},{"location":"writing/","title":"Writing guide","text":"

This document provides guidance on how to produce your essay.

Note

Although we wish to accommodate novice programmers in the future, the guide currently has data structures and algorithms students in mind.

An essay can have more than one author, although more than two is harder to manage. Deepnote and Colab make it easy to work collaboratively on a single notebook, at the same time or asynchronously, and leave comments to co-authors. You may wish to first pitch your essay idea to your peers, to recruit co-authors.

In the rest of this guide, \u2018you\u2019 and \u2018your\u2019 are both singular and plural pronouns, to refer simultaneously to a single author or multiple authors.

Note

You may wish to keep this guide open while going through your copy of our template.

"},{"location":"writing/#problem","title":"Problem","text":"

It\u2019s worth spending time on choosing an appropriate problem before putting effort into an essay about it. You may invent your own problem or select an existing one. For example, it may be a non-assessed exercise from your course, or it may relate to your hobby or work. If so, provide any information the reader needs to understand the problem. If the problem is from your work, get permission from your employer or client.

There are many websites with thousands of algorithmic problems to choose from. We have used Kattis and LeetCode in the past.

Some sites, like LeetCode, tag their problems with the data structure or algorithmic technique needed, like \u2018array\u2019 or \u2018sorting\u2019. This helps you find problems about a particular topic.

Some sites, like LeetCode, have official and user-provided solutions, but the latter may be terse (single-letter identifiers, no comments) or not fully analysed. Other sites, like the International Olympiad in Informatics, often have just solution hints or outlines. You may thus wish to write an essay that fully implements a solution outline or that improves and compares several user solutions. Either way would be useful to the user community of those sites.

It is often said that the best way to learn a topic is to have to explain it to others. You may thus wish to pick a problem on a topic you\u2019re not comfortable with, choose two existing solutions, and explain them in an essay.

If you\u2019re undecided, make a shortlist of 2\u20133 problems and ask your peers for their opinion.

"},{"location":"writing/#text","title":"Text","text":"

An essay presents two or more algorithmic solutions for a computational problem, and concludes which one is better, according to some criteria. Possible criteria include:

  • time and space complexity
  • empirical run-times and memory used
  • simplicity of the solution
  • ease of adapting the solution to similar problems.

The essay should thus have a clear narrative, going from the problem to the conclusion.

An algorithmic essay contains more text than code, and while code can and should have comments, the text carries most of the explanation. It\u2019s thus important for the text to be clear and error-free.

Deepnote notebooks can have rich-text cells (headings, paragraph, bullet item, etc.) that, contrary to the Markdown cells, are spell-checked as you write the text and support keyboard shortcuts, like Ctrl + B to put the selected text in bold. Unless you want to keep your essays in Deepnote, we do not recommend using rich-text cells, as their formatting is lost when downloading the notebook to your computer.

Essays can be written in any style: it\u2019s a personal choice. For example, you can use \u2018we\u2019, \u2018I\u2019 or an impersonal form.

"},{"location":"writing/#structure","title":"Structure","text":"

An essay starts with a title that states the problem or the algorithmic technique to be used. Next, put your name(s) and the current date, which should be updated whenever you edit the essay.

Next, without any heading, comes the introduction. It should state what the essay is about. Normally an essay\u2019s aim is to solve a particular problem, but it may also illustrate a general technique, like space-time trade-offs or recursion, or highlight an issue, like the difference between complexity analysis and run-time performance.

The introduction should also state what you assume the reader to know, as no essay can explain everything from first principles. For example, tell the reader that they must know about binary trees to understand your essay.

Following the introduction, use section headings to structure your essay, for example:

  • Problem: this section describes the problem, with some examples.
  • Algorithms: this section outlines two or more algorithms that solve the problem and their complexity.
  • Implementations: this section implements and tests only the most promising algorithms.
  • Comparison: this section compares the implemented algorithms according to other criteria, e.g. their run-times.
  • Conclusion: this section summarises the findings and concludes which approach is best.

The algorithms and implementations sections may have subsections, one per algorithm.

An alternative structure implements each approach before evaluating all of them:

  • Problem: this section describes the problem, with some examples.
  • First approach: this section outlines an algorithm, implements it and tests it.
  • Second approach: this section presents another algorithm and its implementation.
  • \u2026: further sections, one per approach.
  • Evaluation: this section states the criteria to be used and evaluates each approach according to them.
  • Conclusion: this section summarises the findings and concludes which approach is best.

If the problem description is a single paragraph, you may include it in the introduction, rather than having a separate section. If you didn\u2019t invent the problem, indicate its source, e.g. by providing a link to a website or by writing something like \u201cThis is problem 4.5 in [book title] by [author].\u201d

"},{"location":"writing/#algorithms","title":"Algorithms","text":"

You should choose at least two sufficiently different algorithms that solve the problem, and describe each one succinctly, preferably before implementing it, to make the code easier to understand for the reader.

We recommend that you don\u2019t describe algorithms that are only slight variants of each other, as this is usually of little interest, and that you include only two algorithms in your first draft.

If you\u2019re using solutions by others, e.g. by LeetCode users, acknowledge the original author and provide a link to their solution. If you have modified their solutions, state what you have changed and explain why.

You should include worst-case complexity analyses of the various solutions you propose, as this helps discard the inefficient ones that may not be worth implementing.

"},{"location":"writing/#code","title":"Code","text":"

Your code should be correct, simple, and as readable as possible. Unless the aim of your essay is to discuss advanced Python constructs, try to use only a basic subset of the language. This allows more people, including those with limited knowledge of Python, to understand your code. It also makes your code easier to port to other programming languages.

We recommend the following workflow, which is further explained in the following subsections.

  1. Write the tests for your algorithms.
  2. Implement the algorithms and run the tests.
  3. Typecheck your code as you run each cell.
  4. Format your code, cell by cell.
  5. Check the code style as you run each cell.

Writing the tests (step 1) before the code they test (step 2) is a cornerstone of test-driven development, a widely used practice. Thinking of the tests early in the process helps you better understand the problem and think of correct solutions.

Info

If you followed our \u2018getting started\u2019 instructions, the software mentioned in the next subsections to carry out the above workflow is already installed.

"},{"location":"writing/#testing","title":"Testing","text":"

You should write tests for each function, to have some assurance that it is correct. Tests that check the behaviour of a single function are called unit tests. The unit tests should cover normal cases and edge cases: extreme input values and inputs that lead to extreme output values.

For each input, the smallest possible value, e.g. zero or the empty list, is an edge case, and so is the largest possible value, if there is one for that input. If a function is doing a search for an item in a list, then edge cases would be the item being at the start, at the end, or not occurring at all. If the output is a list, then inputs that produce the empty list are edge cases too. In summary, try to think of the \u2018trickiest\u2019 inputs the algorithm has to cope with.

We provide a small library to support algorithmic essays: algoesup. It allows you to easily write and run unit tests. Here\u2019s an example. (The # fmt: off and # fmt: on lines will be explained later.)

from algoesup import test\n\n# function to be tested\ndef absolute_difference(x: int, y: int) -> int:\n    \"\"\"Return the absolute value of the difference between x and y.\"\"\"\n    return x - y  # deliberately wrong, should be abs(x - y)\n\n# fmt: off\n# unit tests in tabular form, one test per row\nunit_tests = [\n    # test case,  x,    y,    expected result\n    (\"x == y\",    1,    1,    0),\n    (\"x > y\",     10,   -1,   11),\n    (\"x < y\",     -1,   10,   11),\n]\n# fmt: on\n\n# run the function on all test inputs and compare the actual and expected outputs\ntest(absolute_difference, unit_tests)\n

Output

Testing absolute_difference\u2026 x < y FAILED: -11 instead of 11 Tests finished: 2 passed, 1 failed.

A unit test consists of the input values to pass to your function and the output value you\u2019re expecting. The library requires a short descriptive string for each unit test, so that it can indicate which tests failed. The library expects unit tests to be in tabular format: one row per test, and one column for the description, one column for each input, and one column for the expected output. In the example above, the test table is a list of tuples, but it could as well be a list of lists, a tuple of lists, or a tuple of tuples.

You should reuse the test table for all solutions, because they\u2019re about the same problem. Here\u2019s a correct function that passes all test cases.

def absolute_difference_without_abs(x: int, y: int) -> int:\n    \"\"\"Return the absolute value of the difference between x and y.\n\n    This solution doesn't use the built-in abs() function.\n    \"\"\"\n    if x > y:\n        return x - y\n    else:\n        return y - x\n\ntest(absolute_difference_without_abs, unit_tests) # same test table\n

Output

Testing absolute_difference_without_abs\u2026 Tests finished: 3 passed, 0 failed.

"},{"location":"writing/#type-checking","title":"Type checking","text":"

As the above examples show, your code should contain type hints like x: int and ... -> int to indicate the type of the input and of the output. They make your code easier to understand, and help type checkers detect any type mismatches, like passing a string instead of an integer.

The algoesup library also provides an extension for Jupyter notebooks, which you must load first.

%load_ext algoesup.magics\n
(Magics are special commands that can change the behaviour of running a code cell.) You can now turn on type checking as follows.
%pytype on\n

Output

pytype was activated

Words that start with % are special commands (\u2018magics\u2019) for IPython, the Python interpreter used by Jupyter notebooks. The %pytype command, provided by our library, activates Google\u2019s pytype type checker.

Once the type checker is activated, it checks each cell immediately after it\u2019s executed. In this way you can detect and fix errors as you write and run each code cell. Here\u2019s an example of what happens.

def double(x: int) -> int:\n    \"\"\"Return twice the value of x.\"\"\"\n    return x * 2\n\ndouble([4])\n

Output

[4, 4]

pytype found issues:

  • 5: Function double was called with the wrong arguments [wrong-arg-types]

The function is executed and produces an output because lists can also be \u2018multiplied\u2019 with an integer, but the type checker detects that line 5 should have passed integers, not lists of integers, to the function. Clicking on the error name in square brackets leads you to pytype\u2019s website, with more info.

When a type checker only processes one cell at a time, it is missing the wider context, like the previously defined functions. Therefore, pytype won\u2019t spot all type errors. However, some checking is better than no checking.

The type checker adds some seconds to the overall time to run each code cell. You may thus wish to initially turn off the type checking, with %pytype off, and only turn it on after all code is written and tested. You will have to run all cells of your notebook for the type checking to take place.

For a list of all the options for the %pytype command, see the library reference.

"},{"location":"writing/#formatting","title":"Formatting","text":"

Note

This subsection only applies to Deepnote.

Once you have written, tested and type checked all your code, you should format it so that it follows the Python community\u2019s code style. You will need to format each cell, as explained here.

If there\u2019s a block of code that you don\u2019t want the formatter to change, write # fmt: off on its own line before the block and write # fmt: on after the block, to temporarily switch off formatting for that block. This feature is especially useful for keeping the format of unit test tables, as shown in an earlier example.

The Deepnote formatter automatically enforces simple formatting conventions, like 4 spaces for indentation and 2 empty lines between functions, so you will see fewer warnings in the next stage.

"},{"location":"writing/#linting","title":"Linting","text":"

You should lint your code, which means to check it for style violations.

"},{"location":"writing/#code-style","title":"Code style","text":"

Our library supports ruff, the fastest Python linter. To turn it on, write the following after loading the algoesup.magics extension.

%ruff on\n

Output

ruff was activated

From now on, each cell is automatically linted after it\u2019s executed. Here\u2019s an example:

l = [1, 2, 3]\nif (not 5 in l) == True:\n    print(\"5 isn't in the list\")\n

Output

5 isn\u2019t in the list

ruff found issues:

  • 1: [E741] Ambiguous variable name: l
  • 2: [PLR2004] Magic value used in comparison, consider replacing 5 with a constant variable
  • 2: [E713] Test for membership should be not in. Suggested fix: Convert to not in
  • 2: [E712] Comparison to True should be cond is True or if cond:. Suggested fix: Replace with cond is True

Every message indicates the line of the problem.

  • The first message is trying to tell us that l can be misread for 1 (one).
  • The second message recommends using constants, like EXPECTED_VALUE, instead of literals like 5 that are meaningless to the reader.
  • The third message tells us that it\u2019s better to write 5 not in l.
  • The last message says that == True should be is True or simply omitted. The suggested fix is not appropriate for this if-statement: it should be if 5 not in l:.

As this code cell shows, ruff sometimes suggests how to fix the reported error, but you must consider whether the suggestion is appropriate.

If you don\u2019t understand an error message, like the first one, click on the error code in brackets, to get more information from ruff\u2019s website.

As with type checking, linting one cell at a time means that the linter is unaware of the wider context of your code. For example, in notebooks, variables may be defined in one cell but used in a later cell. As the linter checks each cell separately, it would report an undefined variable in the later cell. We have disabled checks for undefined variables and other checks that would lead to irrelevant error messages in notebooks, which means that genuine undefined variables won\u2019t be flagged. But again, some linting is better than none.

If you get errors that you think are irrelevant, you can disable them with the --ignore option: see the library reference.

"},{"location":"writing/#language-subset","title":"Language subset","text":"

Our library also supports the allowed linter, created by ourselves. It checks whether your code only uses a certain subset of the Python language. This gives you some reassurance that your code will be understood by a wide audience.

By default, allowed checks against the Python subset used in our algorithms and data structures course. So, if you\u2019re an M269 student, to check that your essay is easily understood by your peers in terms of Python constructs, just add the following after loading the extension:

%allowed on\n

Output

allowed was activated

Henceforth, after a cell is executed, the allowed linter will list any constructs, modules or built-in types we haven\u2019t taught, like this:

from math import pi, sin\n\nprint(f\"\u03c0 is approximately {pi:.5f}.\")\n

Output

\u03c0 is approximately 3.14159.

allowed found issues:

  • 1: sin
  • 3: f-string

We haven\u2019t taught the math.sin() function nor f-strings, and allowed reports these.

Any line that ends with the comment # allowed is ignored. This is useful when you don\u2019t want the linter to flag a construct that you explain in your essay. For example, adding the comment after print(...) would not report the f-string. Note that the comment makes the tool skip the whole line: if it has several constructs that weren\u2019t taught, none of them is reported.

The allowed linter also includes the configuration for TM112, our introductory Computing course, in case you want to use even fewer constructs in your essay. To use that configuration, write %allowed on --config tm112.json. For a list of all the options for the %allowed command, see the library reference.

You can configure the linter with a JSON file that lists the allowed constructs. In Deepnote, rename the allowed.json JSON configuration in the Files section of your project, and adapt it to your course. See the allowed website for instructions.

"},{"location":"writing/#performance-analysis","title":"Performance analysis","text":"

Complexity analysis gives an indication of how the run-times will grow as the inputs grow, but it can\u2019t predict the exact run-times nor which algorithm is in practice fastest.

Our library helps measure and plot the run-times of one function on different kinds of input, or of different functions on the same inputs.

For example, let\u2019s suppose our essay is about sorting algorithms and we have implemented selection sort.

def selection_sort(values: list[int]) -> list[int]:\n    \"\"\"Return a copy of the values, in ascending order.\"\"\"\n    result = values[:]\n    for current in range(len(result) - 1):\n        # select the smallest element in result[current:] ...\n        smallest = current\n        for index in range(current + 1, len(result)):\n            if result[index] < result[smallest]:\n                smallest = index\n        # ... and swap it with the current element\n        result[current], result[smallest] = result[smallest], result[current]\n    return result\n

"},{"location":"writing/#generating-inputs","title":"Generating inputs","text":"

To measure the run-times of sorting algorithms on increasingly large lists, we must implement functions that generate such lists. For example, we can write a function that generates lists that are already in ascending order, which is a best case for many sorting algorithms, and a function that generates lists that are in descending order, which is a worst case for some sorting algorithms.

The library expects such input-generating functions to take a non-negative integer n, and to produce a tuple of input values, with total size n. Why a tuple? Although our sorting algorithm takes a single input (a list of integers), many algorithms take more than one input. Thus the input-generating functions must generate a tuple of inputs, in the same order as expected by the algorithm.

def ascending(n: int) -> tuple[list[int]]:\n    \"\"\"Return a list of n integers in ascending order.\"\"\"\n    return (list(range(1, n + 1)),)  # trailing comma to make it a tuple\n\ndef descending(n: int) -> tuple[list[int]]:\n    \"\"\"Return a list of n integers in descending order.\"\"\"\n    return (list(range(n, 0, -1)),)\n
We should of course test these functions, to make sure they produce the expected lists, but we will skip that in this explanation because we\u2019re focusing on how to measure run-times.

"},{"location":"writing/#comparing-cases","title":"Comparing cases","text":"

To measure the run-times of a function f on best, average and worst case inputs, use library function time_cases(f, [case1, case2, ...], s, d). The second argument can be a list (or tuple) of up to 6 input-generating functions. The time_cases function works as follows.

  1. Call case1(s) to generate inputs of size s for f.
  2. Run function f on the generated inputs and measure its run-time.
  3. Do the two previous steps with each of the functions case2, ....
  4. Set s to double its value and go back to step 1.

The whole process stops when s has been doubled d times. If d is zero, the run-times are only measured for size s.

Here\u2019s how we could measure the run-times for selection sort on ascending and descending lists.

from algoesup import time_cases\n\ntime_cases(selection_sort, [ascending, descending], start=100, double=4)\n

Output

Run-times for selection_sort\n\nInput size       ascending      descending\n       100           168.2           173.2 \u00b5s\n       200           643.2           660.6 \u00b5s\n       400          2716.7          2817.9 \u00b5s\n       800         11072.4         11407.3 \u00b5s\n      1600         44285.3         45512.7 \u00b5s\n

Running selection sort on lists from 100 to 1600 integers takes about 170 microseconds to 45 milliseconds. To measure such small time spans precisely, function f (here, selection_sort) is called multiple times on the same input, within a loop, and the total time is divided by the number of iterations, to obtain a better estimate of the time taken by a single call to f. The whole process is repeated 3 times, because the run-times will vary due to other processes running on the computer. The lowest of the 3 run-times is reported.

Because function f is called multiple times, it is very important that f does not modify its inputs. For example, if selection_sort sorted the list in-place, instead of returning a new list, then the first call would put the numbers in ascending order, and the subsequent calls would just try to sort an already sorted list, swapping no numbers. We would obtain almost exactly the same times for ascending and descending input lists, instead of always larger times for descending lists, as shown above.

When executing code like the previous example, be patient while waiting for the results. Even though each call may just take a few milliseconds or less, the code cell will take several seconds or even minutes to execute, because the function is called many times to make the measurements more robust.

"},{"location":"writing/#comparing-functions","title":"Comparing functions","text":"

Our library also allows you to compare different algorithms for the same input case. For that, use time_functions([f1, f2, ...], case, s, d), which does the following:

  1. Call case(s) to generate inputs of total size s.
  2. Call each function f1, f2, etc. on the generated inputs and measure their run-times.
  3. Double the value of s and go to step 1, unless s has been doubled d times.

The run-times are measured as for time_cases: take the best of 3 run-times, each obtained by calling the function within a loop and dividing the total time by the number of iterations.

Here\u2019s a comparison of the built-in sorted function against selection sort, on descending lists.

from algoesup import time_functions\n\ntime_functions([selection_sort, sorted], descending, start=100, double=4)\n

Output

Inputs generated by descending\n\nInput size  selection_sort          sorted\n       100           172.8             0.5 \u00b5s\n       200           660.7             0.8 \u00b5s\n       400          2795.7             1.6 \u00b5s\n       800         11534.0             3.1 \u00b5s\n      1600         45470.0             5.9 \u00b5s\n

As expected, the built-in sorting implementation is much, much faster.

"},{"location":"writing/#charting-run-times","title":"Charting run-times","text":"

If you add argument chart=True to time_cases or time_functions, then you will see a line chart of the run-times, in addition to the exact run-times. If you only want to see the chart, then add arguments text=False, chart=True.

time_cases(sorted, [ascending, descending], 100, 4, text=False, chart=True)\n

Output

time_functions([sorted, selection_sort], ascending, 100, 4, chart=True)\n

Output

Inputs generated by ascending\n\nInput size          sorted  selection_sort\n       100           482.1        168025.5 ns\n       200           808.3        646093.8 ns\n       400          1496.9       2720369.2 ns\n       800          2850.4      11090135.4 ns\n      1600          5553.0      44372758.4 ns\n

The 1e7 above the y-axis means that the run-times must be multiplied by 10\u2077, i.e. 10 million.

Note that when calling time_functions([selection_sort, sorted], ...) the run-times were reported in microseconds, but when calling time_functions([sorted, selection_sort], ...) they were in nanoseconds. The reason is that the library chooses the time unit based on the first run-time measured. If there\u2019s a big time difference between the fastest and slowest cases or algorithms, you may wish for the first function in the list to be the slowest one, to report it with small values in a \u2018large\u2019 time unit, instead of very large values in a \u2018small\u2019 time unit. So, in time_functions([f1, f2, ...], case, ...) the slowest function should be f1, and in time_cases(f, [case1, case2, ...], ...) the worst case should be case1.

"},{"location":"writing/#interpreting-run-times","title":"Interpreting run-times","text":"

If, as the input size doubles, the run-times\u2026

  • \u2026remain the same, then the function has constant complexity.
  • \u2026also double, then the function has linear complexity.
  • \u2026quadruple, then the function has quadratic complexity.
  • \u2026increase by a fixed amount, then the function has logarithmic complexity.

Looking at the run-times reported in the previous subsections, we can see that sorted is linear because the run-times about double when the input size doubles, whereas selection sort is quadratic because the run-times increase about 4-fold when the input size doubles.

Remember that run-times vary every time you execute a cell because the computer is executing other processes. This may lead to the odd behaviour here and there. For example, we have noted that sorted is occasionally faster for descending lists, which is counter-intuitive because it does have to reverse them.

If you can\u2019t see any trend in the run-times, or they aren\u2019t what you expect, one possible cause is that the input sizes are too small. Increase start and run the code cell again.

If after increasing the start size several times you still don\u2019t get the run-times you expect from your complexity analysis, then there might be other explanations:

  • your complexity analysis is wrong
  • your implemented algorithm modifies its input
  • your input-generating functions are not generating best or worst cases.

For an example of the last of these, see the Jewels and Stones essay.

"},{"location":"writing/#final-check","title":"Final check","text":"

Whether it\u2019s your essay\u2019s first draft or final version, before you share it with others, you should restart the kernel and run all cells, so that you have a \u2018clean\u2019 version. Then, after a break, read your essay with \u2018fresh eyes\u2019 from start to end and fix any typos or missing explanations you find.

Look at the table of contents of your notebook and check that your section headings are at the right level.

Info

In Deepnote, the table of contents is on the left sidebar; in Colab, you must click the bullet list icon in the left sidebar.

Finally, let others comment on your essay and help you produce a better version. See our feedback guide for details.

"},{"location":"writing/#further-reading","title":"Further reading","text":"

If you\u2019re interested and have the time, here are further details on some of the above.

  • Strunk and White\u2019s The Elements of Style is a classic. The examples are dated but the advice is good.
  • The websites of allowed, pytype and ruff.
  • A summary of Python\u2019s type hints provided by the mypy project (another type checker).
  • The Python code style and docstring conventions.
  • The formatting style enforced by black, which we suspect is the formatter used by Deepnote. Deepnote ignores the # fmt: skip directive to not format a single line.
"},{"location":"essays/example-1-to-n/","title":"Sum of 1 to n","text":"

This short notebook provides a very simple example of an algorithmic essay and shows the support that our library provides.

The problem to be solved is to compute the sum of the integers, 1, 2, ..., n, for some given n. Two solutions will be presented, both only using basic Python.

The first thing to do is to import the necessary functions from the algorithmic essays support library. The functions will be explained later, when they're used.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff In\u00a0[1]: Copied!
# import functions to test the code and to measure the execution time\nfrom algoesup import test, time_functions_int\n
# import functions to test the code and to measure the execution time from algoesup import test, time_functions_int

The library also includes two commands to turn on the tools that will check the code as it's executed. There won't be any messages in this notebook because the code passes all checks.

In\u00a0[4]: Copied!
%load_ext algoesup.magics\n# check the code for style violations\n%ruff on\n# check that only the subset of Python taught in our introductory course TM112 is used\n%allowed on --config tm112.json\n
%load_ext algoesup.magics # check the code for style violations %ruff on # check that only the subset of Python taught in our introductory course TM112 is used %allowed on --config tm112.json
ruff was activated\nallowed was activated\n
In\u00a0[3]: Copied!
tests = [\n    # case            n,    sum\n    [\"no integers\",  -1,      0],\n    [\"one integer\",   1,      1],\n    [\"n is even\",     4,     10],  # 1 + 2 + 3 + 4 = 10\n    [\"n is odd\",      7,     28],  # 1 + 2 + 3 + 4 + 5 + 6 + 7 = 28\n]\n
tests = [ # case n, sum [\"no integers\", -1, 0], [\"one integer\", 1, 1], [\"n is even\", 4, 10], # 1 + 2 + 3 + 4 = 10 [\"n is odd\", 7, 28], # 1 + 2 + 3 + 4 + 5 + 6 + 7 = 28 ] In\u00a0[4]: Copied!
def sum_with_loop(n):\n    \"\"\"Return 1 + 2 + ... + n, using a loop.\"\"\"\n    total = 0\n    for number in range(1, n + 1):\n        total = total + number\n    return total\n
def sum_with_loop(n): \"\"\"Return 1 + 2 + ... + n, using a loop.\"\"\" total = 0 for number in range(1, n + 1): total = total + number return total

The next step is to check the code with the imported test function, which takes two arguments: the function to be tested and a test table. Every test case in the test table is checked and if the actual output isn't the expected output, the test is reported as having failed.

In\u00a0[5]: Copied!
test(sum_with_loop, tests)\n
test(sum_with_loop, tests)
Testing sum_with_loop...\nTests finished: 4 passed, 0 failed.\n

Algorithm 1 passes all tests. Since the algorithm does exactly what the problem asks for, the only test that could fail is for n < 1. However, in that case the loop is skipped and the returned total is zero, as desired.

In\u00a0[6]: Copied!
def sum_with_formula(n):\n    \"\"\"Return 1 + 2 + ... + n, using a formula.\"\"\"\n    return n * (n + 1) // 2\n
def sum_with_formula(n): \"\"\"Return 1 + 2 + ... + n, using a formula.\"\"\" return n * (n + 1) // 2

This function is solving the same problem as the previous one, so it must be tested with the exact same tests.

In\u00a0[7]: Copied!
test(sum_with_formula, tests)\n
test(sum_with_formula, tests)
Testing sum_with_formula...\nTests finished: 4 passed, 0 failed.\n

The code passes the tests, and yet the algorithm is wrong! The formula is only meant for n \u2265 1. It just happens that if n = 0 or n = -1 then n \u00b7 (n+1) / 2 = 0, as desired, but for n < -1 the result is non-zero. An additional test helps confirm this.

In\u00a0[8]: Copied!
tests.append([\"n < -1\", -5, 0])  # testing with n = -5\n\ntest(sum_with_formula, tests)\n
tests.append([\"n < -1\", -5, 0]) # testing with n = -5 test(sum_with_formula, tests)
Testing sum_with_formula...\nn < -1 FAILED: 10 instead of 0\nTests finished: 4 passed, 1 failed.\n

The result is -5 \u00b7 (-5 + 1) / 2 = -5 \u00b7 -4 / 2 = 10 instead of zero. The algorithm must be modified.

Algorithm 2 (corrected): If n < 1, return 0, otherwise return n \u00b7 (n+1) / 2.

In\u00a0[9]: Copied!
def sum_with_formula(n):\n    \"\"\"Return 1 + 2 + ... + n, using a formula.\"\"\"\n    if n < 1:\n        return 0\n    else:\n        return n * (n + 1) // 2\n\ntest(sum_with_formula, tests)\n
def sum_with_formula(n): \"\"\"Return 1 + 2 + ... + n, using a formula.\"\"\" if n < 1: return 0 else: return n * (n + 1) // 2 test(sum_with_formula, tests)
Testing sum_with_formula...\nTests finished: 5 passed, 0 failed.\n

Now the additional test also passes.

As a new test was added, the first algorithm must be tested again.

In\u00a0[10]: Copied!
test(sum_with_loop, tests)\n
test(sum_with_loop, tests)
Testing sum_with_loop...\nTests finished: 5 passed, 0 failed.\n
In\u00a0[11]: Copied!
time_functions_int([sum_with_loop, sum_with_formula])\n
time_functions_int([sum_with_loop, sum_with_formula])
Inputs generated by int_value\n\nInput value   sum_with_loop sum_with_formul \n          1           151.1            87.3 ns\n          2           178.9            87.5 ns\n          4           209.2            87.6 ns\n          8           278.3            88.1 ns\n         16           401.5            94.6 ns\n         32           701.0            99.0 ns\n         64          1359.7            99.2 ns\n        128          2610.7            99.2 ns\n        256          5144.2           102.8 ns\n        512         11148.8           102.8 ns\n       1024         23216.0           102.7 ns

Computing the sum with the formula always takes about the same time, around 90 to 100 nanoseconds (that's what the 'ns' means). However, the run-times for the sum with the loop grow as n gets larger. If the value of n doubles, the number of iterations also doubles, so the run-time also roughly doubles, as can be seen above the chart. As n increases, the iterative sum gets slower and slower. At n = 1024, using a loop is about 200 times slower than using the formula!

"},{"location":"essays/example-1-to-n/#sum-of-1-to-n","title":"Sum of 1 to n\u00b6","text":"

Michel Wermelinger, 22 January 2024, last updated 16 March 2024

"},{"location":"essays/example-1-to-n/#tests","title":"Tests\u00b6","text":"

Before thinking of a solution, it's best to write down some tests, as they help our understanding of the problem. Each test is the input integer n and the corresponding expected integer output 1 + 2 + ... + n.

The tests should include edge cases: inputs that are extreme values or that lead to extreme outputs. For this problem, edge cases are when there are no integers to add up (n < 1) or when there's only one (n = 1).

Tests should also include normal cases. When the input is an integer, tests typically include odd and even values.

The test function that was imported in the previous cell expects the tests to be written as a table, e.g. as a list of lists. The table must have one row per test case. Each row has a column with a string describing the case, one column per input, and a final column with the expected output.

"},{"location":"essays/example-1-to-n/#solutions","title":"Solutions\u00b6","text":"

The next step is to think of possible algorithms that solve the problem.

"},{"location":"essays/example-1-to-n/#using-a-loop","title":"Using a loop\u00b6","text":"

Probably the most obvious algorithm is to explicitly calculate the sum, by iterating over the integers from 1 to n.

Algorithm 1: Set the total to zero. For each value from 1 to n, add the value to the total. Return the total.

The algorithm is implemented as follows.

"},{"location":"essays/example-1-to-n/#using-a-formula","title":"Using a formula\u00b6","text":"

It's possible to compute the sum directly, using a well-known formula, taught in many introductory math courses:

1 + 2 + 3 + ... + (n - 2) + (n - 1) + n = (1 + n) + (2 + n - 1) + (3 + n - 2) + ... = (n + 1) \u00b7 n / 2.

The formula is based on 'pairing up' the first with the last number, the second with the next to last, and so on. Each pair adds up to n + 1, and the number of pairs is half of n. The algorithm is simply the formula:

Algorithm 2: Return n \u00b7 (n+1) / 2.

"},{"location":"essays/example-1-to-n/#performance","title":"Performance\u00b6","text":"

The final, optional, step is to compare the performance of both solutions to see which is faster. For this problem, the second algorithm is expected to be faster than the first one, as it computes the sum directly, without iterating over n integers.

The time_functions_int function, which was imported in the first cell, takes a list of functions that have a single integer n as input, and measures their run-times for n = 1, 2, 4, 8, ..., 1024. Doubling the input allows us to see any trends in how run-times grow with just eleven executions of the two functions. Running the functions with n = 1, 2, 3, 4, 5, ..., 1024 would take much longer and not produce any additional insight.

"},{"location":"essays/example-1-to-n/#concluding-remarks","title":"Concluding remarks\u00b6","text":"

This essay presented two solutions to a simple problem: computing the sum 1 + ... + n, for any integer n. The sum should be zero if n < 1.

The first solution computes the sum iteratively, while the second computes it directly, with a formula. The second is always faster, even for n = 1. Moreover, as n increases, the first approach becomes slower whereas the second always takes about the same time.

This essay illustrates the need for thinking about different algorithms for the same problem, as one approach may be much more efficient than others.

The essay also showed that passing all tests doesn't mean that the code is correct. As the various algorithms are developed, new tests may have to be added.

"},{"location":"essays/example-jewels/","title":"Jewels and Stones","text":"

In this example algorithmic essay I will tackle LeetCode problem Jewels and Stones. The inputs are two strings jewels and stones, where each character represents a type of stone, e.g. A for amethyst, D for diamond, b for basalt and ? for an unknown stone. The jewels string contains the characters that represent jewels, without repetition. The desired output is the number of stones that are jewels. For example, if jewels is \"AD\" and stones is \"b?AbDAb\" then the output is 3 because three of the stones are jewels (two amethysts and one diamond). To be clear, which stones and jewels the characters represent doesn't really matter: the output is simply how many characters of the second string occur in the first string.

This essay will illustrate a space-time trade-off: by using an additional data structure, we get a faster algorithm. I assume you're familiar with sets, bags (multisets) and basic complexity analysis. This essay uses Big-Theta notation \u0398(...), but if you're not familiar with it, you can read it as Big-Oh notation O(...) for the purposes of the analyses done below. Sorting, binary search, and log-linear and logarithmic complexities are mentioned in Section 2.3, but you can skip it, as it won't affect the understanding of the rest of the essay.

Before starting to solve the problem, I import the necessary functions from algoesup, the algorithmic essays support library, and turn on the tools that will check my code for style and type violations (with Ruff and pytype), and for constructs I didn't teach (with allowed).

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff pytype\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff pytype In\u00a0[1]: Copied!
from algoesup import test, time_cases, time_functions\n
from algoesup import test, time_cases, time_functions In\u00a0[2]: Copied!
%load_ext algoesup.magics\n%ruff on\n%pytype on\n%allowed on\n
%load_ext algoesup.magics %ruff on %pytype on %allowed on
ruff was activated\npytype was activated\nallowed was activated\n
In\u00a0[3]: Copied!
# don't allow code formatters to change this table\n# fmt: off\ntests = [\n    # case                  jewels  stones  output\n    (\"no jewels\",           \"\",     \"abc\",      0),\n    (\"no stones\",           \"DA\",   \"\",         0),\n    (\"both empty\",          \"\",     \"\",         0),\n    (\"none is a jewel\",     \"DA\",   \"dada?\",    0),\n    (\"equal strings\",       \"DA\",   \"DA\",       2),\n    (\"all same jewel\",      \"DA\",   \"AAAA\",     4),\n    (\"some jewels missing\", \"DARJ\", \"RRD\",      3),\n    (\"some not jewels\",     \"RAD\",  \"bRADAb\",   4),\n]\n# allow code formatters to change the following cells\n# fmt: on\n
# don't allow code formatters to change this table # fmt: off tests = [ # case jewels stones output (\"no jewels\", \"\", \"abc\", 0), (\"no stones\", \"DA\", \"\", 0), (\"both empty\", \"\", \"\", 0), (\"none is a jewel\", \"DA\", \"dada?\", 0), (\"equal strings\", \"DA\", \"DA\", 2), (\"all same jewel\", \"DA\", \"AAAA\", 4), (\"some jewels missing\", \"DARJ\", \"RRD\", 3), (\"some not jewels\", \"RAD\", \"bRADAb\", 4), ] # allow code formatters to change the following cells # fmt: on In\u00a0[4]: Copied!
def count_in_string(jewels: str, stones: str) -> int:\n    \"\"\"Return the number of characters in `stones` that are in `jewels`.\n\n    Use Algorithm 2: for each stone, check if it's a jewel.\n    Complexity: best \u0398(len(stones)); worst \u0398(len(stones) * len(jewels))\n    \"\"\"\n    counter = 0\n    for stone in stones:\n        if stone in jewels:\n            counter = counter + 1\n    return counter\n\n\n# test(function, test_table) runs `function` on each test in `test_table`\n# and prints the tests that fail (actual output isn't the expected output).\ntest(count_in_string, tests)\n
def count_in_string(jewels: str, stones: str) -> int: \"\"\"Return the number of characters in `stones` that are in `jewels`. Use Algorithm 2: for each stone, check if it's a jewel. Complexity: best \u0398(len(stones)); worst \u0398(len(stones) * len(jewels)) \"\"\" counter = 0 for stone in stones: if stone in jewels: counter = counter + 1 return counter # test(function, test_table) runs `function` on each test in `test_table` # and prints the tests that fail (actual output isn't the expected output). test(count_in_string, tests)
Testing count_in_string...\nTests finished: 8 passed, 0 failed.\n

The implementation of Algorithm 2 passes all tests. Next, Algorithm 3 can be implemented with Python's Counter class.

In\u00a0[5]: Copied!
from collections import Counter\n\n\ndef count_in_bag(jewels: str, stones: str) -> int:\n    \"\"\"Return the number of characters in `stones` that are in `jewels`.\n\n    Use Algorithm 3: put stones in a bag; add the frequencies of those that are jewels.\n    Complexity: \u0398(len(stones) + len(jewels))\n    \"\"\"\n    bag = Counter(stones)\n    counter = 0\n    for jewel in jewels:\n        counter = counter + bag[jewel]\n    return counter\n\n\ntest(count_in_bag, tests)\n
from collections import Counter def count_in_bag(jewels: str, stones: str) -> int: \"\"\"Return the number of characters in `stones` that are in `jewels`. Use Algorithm 3: put stones in a bag; add the frequencies of those that are jewels. Complexity: \u0398(len(stones) + len(jewels)) \"\"\" bag = Counter(stones) counter = 0 for jewel in jewels: counter = counter + bag[jewel] return counter test(count_in_bag, tests)
Testing count_in_bag...\nTests finished: 8 passed, 0 failed.\n

Finally, Algorithm 4 can be implemented with Python's built-in set type.

In\u00a0[6]: Copied!
def count_in_set(jewels: str, stones: str) -> int:\n    \"\"\"Return the number of characters in `stones` that are in `jewels`.\n\n    Use Algorithm 4: put jewels in a set; count the stones that are in the set.\n    Complexity: \u0398(len(jewels) + len(stones))\n    \"\"\"\n    jewel_set = set(jewels)\n    counter = 0\n    for stone in stones:\n        if stone in jewel_set:\n            counter = counter + 1\n    return counter\n\n\ntest(count_in_set, tests)\n
def count_in_set(jewels: str, stones: str) -> int: \"\"\"Return the number of characters in `stones` that are in `jewels`. Use Algorithm 4: put jewels in a set; count the stones that are in the set. Complexity: \u0398(len(jewels) + len(stones)) \"\"\" jewel_set = set(jewels) counter = 0 for stone in stones: if stone in jewel_set: counter = counter + 1 return counter test(count_in_set, tests)
Testing count_in_set...\nTests finished: 8 passed, 0 failed.\n
In\u00a0[7]: Copied!
def best_case(size: int) -> tuple[str, str]:\n    \"\"\"Return a best-case input (pair of strings) of the given size.\n\n    Output: (jewels, stones) with\n    - len(jewels) + len(stones) == size, with each string half the size\n    - jewels has no duplicate characters\n    - all stones are jewels[0]\n    \"\"\"\n    j = size // 2\n    s = size - j\n    # create a string with j different characters (Unicode 32 onwards)\n    jewels = \"\"\n    for code in range(32, 32 + j):\n        jewels = jewels + chr(code)\n    # create a string with s spaces (Unicode 32), the first character in jewels\n    stones = \" \" * s\n    return (jewels, stones)\n
def best_case(size: int) -> tuple[str, str]: \"\"\"Return a best-case input (pair of strings) of the given size. Output: (jewels, stones) with - len(jewels) + len(stones) == size, with each string half the size - jewels has no duplicate characters - all stones are jewels[0] \"\"\" j = size // 2 s = size - j # create a string with j different characters (Unicode 32 onwards) jewels = \"\" for code in range(32, 32 + j): jewels = jewels + chr(code) # create a string with s spaces (Unicode 32), the first character in jewels stones = \" \" * s return (jewels, stones)

allowed found issues:

  • 14: chr()

Note that the allowed tool reports that I haven't taught my students the chr function. That's why I explained it before presenting the code.

Moving on, I write some tests, to check that best_case is generating the right strings.

In\u00a0[8]: Copied!
# fmt: off\ntest(best_case, [\n    # case,           size, (jewels, stones)\n    (\"empty strings\", 0,    (\"\",     \"\")),\n    (\"no jewels\",     1,    (\"\",     \" \")),\n    (\"odd size\",      5,    (' !',   \"   \")),\n])\n# fmt: on\n
# fmt: off test(best_case, [ # case, size, (jewels, stones) (\"empty strings\", 0, (\"\", \"\")), (\"no jewels\", 1, (\"\", \" \")), (\"odd size\", 5, (' !', \" \")), ]) # fmt: on
Testing best_case...\nTests finished: 3 passed, 0 failed.\n

To generate a worst case, I have two options: no stone is a jewel or all stones are the last jewel (Unicode 31 + j). I choose the first one. (Spoiler alert: I made the wrong choice. Can you see why?)

In\u00a0[9]: Copied!
def worst_case(size: int) -> tuple[str, str]:\n    \"\"\"Return a worst-case input (pair of strings) of the given size.\n\n    Output: (jewels, stones) with\n    - len(jewels) + len(stones) == size, with each string half the size\n    - jewels has no duplicate characters\n    - no stone is a jewel\n    \"\"\"\n    j = size // 2\n    s = size - j\n    # create a string with j different characters (Unicode 32 onwards)\n    jewels = \"\"\n    for code in range(32, 32 + j):\n        jewels = jewels + chr(code)  # allowed\n    # create a string with s different characters (Unicode 32+j onwards)\n    stones = \"\"\n    for code in range(32 + j, 32 + j + s):\n        stones = stones + chr(code)  # allowed\n    return (jewels, stones)\n\n\n# fmt: off\ntest(worst_case, [\n    # case,           size, (jewels,    stones)\n    (\"empty strings\", 0,    (\"\",        \"\")),\n    (\"no jewels\",     1,    (\"\",        \" \")),\n    (\"odd size\",      11,   (' !\"#$',   \"%&'()*\")),  # 5 jewels, 6 stones\n])\n# fmt: on\n
def worst_case(size: int) -> tuple[str, str]: \"\"\"Return a worst-case input (pair of strings) of the given size. Output: (jewels, stones) with - len(jewels) + len(stones) == size, with each string half the size - jewels has no duplicate characters - no stone is a jewel \"\"\" j = size // 2 s = size - j # create a string with j different characters (Unicode 32 onwards) jewels = \"\" for code in range(32, 32 + j): jewels = jewels + chr(code) # allowed # create a string with s different characters (Unicode 32+j onwards) stones = \"\" for code in range(32 + j, 32 + j + s): stones = stones + chr(code) # allowed return (jewels, stones) # fmt: off test(worst_case, [ # case, size, (jewels, stones) (\"empty strings\", 0, (\"\", \"\")), (\"no jewels\", 1, (\"\", \" \")), (\"odd size\", 11, (' !\"#$', \"%&'()*\")), # 5 jewels, 6 stones ]) # fmt: on
Testing worst_case...\nTests finished: 3 passed, 0 failed.\n

The # allowed comment in lines 14 and 18 tells the tool that chr() is allowed, because it was explained, and hence should not be reported.

Finally I generate random typical cases in which not all stones are jewels and not all jewels occur in the stones.

In\u00a0[10]: Copied!
import random\n\n\ndef normal_case(size: int) -> tuple[str, str]:\n    \"\"\"Return a typical input (pair of strings) of the given size.\n\n    Output: (jewels, stones) with\n    - len(jewels) + len(stones) == size, with each string half the size\n    - jewels has no duplicate characters\n    - stones has some jewels and some non-jewels, if len(jewels) > 1\n    - not all jewels occur in stones, if len(jewels) > 1\n    \"\"\"\n    j = size // 2\n    s = size - j\n    # create a string with j different characters (Unicode 32 onwards)\n    jewels = \"\"\n    for code in range(32, 32 + j):\n        jewels = jewels + chr(code)  # allowed\n    # create a string with s random characters from Unicode 33 to 33 + 2j\n    stones = \"\"\n    for _ in range(s):\n        stones = stones + chr(random.randint(33, 33 + 2 * j))  # allowed\n    return (jewels, stones)\n\n\n# Can't test with a test table, because the output is random.\n(jewels, stones) = normal_case(20)\ncounter = count_in_string(jewels, stones)\nprint(\"Some stones are jewels:\", counter > 0)\nprint(\"Some stones aren't jewels:\", counter < len(stones))\n
import random def normal_case(size: int) -> tuple[str, str]: \"\"\"Return a typical input (pair of strings) of the given size. Output: (jewels, stones) with - len(jewels) + len(stones) == size, with each string half the size - jewels has no duplicate characters - stones has some jewels and some non-jewels, if len(jewels) > 1 - not all jewels occur in stones, if len(jewels) > 1 \"\"\" j = size // 2 s = size - j # create a string with j different characters (Unicode 32 onwards) jewels = \"\" for code in range(32, 32 + j): jewels = jewels + chr(code) # allowed # create a string with s random characters from Unicode 33 to 33 + 2j stones = \"\" for _ in range(s): stones = stones + chr(random.randint(33, 33 + 2 * j)) # allowed return (jewels, stones) # Can't test with a test table, because the output is random. (jewels, stones) = normal_case(20) counter = count_in_string(jewels, stones) print(\"Some stones are jewels:\", counter > 0) print(\"Some stones aren't jewels:\", counter < len(stones))
Some stones are jewels: True\nSome stones aren't jewels: True\n

The loop variable's name is just an underscore (line 21) to avoid a warning that the loop variable isn't used in the loop's body.

In\u00a0[11]: Copied!
cases = [best_case, normal_case, worst_case]\ntime_cases(count_in_string, cases, start=10, double=4)\n
cases = [best_case, normal_case, worst_case] time_cases(count_in_string, cases, start=10, double=4)
Run-times for count_in_string\n\nInput size       best_case     normal_case      worst_case \n        10           202.2           161.6           155.0 ns\n        20           332.7           346.1           296.7 ns\n        40           589.1           625.3           420.5 ns\n        80          1084.0          1085.0           768.1 ns\n       160          2080.8          2242.0          1536.6 ns

The results are baffling: the worst case is the fastest!

The linear search is done with the in operator, which is much faster than implementing the search in Python. Due to the very small input sizes, with at most 80 jewels and 80 stones, it doesn't really matter whether the search stops at the first jewel or goes through all the jewels.

Since the search takes about the same time in the best and worst cases, what makes the difference to the overall run-time is how often the counter is updated. In the best case (all stones are the first jewel) the counter is always incremented, whereas in the worst case (no stone is a jewel) the counter is never incremented. Hence, the worst case is faster than the best case.

The real issue is that I'm not generating worst cases. I'm not constructing inputs that make Algorithm 2 do the most work. A worst case should make the linear search go through all the jewels and increment the counter for each one. The worst case is actually when all stones are the last jewel.

I should go back, modify the worst_case function so that stones = chr(31 + j) * s, and run the previous cell again. Instead, I will run count_in_string on larger inputs. That will avoid the search taking about the same time whether a stone is the first jewel or not a jewel. This time I also ask for a graphical output.

In\u00a0[12]: Copied!
time_cases(count_in_string, cases, start=1000, double=4, chart=True)\n
time_cases(count_in_string, cases, start=1000, double=4, chart=True)
Run-times for count_in_string\n\nInput size       best_case     normal_case      worst_case \n      1000            14.5            34.8            33.1 \u00b5s\n      2000            30.2           109.4           113.9 \u00b5s\n      4000            61.6           327.2           378.0 \u00b5s\n      8000           123.6          1098.9          1339.7 \u00b5s\n     16000           248.4          3954.2          5003.5 \u00b5s

The best-case run-times double as the input size doubles, confirming the linear complexity. The worst-case run-times about quadruple as the input size doubles, confirming the quadratic complexity.

In\u00a0[13]: Copied!
algorithms = [count_in_string, count_in_set, count_in_bag]\ntime_functions(algorithms, normal_case, 1000, 4, chart=True)\n
algorithms = [count_in_string, count_in_set, count_in_bag] time_functions(algorithms, normal_case, 1000, 4, chart=True)
Inputs generated by normal_case\n\nInput size count_in_string    count_in_set    count_in_bag \n      1000            34.4            36.4            65.6 \u00b5s\n      2000           111.3            88.1           147.2 \u00b5s\n      4000           342.8           189.3           306.0 \u00b5s\n      8000          1115.9           406.8           641.9 \u00b5s\n     16000          4015.2           849.7          1302.0 \u00b5s

As expected, using a set or bag pays off as the input size increases: as there are more jewels and stones, the linear search takes longer, but looking up a stone in a set of jewels or a jewel in a bag of stones takes constant time. However, for small inputs, a linear search is fast and hence the extra time to create the additional data structure doesn't pay off.

Using a set of jewels is about twice as fast as using a bag of stones, maybe because set is built-in and implemented in C, a very fast language, whereas Counter is part of the standard library implemented in Python, which is much slower. This example shows that algorithms with the same complexity can perform differently in practice, because the complexity tells us how the run-times grow, but not how fast or slow they are.

"},{"location":"essays/example-jewels/#jewels-and-stones","title":"Jewels and Stones\u00b6","text":"

Michel Wermelinger, 23 January 2024, last updated 13 March 2024

"},{"location":"essays/example-jewels/#1-tests","title":"1 Tests\u00b6","text":"

Before thinking of any solution, I write down some tests. Each test is two input strings and the expected integer output. I need to think of edge cases: inputs that are extreme values or that lead to extreme outputs.

For this problem, one edge case is that none of the stones is a jewel, which can happen in several ways: there are no stones; there are no jewels; there are stones and jewels but no character in stones occurs in jewels. Note that the problem statement doesn't prevent the strings from being empty, so we must assume they may be.

Another edge case is that all stones are jewels, which again can happen in several ways: the two input strings are the same; all stones are the same jewel; each stone is a jewel but not all jewels are among the stones.

Besides the edge cases I must also consider normal cases, where the stones include jewels and non-jewels and where the stones don't appear in the same order as the jewels.

I write the tests succinctly as a table, with one row per test, and one column with a string describing the test, one column per input, and a final column with the expected output. Later, I will use a function from algoesup to run all the tests and report those where the actual outputs differ from the expected ones.

"},{"location":"essays/example-jewels/#2-algorithms","title":"2 Algorithms\u00b6","text":"

Next I think of possible algorithms and their complexity, to decide which ones are worth implementing.

"},{"location":"essays/example-jewels/#21-without-additional-data","title":"2.1 Without additional data\u00b6","text":"

I first attempt to solve the problem directly on the input strings. One approach that comes to mind is to count how often each jewel occurs among the stones.

Algorithm 1: Set a counter to zero. For each jewel, iterate over the stones. If a stone is the same as the current jewel, increment the counter. After going through all jewels, return the counter.

If j is the number of jewels and s the number of stones, then this algorithm always has complexity j \u00d7 \u0398(s) = \u0398(j \u00d7 s) because it does at most two constant-time operations, checking equality and incrementing the counter, for each jewel and stone.

The first approach searches each jewel among the stones. The symmetric approach is to search each stone among the jewels:

Algorithm 2: Set a counter to zero. For each stone, do a linear search for it among the jewels. If it is found, increment the counter. After going through all stones, return the counter.

In the best case, each stone is the first jewel, and the search takes constant time. The best-case complexity is s \u00d7 \u0398(1) = \u0398(s). In the worst case, the search goes through all the jewels, because the stone isn't a jewel or is the last jewel. The worst-case complexity is s \u00d7 \u0398(j) = \u0398(s \u00d7 j).

"},{"location":"essays/example-jewels/#22-with-additional-data","title":"2.2 With additional data\u00b6","text":"

My next attempts consider pre-processing the inputs to make the searches faster.

Algorithm 1 goes through the stones j times, each time counting how often a jewel occurs. It's more efficient to go through the stones only once, counting how often each stone occurs, and then add the counts of those that are jewels. We need to know the frequency of each stone and the natural data type for that is the bag (or multiset).

Algorithm 3: Put all stones in a bag. Initialise a counter to zero. For each jewel, add to the counter the frequency of that jewel in the bag. Return the value of the counter.

If the bag type is implemented efficiently, both adding an item and obtaining its frequency take constant time. The complexity of Algorithm 3 is thus s \u00d7 \u0398(1) + j \u00d7 \u0398(1) = \u0398(s + j).

Algorithm 2 checks if each stone is a jewel. Is there an abstract data type (ADT) that provides such an operation? Yes, the set ADT allows checking if an item is a member of a set, which leads to...

Algorithm 4: Put all jewels in a set. Initialise a counter to zero. For each stone, if it is a member of the set, increment the counter. Return the value of the counter.

If the set ADT is implemented efficiently, both adding an item and checking membership take constant time. The complexity of Algorithm 4 is thus j \u00d7 \u0398(1) + s \u00d7 \u0398(1) = \u0398(j + s).

"},{"location":"essays/example-jewels/#23-with-sorting","title":"2.3 With sorting\u00b6","text":"

The order of jewels and stones in the input strings doesn't affect the output (the number of stones that are jewels). I can thus sort the jewels, the stones, or both, to use logarithmic binary search instead of linear search.

However, sorting takes linear time in the best case and log-linear or quadratic time in the worst case. I already have algorithms that are linear in the total size of the input, so sorting wouldn't be more efficient. I therefore do not further pursue this approach.

"},{"location":"essays/example-jewels/#24-summary","title":"2.4 Summary\u00b6","text":"

Algorithms 1 and 2 don't use additional memory, but have complexity \u0398(j \u00d7 s), while Algorithms 3 and 4 have better complexity \u0398(j + s), but have the run-time and memory overhead of an additional data structure. In practice, Algorithms 3 and 4 might be slower than Algorithms 1 and 2, so it's best to implement all of them.

However, Algorithm 1 always takes \u0398(j \u00d7 s) whereas Algorithm 2 only does so in the worst case. I therefore won't implement Algorithm 1.

Algorithm 3 creates a bag with s stones, whereas Algorithm 4 creates a set with j jewels. Typically, we expect inputs to have more stones than jewels (j < s), so Algorithm 4 is likely to be faster. Nevertheless, I'll implement Algorithm 3 too, for comparison.

"},{"location":"essays/example-jewels/#3-code","title":"3 Code\u00b6","text":"

The next function implements Algorithm 2, using Python's in operator to do the linear search.

"},{"location":"essays/example-jewels/#4-performance","title":"4 Performance\u00b6","text":"

Finally, I will measure the run-times of the above functions with the algoesup library. The library expects us to define functions that construct inputs for a given total size.

"},{"location":"essays/example-jewels/#41-generating-inputs","title":"4.1 Generating inputs\u00b6","text":"

This problem has two inputs, so I must decide how to divide the total size among both strings. I'll divide it equally: s = j. (For odd n, s = j + 1.) In that way, the expected run-times will be linear or quadratic in s: \u0398(s + j) = \u0398(2s) = \u0398(s) or \u0398(s \u00d7 j) = \u0398(s\u00b2). This makes it easy to check them empirically.

I write a function to generate a best-case input: each stone is the first jewel. To generate the input strings, I use Python's built-in chr function to obtain a character, given its Unicode number. The first 31 characters are mostly unprintable control characters, so I start at code 32 (space character).

"},{"location":"essays/example-jewels/#42-best-normal-and-worst-run-times","title":"4.2 Best, normal and worst run-times\u00b6","text":"

Algorithms 3 and 4 always have the same complexity, but not so for Algorithm 2. I can measure its run-times on best, typical and worst cases, using the library's time_cases. I start with an input size of 10 and double it four times to 20, 40, 80 and 160.

"},{"location":"essays/example-jewels/#43-fastest-and-slowest-algorithm","title":"4.3 Fastest and slowest algorithm\u00b6","text":"

The library provides time_functions to measure the run-times of up to 6 functions on the same inputs. I run the three implemented algorithms on typical inputs, with the same sizes as before.

"},{"location":"essays/example-jewels/#5-concluding-remarks","title":"5 Concluding remarks\u00b6","text":"

This essay explored a simple problem: how many of the characters in a string occur in another string? There are at least 7 algorithms: 2 don't require any additional data structure, 2 use a set or a bag to achieve constant-time searches, and 3 others sort one or both inputs. Three of the first four algorithms were implemented in Python and their run-times measured. Using a set is the best option: the complexity is the lowest possible (linear in the total size of the inputs), and the run-times are better than using a bag.

"},{"location":"essays/example-two-sum-2/","title":"Two Sum (two solutions)","text":"

This simple algorithmic essay aims to solve the classic Two Sum problem from LeetCode.

Readers should have an intermediate understanding of Python to understand this essay.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff In\u00a0[1]: Copied!
from algoesup import test, time_functions\n
from algoesup import test, time_functions In\u00a0[2]: Copied!
%load_ext algoesup.magics\n%ruff on\n%allowed on\n
%load_ext algoesup.magics %ruff on %allowed on
ruff was activated\nallowed was activated\n
In\u00a0[3]: Copied!
two_sum_tests = [\n  # [\"description\", nums, target, expected_output],\n    [\"minimum size for nums\", [1, 2], 3, (0, 1)],\n    [\"non-adjacent indices\", [1, 4, 9, 7], 8, (0, 3)],\n    [\"first two elements\", [5, 7, 1, 2, 8], 12, (0, 1)],\n    [\"last two elements\", [1, 3, 5, 7, 8], 15, (3, 4)],\n    [\"repeated elements\", [6, 2, 3, 2], 4, (1, 3)],\n    [\"max and min range\", [-109, 109, 0], 0, (0, 1)],\n    [\"lowest target value\", [-50, 1, -59], -109, (0, 2)],\n    [\"highest target value\", [50, 1, 59], 109, (0, 2)],\n]\n
two_sum_tests = [ # [\"description\", nums, target, expected_output], [\"minimum size for nums\", [1, 2], 3, (0, 1)], [\"non-adjacent indices\", [1, 4, 9, 7], 8, (0, 3)], [\"first two elements\", [5, 7, 1, 2, 8], 12, (0, 1)], [\"last two elements\", [1, 3, 5, 7, 8], 15, (3, 4)], [\"repeated elements\", [6, 2, 3, 2], 4, (1, 3)], [\"max and min range\", [-109, 109, 0], 0, (0, 1)], [\"lowest target value\", [-50, 1, -59], -109, (0, 2)], [\"highest target value\", [50, 1, 59], 109, (0, 2)], ] In\u00a0[4]: Copied!
def two_sum_bf(nums, target):\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\"\"\"\n    for index_1 in range(len(nums)):\n        for index_2 in range(len(nums)):\n            if index_1 != index_2 and nums[index_1] + nums[index_2] == target:\n                return index_1, index_2\n\ntest(two_sum_bf, two_sum_tests)\n
def two_sum_bf(nums, target): \"\"\"Given a list of integers return the indices of the pair that sums to target.\"\"\" for index_1 in range(len(nums)): for index_2 in range(len(nums)): if index_1 != index_2 and nums[index_1] + nums[index_2] == target: return index_1, index_2 test(two_sum_bf, two_sum_tests)
Testing two_sum_bf...\nTests finished: 8 passed, 0 failed.\n

Next up is the Mapping algorithm implemented using Python's dict.

In\u00a0[5]: Copied!
def two_sum_map(nums, target):\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\"\"\"\n    differences = {}\n    for index in range(len(nums)):\n        difference = target - nums[index]\n        if nums[index] in differences:\n            return differences[nums[index]], index\n        differences[difference] = index\n\ntest(two_sum_map, two_sum_tests)\n
def two_sum_map(nums, target): \"\"\"Given a list of integers return the indices of the pair that sums to target.\"\"\" differences = {} for index in range(len(nums)): difference = target - nums[index] if nums[index] in differences: return differences[nums[index]], index differences[difference] = index test(two_sum_map, two_sum_tests)
Testing two_sum_map...\nTests finished: 8 passed, 0 failed.\n
In\u00a0[6]: Copied!
def worst(size):\n    \"\"\"Given a size, generate a worst-case problem instance for two sum.\"\"\"\n    nums = [0] * (size - 2) + [1, 1]\n    target = 2\n    return (nums, target)\n
def worst(size): \"\"\"Given a size, generate a worst-case problem instance for two sum.\"\"\" nums = [0] * (size - 2) + [1, 1] target = 2 return (nums, target) In\u00a0[7]: Copied!
solutions = [two_sum_bf, two_sum_map]\ntime_functions(solutions, worst, start=100, double=4, chart=True)\n
solutions = [two_sum_bf, two_sum_map] time_functions(solutions, worst, start=100, double=4, chart=True)
Inputs generated by worst\n\nInput size      two_sum_bf     two_sum_map \n       100           450.6             5.6 \u00b5s\n       200          1794.4            10.8 \u00b5s\n       400          7407.3            22.4 \u00b5s\n       800         30596.3            45.6 \u00b5s\n      1600        124085.9            92.1 \u00b5s

The numbers printed before the chart represent the run-times of our solutions in microseconds (\u00b5s) for increasing input sizes.

On the chart, the data points for two_sum_bf almost instantly eclipse that of two_sum_map. It looks as if the run-times for two_sum_map are not growing at all, but we know by looking at numbers above that this is not the case.

Let us see if we can modify the inputs of time_functions for a better visual representation.

In\u00a0[8]: Copied!
solutions = [two_sum_bf, two_sum_map]\ntime_functions(solutions, worst, start=1, double=4, text=False, chart=True)\n
solutions = [two_sum_bf, two_sum_map] time_functions(solutions, worst, start=1, double=4, text=False, chart=True)

We changed the initial input size to 1, and the trend of the run-times is a little clearer now. The Brute force algorithm's run-times still accelerate off into the stratosphere, but we can see the separation and trend of the Mapping algorithm a little better.

"},{"location":"essays/example-two-sum-2/#two-sum-two-solutions","title":"Two Sum (two solutions)\u00b6","text":"

Michael Snowden, 24 January 2024, last updated 16 March 2024

"},{"location":"essays/example-two-sum-2/#1-problem","title":"1 Problem\u00b6","text":"

Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.

  • $-109 \\leq$ nums[i] $\\leq 109$
  • $-109 \\leq$ target $\\leq 109$
  • Only one valid answer exists.
"},{"location":"essays/example-two-sum-2/#2-algorithms","title":"2 Algorithms\u00b6","text":"

With our problem defined, the next step is to think of ways to solve it. This section presents two approaches to solving Two Sum: brute force, and mapping.

"},{"location":"essays/example-two-sum-2/#21-brute-force","title":"2.1 Brute force\u00b6","text":"

Generally speaking, a brute force algorithm tries all possibilities, and selects a correct one. For this problem, the possibilities are all sums that can be obtained by pairing each number in nums with every other number, and the correct pair is selected when the sum matches target.

Brute force algorithm: An outer loop iterates through each number in nums, then for each number, an inner loop iterates nums again. For each pair of numbers, if their indices are different and their sum matches target, return their indices.

"},{"location":"essays/example-two-sum-2/#22-mapping","title":"2.2 Mapping\u00b6","text":"

In the Brute force algorithm, we checked each pair of numbers in nums to see if the resulting sum was equal to target. Since we are already checking every number in the list, why not store some piece of information from each number that will help us find our matching pair?

For every number in nums, we can map the difference between it and the target (target - number) to its corresponding index using a hashtable. This allows us to check the hashtable for matching numbers much faster.

Mapping algorithm: For each number in nums, if it's in the hashmap, return its index and the index mapped to it. Otherwise, calculate the difference (target - number) and map it to the corresponding index of number.

"},{"location":"essays/example-two-sum-2/#3-code","title":"3 Code\u00b6","text":"

In this section we will implement and test the algorithms.

"},{"location":"essays/example-two-sum-2/#31-testing","title":"3.1 Testing\u00b6","text":"

We start off by writing some tests.

To test the above solutions, we need to consider edge cases and other important functional tests. We should include tests for the minimum input size, and any extreme values that can be present. When integers are part of the input, and there are no restrictions, negative numbers and zero should be added to the tests.

"},{"location":"essays/example-two-sum-2/#32-implementations","title":"3.2 Implementations\u00b6","text":"

The next cell implements the Brute force algorithm using nested for loops.

"},{"location":"essays/example-two-sum-2/#4-performance","title":"4 Performance\u00b6","text":"

In this section we measure the run-times of our solutions under certain conditions and discuss the results.

"},{"location":"essays/example-two-sum-2/#41-generating-inputs","title":"4.1 Generating inputs\u00b6","text":"

Since time_functions from the algoesup library requires code to generate inputs, we shall write that first.

It is often useful to measure the run-times of a solution when it is doing the most work; this is called the worst-case. We want to generate inputs that will take our solution the most time to complete, and this happens when the two numbers that sum to target are the last two.

"},{"location":"essays/example-two-sum-2/#42-run-times-for-each-solution","title":"4.2 Run-times for each solution\u00b6","text":"

We now compare worst-case runtimes for both solutions. The input has an initial size of 100, and is doubled 4 times; the run times are measured for the initial size, then each time it is doubled.

"},{"location":"essays/example-two-sum-2/#5-conclusion","title":"5 Conclusion\u00b6","text":"

We started this essay with the definition of the Two Sum problem. Next, we outlined two algorithms: brute force, and mapping. After that, we implemented and tested our solutions using Python, and in the penultimate section we used empirical testing and discussed the results. Now we must decide which of our algorithms is best.

The Brute force algorithm is not very efficient when it comes to run-times. When the size of the input increases the run-times increase by a large amount. The one redeeming aspect of this algorithm is the efficient memory usage which is achieved by not using any additional data structures like dictionaries.

In contrast, the Mapping algorithm is reasonably efficient in terms of run times, but this is achieved by using extra memory in the form of the dictionary. In the final analysis, the slow run-times of the brute force algorithm cannot be ignored. The small trade of memory for faster run-times is worth it in this instance. We therefore conclude the mapping algorithm is best.

"},{"location":"essays/example-two-sum-3/","title":"Two sum (three solutions)","text":"

In this extended algorithmic essay we aim to solve the classic Two Sum problem from LeetCode. We are going to explore, analyse, and compare a selection of approaches with the end goal of finding a clear and efficient solution.

We assume the reader has an intermediate understanding of Python, including aspects like importing modules, using loops, and applying conditionals. Furthermore, Big-Oh notation is used to analyse the complexity of our solutions and we refer to terms such as binary search and brute force.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff pytype\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff pytype In\u00a0[1]: Copied!
from algoesup import test, time_functions, time_cases\n
from algoesup import test, time_functions, time_cases In\u00a0[2]: Copied!
%load_ext algoesup.magics\n%ruff on\n%allowed on\n
%load_ext algoesup.magics %ruff on %allowed on
ruff was activated\nallowed was activated\n
In\u00a0[3]: Copied!
two_sum_tests = [\n  # [\"description\", nums, target, expected_output]\n    [\"minimum size for nums\", [1, 2], 3, (0, 1)],\n    [\"non-adjacent indices\", [1, 4, 9, 7], 8, (0, 3)],\n    [\"first two elements\", [5, 7, 1, 2, 8], 12, (0, 1)],\n    [\"last two elements\", [1, 3, 5, 7, 8], 15, (3, 4)],\n    [\"repeated elements\", [6, 2, 3, 2], 4, (1, 3)],\n    [\"max and min range\", [-109, 109, 0], 0, (0, 1)],\n    [\"lowest target value\", [-50, 1, -59], -109, (0, 2)],\n    [\"highest target value\", [50, 1, 59], 109, (0, 2)],\n]\n
two_sum_tests = [ # [\"description\", nums, target, expected_output] [\"minimum size for nums\", [1, 2], 3, (0, 1)], [\"non-adjacent indices\", [1, 4, 9, 7], 8, (0, 3)], [\"first two elements\", [5, 7, 1, 2, 8], 12, (0, 1)], [\"last two elements\", [1, 3, 5, 7, 8], 15, (3, 4)], [\"repeated elements\", [6, 2, 3, 2], 4, (1, 3)], [\"max and min range\", [-109, 109, 0], 0, (0, 1)], [\"lowest target value\", [-50, 1, -59], -109, (0, 2)], [\"highest target value\", [50, 1, 59], 109, (0, 2)], ] In\u00a0[4]: Copied!
def two_sum_bf(nums: list, target: int) -> tuple[int, int]:\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\n\n    Preconditions:\n        len(nums) >= 2\n        -109 <= nums[i] <= 109\n        -109 <= target <= 109\n        Exactly one pair a and b in nums has a + b = target\n    \"\"\"\n    for index_1 in range(len(nums)):\n        for index_2 in range(len(nums)):\n            if index_1 != index_2 and nums[index_1] + nums[index_2] == target:\n                return index_1, index_2\n\ntest(two_sum_bf, two_sum_tests)\n
def two_sum_bf(nums: list, target: int) -> tuple[int, int]: \"\"\"Given a list of integers return the indices of the pair that sums to target. Preconditions: len(nums) >= 2 -109 <= nums[i] <= 109 -109 <= target <= 109 Exactly one pair a and b in nums has a + b = target \"\"\" for index_1 in range(len(nums)): for index_2 in range(len(nums)): if index_1 != index_2 and nums[index_1] + nums[index_2] == target: return index_1, index_2 test(two_sum_bf, two_sum_tests)
Testing two_sum_bf...\nTests finished: 8 passed, 0 failed.\n

Next up is the approach that uses sorting.

In\u00a0[5]: Copied!
def two_sum_sort(nums: list, target: int) -> tuple[int, int]:\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\n\n    Preconditions:\n        len(nums) >= 2\n        -109 <= nums[i] <= 109\n        -109 <= target <= 109\n        Exactly one pair a and b in nums has a + b = target\n    \"\"\"\n    pairs = []\n    for index in range(len(nums)):\n        pairs.append((nums[index], index))\n    pairs.sort()\n    start = 0\n    end = len(nums) - 1\n    while start < end:\n        current_sum = pairs[start][0] + pairs[end][0]\n        if current_sum == target:\n            # return the indices in ascending order for reliable testing\n            lower_index = min(pairs[start][1], pairs[end][1])\n            upper_index = max(pairs[start][1], pairs[end][1])\n            indices = (lower_index, upper_index)\n            return indices\n        if current_sum < target:\n            start = start + 1\n        else:\n            end = end - 1\n\ntest(two_sum_sort, two_sum_tests)\n
def two_sum_sort(nums: list, target: int) -> tuple[int, int]: \"\"\"Given a list of integers return the indices of the pair that sums to target. Preconditions: len(nums) >= 2 -109 <= nums[i] <= 109 -109 <= target <= 109 Exactly one pair a and b in nums has a + b = target \"\"\" pairs = [] for index in range(len(nums)): pairs.append((nums[index], index)) pairs.sort() start = 0 end = len(nums) - 1 while start < end: current_sum = pairs[start][0] + pairs[end][0] if current_sum == target: # return the indices in ascending order for reliable testing lower_index = min(pairs[start][1], pairs[end][1]) upper_index = max(pairs[start][1], pairs[end][1]) indices = (lower_index, upper_index) return indices if current_sum < target: start = start + 1 else: end = end - 1 test(two_sum_sort, two_sum_tests)
Testing two_sum_sort...\nTests finished: 8 passed, 0 failed.\n

Finally, the mapping algorithm is implemented using Python's dict.

In\u00a0[6]: Copied!
def two_sum_map(nums: list, target: int) -> tuple[int, int]:\n    \"\"\"Given a list of integers return the indices of the pair that sums to target.\n\n    Preconditions:\n        len(nums) >= 2\n        -109 <= nums[i] <= 109\n        -109 <= target <= 109\n        Exactly one pair a and b in nums has a + b = target\n    \"\"\"\n    differences: dict[int, int] = {} # allowed\n    for index in range(len(nums)):\n        difference = target - nums[index]\n        if nums[index] in differences:\n            return differences[nums[index]], index\n        differences[difference] = index\n\ntest(two_sum_map, two_sum_tests)\n
def two_sum_map(nums: list, target: int) -> tuple[int, int]: \"\"\"Given a list of integers return the indices of the pair that sums to target. Preconditions: len(nums) >= 2 -109 <= nums[i] <= 109 -109 <= target <= 109 Exactly one pair a and b in nums has a + b = target \"\"\" differences: dict[int, int] = {} # allowed for index in range(len(nums)): difference = target - nums[index] if nums[index] in differences: return differences[nums[index]], index differences[difference] = index test(two_sum_map, two_sum_tests)
Testing two_sum_map...\nTests finished: 8 passed, 0 failed.\n

The brute force algorithm comes out on top in terms of simplicity: it is just a case of checking every pair of numbers. The double pointer approach seems like the most convoluted with the mapping differences algorithm somewhere in the middle of the two.

In\u00a0[7]: Copied!
def best(size: int) -> tuple[list[int], int]:\n    \"\"\"Given a size, generate a best case instance for Two Sum.\n\n    Preconditions: size >= 2\n    \"\"\"\n    nums = [1, 1] + [0] * (size - 2)\n    target = 2\n    return (nums, target)\n\ndef normal(size: int) -> tuple[list[int], int]:\n    \"\"\"Given a size, generate a normal case instance for Two Sum.\n\n    Preconditions: size >= 2\n    \"\"\"\n    nums = [0] * size\n    nums[size // 2 - 1:size // 2 + 1] = [1, 1]\n    target = 2\n    return (nums, target)\n\ndef worst(size: int) -> tuple[list[int], int]:\n    \"\"\"Given a size, generate a worst case instance for Two Sum.\n\n    Preconditions: size >= 2\n    \"\"\"\n    nums = [0] * (size - 2) + [1, 1]\n    target = 2\n    return (nums, target)\n
def best(size: int) -> tuple[list[int], int]: \"\"\"Given a size, generate a best case instance for Two Sum. Preconditions: size >= 2 \"\"\" nums = [1, 1] + [0] * (size - 2) target = 2 return (nums, target) def normal(size: int) -> tuple[list[int], int]: \"\"\"Given a size, generate a normal case instance for Two Sum. Preconditions: size >= 2 \"\"\" nums = [0] * size nums[size // 2 - 1:size // 2 + 1] = [1, 1] target = 2 return (nums, target) def worst(size: int) -> tuple[list[int], int]: \"\"\"Given a size, generate a worst case instance for Two Sum. Preconditions: size >= 2 \"\"\" nums = [0] * (size - 2) + [1, 1] target = 2 return (nums, target)

First let us see the run-times of two_sum_bf for best, normal and worst-case instances. Note the input size starts at 100 and is doubled 4 times reaching 1600 for the last data point.

In\u00a0[8]: Copied!
input_generators = [worst, normal, best]\ntime_cases(two_sum_bf, input_generators, start=100, double=4, chart=True)\n
input_generators = [worst, normal, best] time_cases(two_sum_bf, input_generators, start=100, double=4, chart=True)
Run-times for two_sum_bf\n\nInput size           worst          normal            best \n       100           456.1           225.7             0.3 \u00b5s\n       200          1825.0           901.7             0.3 \u00b5s\n       400          7434.7          3720.2             0.3 \u00b5s\n       800         30573.2         15343.2             0.3 \u00b5s\n      1600        124160.9         62221.4             0.3 \u00b5s

We can see from the chart and run-times above, that our analysis of quadratic time complexity for the worst-case seems to line up with the data. As we double the input size, the run-times quadruple. For the best case, the run-times generally stay the same for increasing inputs suggesting constant time complexity. The normal case is somewhere in the middle of the two.

Now let us do the same for two_sum_map.

In\u00a0[9]: Copied!
input_generators = [worst, normal, best]\ntime_cases(two_sum_map, input_generators, start=100, double=4, chart=True)\n
input_generators = [worst, normal, best] time_cases(two_sum_map, input_generators, start=100, double=4, chart=True)
Run-times for two_sum_map\n\nInput size           worst          normal            best \n       100             5.6             3.0             0.3 \u00b5s\n       200            11.0             5.7             0.3 \u00b5s\n       400            22.6            11.1             0.3 \u00b5s\n       800            46.6            22.6             0.3 \u00b5s\n      1600            92.9            46.1             0.3 \u00b5s

The first thing to note is the dramatic reduction in size of the run-times. The scales on the y-axis differ by orders of magnitude. Also, the plot for our worst-case on this chart has a much straighter line with run-times doubling in proportion with input size. This aligns with our prediction of linear time complexity.

In\u00a0[10]: Copied!
solutions = [two_sum_bf, two_sum_sort, two_sum_map]\ntime_functions(solutions, worst, start=100, double=4, chart=True)\n
solutions = [two_sum_bf, two_sum_sort, two_sum_map] time_functions(solutions, worst, start=100, double=4, chart=True)
Inputs generated by worst\n\nInput size      two_sum_bf    two_sum_sort     two_sum_map \n       100           454.0            14.4             5.6 \u00b5s\n       200          1805.4            28.4            11.0 \u00b5s\n       400          7469.5            57.6            22.6 \u00b5s\n       800         30776.4           116.9            45.9 \u00b5s\n      1600        124944.8           237.3            93.0 \u00b5s

The run-times for two_sum_bf almost instantly eclipse that of two_sum_sort and two_sum_map. On the chart it looks as if the run-times for two_sum_sort and two_sum_map are not growing at all, but we know by looking at the run-times above that this is not the case. Let us see if we can adjust the inputs of time_functions so the growth rates of the fastest two functions have a better visual representation in the chart.

In\u00a0[11]: Copied!
solutions = [two_sum_bf, two_sum_sort, two_sum_map]\ntime_functions(solutions, worst, start=1, double=4, text=False, chart=True)\n
solutions = [two_sum_bf, two_sum_sort, two_sum_map] time_functions(solutions, worst, start=1, double=4, text=False, chart=True)

The point at which the growth rates start to diverge is much clearer now. The brute force approach's run-times still accelerate off into the stratosphere, but we can see the separation and trend of the sorting and mapping algorithms.

"},{"location":"essays/example-two-sum-3/#two-sum-three-solutions","title":"Two sum (three solutions)\u00b6","text":"

Michael Snowden, 25 January 2024, last updated 16 March 2024

"},{"location":"essays/example-two-sum-3/#1-problem","title":"1 Problem\u00b6","text":"

To effectively solve Two Sum, it is crucial we thoroughly understand the problem. We need to identify the inputs, outputs and the relationship between them.

Leetcode provides the following problem description.

\"Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.\"

  • $-109 \\leq$ nums[i] $\\leq 109$
  • $-109 \\leq$ target $\\leq 109$
  • Only one valid answer exists.

We can extract some important information from their description, namely the pre- and post-conditions.

Preconditions:

  • $-109 \\leq$ nums[i] $\\leq 109$
  • $-109 \\leq$ target $\\leq 109$
  • Exactly one pair a and b in nums has a + b = target

Postconditions:

Let indices be the output, then:

  • len(indices) = 2;
  • nums[indices[0]] + nums[indices[1]] = target

The preconditions must be satisfied for our algorithms to be defined, and the postconditions must be satisfied for our algorithms to be correct.

"},{"location":"essays/example-two-sum-3/#2-algorithms","title":"2 Algorithms\u00b6","text":"

With our problem defined, the next step is to think of ways to solve it. This section presents three distinct approaches to solving Two sum: brute force, sorting and mapping.

"},{"location":"essays/example-two-sum-3/#21-brute-force","title":"2.1 Brute force\u00b6","text":"

Generally speaking, a brute force algorithm tries all possibilities, and selects a correct one. For this problem, the possibilities are all sums that can be obtained by pairing each number in nums with every other number, and the correct pair is identified if the sum matches target. We are checking all possible sums, so we are sure to find our indices if they exist. Looking back at the preconditions, we can see that each problem instance must have exactly one pair that sums to target. Hence this approach is guaranteed to find a solution, as long as our preconditions are met.

Getting any working solution regardless of efficiency can be an important first step. Sometimes we need to solve a problem quickly, and more importantly it gets us thinking through it, which can often lead to additional solutions.

Brute force algorithm: An outer loop iterates through each number in nums, then for each number, an inner loop iterates nums again. For each pair of numbers, if their indices are different and their sum matches target, return their indices.

1. for each index_1 from 0 to len(nums)-1:\n    1. for each index_2 from 0 to len(nums)-1:\n        1. if index_1 != index_2 and nums[index_1] + nums[index_2] == target:\n            1. let indices be (index_1, index_2)\n            2. stop\n

Let n = len(nums), then this algorithm has two nested for loops that do n iterations each. The operations performed within the inner loop are constant time, meaning this solution will do at most n $\\times$ n $\\times$ O(1) steps. Thus, the worst-case time complexity is O(n $^2$). In the best-case, the first and second numbers in nums sum to target. No matter the size of nums, the run-times would not increase. Therefore, the best-case time complexity would be O(1).

"},{"location":"essays/example-two-sum-3/#22-sorting","title":"2.2 Sorting\u00b6","text":"

For many computational problems a good question to ask is: will sorting the inputs simplify the problem and lead to a more efficient solution? In this case, the answer is yes, we can exploit the properties of a sorted input in a similar way to binary search. Rather than focusing on the middle of the sequence and reducing the search space by half, we keep track of the two ends with position variables and reduce the search space by one each time. This kind of approach is commonly referred to as a \"double pointer algorithm\" named after the two position variables.

Before we move on to a formal description of the algorithm, we need to consider a crucial aspect of the Two Sum problem: it requires indices to be returned. This has implications for our solution: direct sorting of nums is not possible because the original index positions would be altered. Thus, any additional data structures we use must keep track of the corresponding indices from elements of nums. Keeping this in mind, here is the description of our algorithm.

With sorting algorithm: Create a pair (number, index) for each number in nums. Add each pair to a list pairs, then sort the list into ascending order based on the numbers. Initialise two variables start and end to be 0 and len(nums) - 1 respectively. While start $\\neq$ end sum the numbers in pairs corresponding to the indices start and end. If the sum is less than target, move start to the right by incrementing its value by one. If the sum is greater than target, move end to the left by decrementing its value by one. If the sum matches target then return the indices of both numbers.

The logic of this strategy is as follows. The sum of the numbers at positions start and end in our pairs list will have one of the following three cases: the sum can be equal to, greater than or less than target. If the sum is equal to target, then we have found our solution and can return the indices. If the sum is less than target, we need to increase the value of our sum; the only way to do this is by moving start to the right. Remember we have sorted the list, so all values to the right are greater. If our sum is greater than target we need to decrease the value of our sum, and the only way to do that is by moving end to the left.

1. let pairs be an empty list\n2. for each index from 0 to len(nums):\n    1. let pair be (nums[index], index)\n    2. append pair to pairs\n3. let pairs be sorted by value at first index\n4. let start = 0\n5. let end = len(nums) -1\n6. while start != end:\n    1. pair_sum = pairs[start][0] + pairs[end][0]\n    2. if pair_sum = target:\n        1. let indices be (pairs[start][1], pairs[end][1])\n        2. stop\n    3. otherwise if pair_sum > target:\n        1. let end = end - 1\n    4. otherwise:\n        1. let start = start + 1\n

The important parts of this algorithm with respect to analysing time complexity are: the for loop at step number two, the sorting operation at step number three and the while loop at step number six.

Let n = len(nums), then the for loop always does n iterations, and we will assume the sorting operation has worst-case complexity of O(n log(n)) and best-case of O(n), that just leaves the while loop. The while loop will do at most n iterations in a scenario where one of the variables start or end stays in place and the other is incremented until they are next to each other.

It is clear now that the sorting operation will dominate this approach when it comes to time complexity. Therefore, this algorithm has an overall worst-case time complexity of O(n log(n)) and a best-case of O(n).

"},{"location":"essays/example-two-sum-3/#23-mapping","title":"2.3 Mapping\u00b6","text":"

In the previous algorithm we paired each number in nums with its index out of necessity. We wanted to sort nums without losing the original pairing of number to index. This action of pairing numbers to indices is a useful idea; what if instead of pairing a number directly to its index, we paired the difference between our number and the target (i.e. target - number) to its index? If we did that, then finding our pair would be a case of checking if the current number is in the pairs list.

This is a good start, but we still have a problem: the lookup operation takes linear time for a list. We need an alternative data structure, one with much more efficient lookup times. If fast lookup times are required, then we should always consider a hashtable. This data structure is known informally by many different names such as dictionary, hashmap, map and associative array. A key property of this data structure is the lookup operation has constant time complexity in the average case.

For every number in nums, we can map the difference between it and the target (target - number) to its corresponding index using a hashtable. This allows us to check the hashmap for matching numbers in constant time.

Mapping algorithm: For each number in nums, if it's in the hashmap, return its index and the index mapped to it. Otherwise, calculate the difference (target - number) and map it to the corresponding index of number.

1. let differences be an empty dictionary\n2. for index from 0 to len(nums) - 1:\n    1. if nums[index] in differences:\n        1. let indices be (differences[nums[index]], index)\n        2. stop\n    2. otherwise:\n        1. let difference = target - nums[index]\n        2. let differences[difference] = index\n

Let n = len(nums), then this algorithm has a single loop that does n iterations. Because we are using a hashmap, all the operations performed in the loop are done in constant time. Thus, our mapping algorithm has O(n) time complexity in the worst case. Similar to the brute force approach, if the correctly summing numbers are in the first two positions of nums, then the run-times will be unaffected by increasing input sizes, giving a best-case complexity of O(1).

"},{"location":"essays/example-two-sum-3/#24-summary","title":"2.4 Summary\u00b6","text":"

Many times a brute force approach is a good starting point; it is a simple strategy that is easy to implement. Moreover, this strategy is efficient in terms of its memory usage when compared to the other algorithms; it does not use additional data structures. However, this approach has an undesirable O(n $^2$) worst-case time complexity. Every time we double the input size, the run-times increase fourfold.

Our next approach used sorting to endow our list with properties useful for searching. This algorithm is perhaps the most convoluted and maybe harder to think through relative to the others. Furthermore, it requires additional memory compared to the brute force approach. The benefits of the strategy are the O(n log(n)) worst-case time complexity which improves considerably on the brute force algorithm.

The third solution made a single pass through nums and used a hashtable to map differences to indexes. While not as simple as the brute force algorithm, this approach is not hard to follow nor understand; everything is carried out in a single loop. On the other hand, this approach has the additional memory overhead of the hashtable itself, which needs to be taken into account. The main advantage with this approach is the O(n) time complexity for the worst-case, making it the most efficient when it comes to scaling run-times with input size.

When considering all three approaches, and taking into account aspects of efficiency as well as readability, the mapping algorithm seems to come out on top. It makes that classic space-time trade-off, i.e. it sacrifices some memory efficiency for time efficiency, but the simplicity of the approach combined with the efficient time complexity makes it a worthwhile exchange.

"},{"location":"essays/example-two-sum-3/#3-code","title":"3 Code\u00b6","text":"

In this section we will implement the algorithms. We shall do so using a basic subset of Python in the hope of making our code as language agnostic as possible.

Throughout this section we will make use of code quality tools such as linters and type checkers to help us meet the standards expected for clean, readable and error-free code.

"},{"location":"essays/example-two-sum-3/#31-preparation-and-imports","title":"3.1 Preparation and imports\u00b6","text":"

The next two cells set up the automatic type checking, linting and construct checking for our code cells. We also import some of the functions we will use to test, time and generate instances for our solutions.

If one or more of the styling or type checking ideals are violated, the warnings will be printed alongside the corresponding line number underneath the offending cell.

"},{"location":"essays/example-two-sum-3/#32-testing","title":"3.2 Testing\u00b6","text":"

Before we start implementing our algorithms, we write some tests. The test() function from the algoesup library is a simple way to test for correctness. It takes a function and a test table then reports any failed tests.

To test the algorithms, we need to consider edge cases and other important functional tests. Edge cases often occur at the extreme ends of the spectrum of allowed inputs or outputs, they should ideally test unexpected conditions that might reveal bugs in the code. For the Two Sum problem, we should test the minimum size for nums and also the extremes of the values that can be present. We should include negative numbers and zero in our tests because integers are present in the inputs.

The cell below contains our test table, note the descriptions of each case in the first column, and how the boundary cases, negative numbers and zero are all present in the table.

"},{"location":"essays/example-two-sum-3/#33-implementations","title":"3.3 Implementations\u00b6","text":"

The next cell implements the brute force algorithm using nested for loops and a conditional to check for the correct pair. Note how this conditional looks similar to one of the postconditions; this is a good sign.

"},{"location":"essays/example-two-sum-3/#4-performance","title":"4 Performance\u00b6","text":"

In this section we will measure the run-times of our solutions under various conditions to see if our analysis matches the results.

"},{"location":"essays/example-two-sum-3/#41-generating-inputs","title":"4.1 generating inputs\u00b6","text":"

time_functions and time_cases from the algoesup library require a function that generates problem instances of a given size. We want to be able to generate instances that correspond to best, normal and worst cases for the solutions where appropriate.

The best-, normal- and worst-case scenarios might not always be the same for each algorithm. For example, the best case for two_sum_bf and two_sum_map would be when the first two numbers encountered sum to target, but this is not the case for two_sum_sort, where the best case would be dependent on the sorting algorithm.

Since two_sum_bf and two_sum_map share the same best- and worst-case scenarios, we shall focus on those for our input generators. For the normal-case the matching numbers will be in the middle two positions of nums.

"},{"location":"essays/example-two-sum-3/#42-best-normal-and-worst-case-run-times","title":"4.2 Best, normal and worst case run-times\u00b6","text":""},{"location":"essays/example-two-sum-3/#43-run-times-for-each-solution","title":"4.3 Run-times for each solution\u00b6","text":"

Let us now compare the worst-case run-times for all three solutions side by side.

"},{"location":"essays/example-two-sum-3/#5-conclusion","title":"5 Conclusion\u00b6","text":"

We started this essay by defining the problem. We came up with three algorithms that used different approaches: brute force, sorting and mapping, then analysed the time complexity of each one. Next, we implemented and tested our solutions using Python, and in the penultimate section used empirical testing to see if our analysis matched the results. Now we must decide which of our algorithms is best.

The brute force approach, unsurprisingly, is not very efficient when it comes to run-times. We suspected this would be the case, then the empirical testing confirmed it. Its only positive attributes were its simplicity and efficient memory usage.

We are now left with a choice between the sorting and mapping approaches and I think there is a clear winner between the two. The mapping approach is more efficient in its worst-case complexity with O(n) compared to O(n log(n)) of the sorting, and on the surface seems simpler and easier to implement. Moreover, the mapping approach has the potential to be more memory efficient. For example, the sorting approach always has an auxiliary data structure the same size as nums, whereas the size of the dictionary will grow dynamically, only becoming the same size as nums in the worst case. Therefore, we must conclude the mapping algorithm is best.

"},{"location":"essays/template-data-structures/","title":"Your essay's title","text":"

This algorithmic essay template is for students of data structures and algorithms (DSA) courses. Throughout the template, there are links to relevant sections of our Writing Guide. Replace any text in italics by yours and delete this paragraph.

The introduction explains what the essay is about, the problem you are solving, and what you assume the reader to know. See our guidance on choosing a problem, writing the text and structuring the essay. This template follows the first structure in the guide. You don't need to change the following code cells.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff pytype\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff pytype In\u00a0[\u00a0]: Copied!
from algoesup import test, time_cases, time_functions\n
from algoesup import test, time_cases, time_functions

For information on what the following commands do, see our guide's sections on type checking and linting and remove this paragraph.

In\u00a0[\u00a0]: Copied!
%load_ext algoesup.magics\n# check the code's style\n%ruff on\n# check the data types\n%pytype on\n# optional: flag the Python constructs not taught in M269, our DSA course\n%allowed on\n
%load_ext algoesup.magics # check the code's style %ruff on # check the data types %pytype on # optional: flag the Python constructs not taught in M269, our DSA course %allowed on In\u00a0[\u00a0]: Copied!
tests = [\n    # Each line is a list or tuple of the form:\n    # (description, input1, input2, ..., expected_output),\n]\n
tests = [ # Each line is a list or tuple of the form: # (description, input1, input2, ..., expected_output), ] In\u00a0[\u00a0]: Copied!
# Replace solution_one with a more descriptive name.\ndef solution_one():\n    # Implement your solution here\n    pass\n\ntest(solution_one, tests)\n
# Replace solution_one with a more descriptive name. def solution_one(): # Implement your solution here pass test(solution_one, tests) In\u00a0[\u00a0]: Copied!
# Replace solution_two with a more descriptive name.\ndef solution_two():\n    # Implement your solution here\n    pass\n\ntest(solution_two, tests)\n
# Replace solution_two with a more descriptive name. def solution_two(): # Implement your solution here pass test(solution_two, tests) In\u00a0[\u00a0]: Copied!
# Replace solution_n with a more descriptive name.\ndef solution_n():\n    # Implement your solution here\n    pass\n\ntest(solution_n, tests)\n
# Replace solution_n with a more descriptive name. def solution_n(): # Implement your solution here pass test(solution_n, tests) In\u00a0[\u00a0]: Copied!
def best_case(size: int) -> tuple[...]:\n    # Implement your best-case input generator here.\n    pass\n
def best_case(size: int) -> tuple[...]: # Implement your best-case input generator here. pass In\u00a0[\u00a0]: Copied!
def worst_case(size: int) -> tuple[...]:\n    # Implement your worst-case input generator here.\n    pass\n
def worst_case(size: int) -> tuple[...]: # Implement your worst-case input generator here. pass In\u00a0[\u00a0]: Copied!
def normal_case(size: int) -> tuple[...]:\n    # Implement your normal-case input generator here.\n    pass\n
def normal_case(size: int) -> tuple[...]: # Implement your normal-case input generator here. pass In\u00a0[\u00a0]: Copied!
cases = [best_case, normal_case, worst_case]\n# Change solution_n to the name of your solution.\ntime_cases(solution_n, cases, start=10, double=4)\n
cases = [best_case, normal_case, worst_case] # Change solution_n to the name of your solution. time_cases(solution_n, cases, start=10, double=4)

Analyse the results. See the interpreting run-times section of our guide.

In\u00a0[\u00a0]: Copied!
# Change solution_one, solution_two, and solution_n to the names of your solutions.\nalgorithms = [solution_one, solution_two, solution_n]\n# Replace normal_case with best_case or worst_case, if you wish.\ntime_functions(algorithms, normal_case, 1000, 4, chart=True)\n
# Change solution_one, solution_two, and solution_n to the names of your solutions. algorithms = [solution_one, solution_two, solution_n] # Replace normal_case with best_case or worst_case, if you wish. time_functions(algorithms, normal_case, 1000, 4, chart=True)

Analyse the results. See the interpreting run-times section of our guide.

"},{"location":"essays/template-data-structures/#your-essays-title","title":"Your essay's title\u00b6","text":"

Your (and any co-author's) name, current date

"},{"location":"essays/template-data-structures/#1-tests","title":"1 Tests\u00b6","text":"

This section describes and defines the tests you will use to check your solutions. See the testing section of our guide.

"},{"location":"essays/template-data-structures/#2-algorithms","title":"2 Algorithms\u00b6","text":"

This section outlines some algorithms that solve the problem. See the algorithms section of our guide.

"},{"location":"essays/template-data-structures/#21-first-algorithm-name","title":"2.1 First algorithm name\u00b6","text":"

Describe your first strategy or approach.

Algorithm 1: Briefly describe your first algorithm.

Analyse at least the worst-case time complexity of your first algorithm.

"},{"location":"essays/template-data-structures/#22-second-algorithm-name","title":"2.2 Second algorithm name\u00b6","text":"

Describe your second strategy or approach.

Algorithm 2: Briefly describe your second algorithm.

Analyse at least the worst-case time complexity of your second algorithm.

"},{"location":"essays/template-data-structures/#2n-nth-algorithm-name","title":"2.n nth algorithm name\u00b6","text":"

Describe your nth strategy or approach.

Algorithm n: Briefly describe your nth algorithm.

Analyse at least the worst-case time complexity of your nth algorithm.

"},{"location":"essays/template-data-structures/#2n1-summary","title":"2.n+1 Summary\u00b6","text":"

This section compares the previously outlined algorithms to inform implementation decisions.

"},{"location":"essays/template-data-structures/#3-code","title":"3 Code\u00b6","text":"

This section implements and tests only the most promising algorithms. See the code section of our guide.

"},{"location":"essays/template-data-structures/#4-performance","title":"4 Performance\u00b6","text":"

This section measures and compares the run-times of your implementations, so that you can check them against your earlier complexity analysis.

"},{"location":"essays/template-data-structures/#41-generating-inputs","title":"4.1 Generating inputs\u00b6","text":"

Briefly describe your strategy and reasoning for generating the inputs.

"},{"location":"essays/template-data-structures/#42-best-normal-and-worst-run-times","title":"4.2 Best, normal and worst run-times\u00b6","text":"

State which solution(s) will be timed with best-, normal- or worst-case inputs. See the comparing cases and charting run-times sections of our guide.

"},{"location":"essays/template-data-structures/#43-fastest-and-slowest-algorithm","title":"4.3 Fastest and slowest algorithm\u00b6","text":"

Compare the run times of all your solutions for the same case. See the comparing functions and charting run-times sections of our guide.

"},{"location":"essays/template-data-structures/#5-concluding-remarks","title":"5 Concluding remarks\u00b6","text":"

Summarise your findings and conclude which solution is best.

After completing a draft of your essay, do a final check and then see the feedback guide on how to ask for, give, and handle comments.

"},{"location":"essays/template-data-structures/#6-acknowledgements","title":"6 Acknowledgements\u00b6","text":"

Credit those who helped you create the essay. See the crediting feedback section of our guide.

"},{"location":"essays/template-intro-programming/","title":"Your essay's title","text":"

This algorithmic essay template is for students of introductory programming courses. Throughout the template, there are links to relevant sections of our Writing Guide. Replace any text in italics by yours and delete this paragraph.

The introduction explains what the essay is about, the problem you are solving, and what you assume the reader to know. See our guidance on choosing a problem, writing the text and structuring the essay. This template follows the second structure in the guide. You don't need to change the following code cells.

In\u00a0[\u00a0]: Copied!
import os\n\nif 'COLAB_GPU' in os.environ:  # if we are in Google Colab...\n    !pip install algoesup --no-deps\n    !pip install allowed ruff\n
import os if 'COLAB_GPU' in os.environ: # if we are in Google Colab... !pip install algoesup --no-deps !pip install allowed ruff In\u00a0[\u00a0]: Copied!
# import functions to test the code and to measure the execution time\nfrom algoesup import test, time_functions_int\n
# import functions to test the code and to measure the execution time from algoesup import test, time_functions_int

For information on what the following commands do, see our guide's section on linting and remove this paragraph.

In\u00a0[\u00a0]: Copied!
%load_ext algoesup.magics\n# check the code's style\n%ruff on\n# optional: flag the Python constructs not taught in TM112, our introductory course\n%allowed on --config tm112.json\n
%load_ext algoesup.magics # check the code's style %ruff on # optional: flag the Python constructs not taught in TM112, our introductory course %allowed on --config tm112.json In\u00a0[\u00a0]: Copied!
tests = [\n    # Each line is a list or tuple of the form:\n    # (description, input1, input2, ..., expected_output),\n]\n
tests = [ # Each line is a list or tuple of the form: # (description, input1, input2, ..., expected_output), ] In\u00a0[\u00a0]: Copied!
def solution_one():\n    # Implement your solution here\n    pass\n\ntest(solution_one, tests)\n
def solution_one(): # Implement your solution here pass test(solution_one, tests) In\u00a0[\u00a0]: Copied!
def solution_two():\n    # Implement your solution here\n    pass\n\ntest(solution_two, tests)\n
def solution_two(): # Implement your solution here pass test(solution_two, tests) In\u00a0[\u00a0]: Copied!
# The next line assumes your functions have a single integer as input.\ntime_functions_int([solution_one, solution_two])\n
# The next line assumes your functions have a single integer as input. time_functions_int([solution_one, solution_two])

Analyse the results.

"},{"location":"essays/template-intro-programming/#your-essays-title","title":"Your essay's title\u00b6","text":"

Your (and any co-author's) name, current date

"},{"location":"essays/template-intro-programming/#1-tests","title":"1 Tests\u00b6","text":"

This section describes and defines the tests you will use to check your solutions. See the testing section of our guide.

"},{"location":"essays/template-intro-programming/#2-solutions","title":"2 Solutions\u00b6","text":""},{"location":"essays/template-intro-programming/#21-first-solution-name","title":"2.1 First solution name\u00b6","text":"

Describe your first strategy or approach, then implement and test it.

"},{"location":"essays/template-intro-programming/#22-second-solution-name","title":"2.2 Second solution name\u00b6","text":"

Describe your second strategy or approach, then implement and test it.

"},{"location":"essays/template-intro-programming/#3-performance","title":"3 Performance\u00b6","text":"

This optional section compares the performance of your solutions to see which is fastest.

"},{"location":"essays/template-intro-programming/#4-concluding-remarks","title":"4 Concluding remarks\u00b6","text":"

Summarise your findings and conclude which solution is best.

After completing a draft of your essay, do a final check and then see the feedback guide on how to ask for, give, and handle comments.

"},{"location":"essays/template-intro-programming/#5-acknowledgements","title":"5 Acknowledgements\u00b6","text":"

Credit those who helped you create the essay. See the crediting feedback section of our guide.

"}]} \ No newline at end of file diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0b9b32d..b17ce1e 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -2,92 +2,92 @@ https://dsa-ou.github.io/algoesup/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/deepnote-background/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/deepnote-how-to/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/deepnote-reference/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/deepnote/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/example-essays/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/feedback/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/getting-started-google-colab/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/getting-started-m269/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/getting-started/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/library/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/writing/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/essays/example-1-to-n/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/essays/example-jewels/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/essays/example-two-sum-2/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/essays/example-two-sum-3/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/essays/template-data-structures/ - 2024-03-19 + 2024-04-02 daily https://dsa-ou.github.io/algoesup/essays/template-intro-programming/ - 2024-03-19 + 2024-04-02 daily \ No newline at end of file diff --git a/docs/sitemap.xml.gz b/docs/sitemap.xml.gz index fabc6ffa806df8b4a3a9c1cbaec7067ed4ff1cf9..0f80b3ffb599b4bd6648ec10550dab11428d3cc9 100644 GIT binary patch literal 375 zcmV--0f_z|iwFq+UJPae|8r?{Wo=<_E_iKh0M(blZo?o9hVOfdD0f1dP18E1>1|K2 
zJpc(VVFR3DSlYKAX}hWC9j1Vg0LxgPEch|){%B3{1)emzUAd|1vH&-r)K2Znx7Qc; zRPMXGHhM!4isbBQSB@b}&z#TmT#*i7#Y7c)#c3GRg80?Q>uOtecSUMTfXnRMT7yHk zM4=Q$l3*AsrNbEeN+TCWAyAxn%NM`fZGwVD*)?_Z!0ObS4kx{g)$&mRz1Ljl&Pe!ClMv5Skj`=u*=z(%V*DiQulSk!+ Vwf~|Z=OO(zx^M0YU%8zO008yHvt$N3*xs)zOHsPm%z``M!~$kP^>75A9!AJPyJRDrgdGhqAQER< zvh)Zb=ND?pMg<|HbYv8S4Problem

It’s worth spending time on choosing an appropriate problem before putting effort into an essay about it. You may invent your own problem or select an existing one. For example, it may be a non-assessed exercise from your course, or -it may relate to your hobby or work. In so, provide any information the reader needs to understand the problem. +it may relate to your hobby or work. If so, provide any information the reader needs to understand the problem. If the problem is from your work, get permission from your employer or client.

There are many websites with thousands of algorithmic problems to choose from. We have used Kattis and LeetCode in the past.

@@ -1404,7 +1404,7 @@

Type checking

You may thus wish to initially turn off the type checking, with %pytype off, and only turn it on after all code is written and tested. You will have to run all cells of your notebook for the type checking to take place.

-

For a list of all the options for the %pytype command, +

For a list of all the options for the %pytype command, see the library reference.

Formatting

@@ -1425,7 +1425,7 @@

Formatting

Linting

You should lint your code, which means to check it for style violations.

Code style

-

Our library support ruff, the fastest Python linter. +

Our library supports ruff, the fastest Python linter. To turn it on, write the following after loading the algoesup.magics extension.

%ruff on
 

@@ -1509,7 +1509,7 @@

Language subset

if it has several constructs that weren’t taught, none of them is reported.

The allowed linter also includes the configuration for TM112, our introductory Computing course, in case you want to use even fewer constructs in your essay. -To use that configuration, write %allowed --config tm112.json on. +To use that configuration, write %allowed on --config tm112.json. For a list of all the options for the %allowed command, see the library reference.

You can configure the linter with a JSON file that lists the allowed constructs. diff --git a/poetry.lock b/poetry.lock index 341e9bc..e53efbb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -94,6 +94,52 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "black" +version = "24.3.0" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.8" +files = [ + {file = "black-24.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7d5e026f8da0322b5662fa7a8e752b3fa2dac1c1cbc213c3d7ff9bdd0ab12395"}, + {file = "black-24.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9f50ea1132e2189d8dff0115ab75b65590a3e97de1e143795adb4ce317934995"}, + {file = "black-24.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2af80566f43c85f5797365077fb64a393861a3730bd110971ab7a0c94e873e7"}, + {file = "black-24.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:4be5bb28e090456adfc1255e03967fb67ca846a03be7aadf6249096100ee32d0"}, + {file = "black-24.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f1373a7808a8f135b774039f61d59e4be7eb56b2513d3d2f02a8b9365b8a8a9"}, + {file = "black-24.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aadf7a02d947936ee418777e0247ea114f78aff0d0959461057cae8a04f20597"}, + {file = "black-24.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c02e4ea2ae09d16314d30912a58ada9a5c4fdfedf9512d23326128ac08ac3d"}, + {file = "black-24.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:bf21b7b230718a5f08bd32d5e4f1db7fc8788345c8aea1d155fc17852b3410f5"}, + {file = "black-24.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2818cf72dfd5d289e48f37ccfa08b460bf469e67fb7c4abb07edc2e9f16fb63f"}, + {file = "black-24.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4acf672def7eb1725f41f38bf6bf425c8237248bb0804faa3965c036f7672d11"}, + {file = "black-24.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:c7ed6668cbbfcd231fa0dc1b137d3e40c04c7f786e626b405c62bcd5db5857e4"}, + {file = "black-24.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:56f52cfbd3dabe2798d76dbdd299faa046a901041faf2cf33288bc4e6dae57b5"}, + {file = "black-24.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:79dcf34b33e38ed1b17434693763301d7ccbd1c5860674a8f871bd15139e7837"}, + {file = "black-24.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e19cb1c6365fd6dc38a6eae2dcb691d7d83935c10215aef8e6c38edee3f77abd"}, + {file = "black-24.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65b76c275e4c1c5ce6e9870911384bff5ca31ab63d19c76811cb1fb162678213"}, + {file = "black-24.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:b5991d523eee14756f3c8d5df5231550ae8993e2286b8014e2fdea7156ed0959"}, + {file = "black-24.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c45f8dff244b3c431b36e3224b6be4a127c6aca780853574c00faf99258041eb"}, + {file = "black-24.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6905238a754ceb7788a73f02b45637d820b2f5478b20fec82ea865e4f5d4d9f7"}, + {file = "black-24.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7de8d330763c66663661a1ffd432274a2f92f07feeddd89ffd085b5744f85e7"}, + {file = "black-24.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:7bb041dca0d784697af4646d3b62ba4a6b028276ae878e53f6b4f74ddd6db99f"}, + {file = "black-24.3.0-py3-none-any.whl", hash = "sha256:41622020d7120e01d377f74249e677039d20e6344ff5851de8a10f11f513bf93"}, + {file = "black-24.3.0.tar.gz", hash = "sha256:a0c9c4a0771afc6919578cec71ce82a3e31e054904e7197deacbc9382671c41f"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp 
(>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + [[package]] name = "bleach" version = "6.1.0" @@ -2519,6 +2565,17 @@ files = [ {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, ] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tornado" version = "6.4" @@ -2663,4 +2720,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "08c933ae89233f26bff368372a3aa22e6d109496514d01a81152f2cec0bcb002" +content-hash = "f58585b77463625e225ca73f1292acc294eb169bb04dfaa2886d994bd99a9fb2" diff --git a/pyproject.toml b/pyproject.toml index b6b1c29..c023899 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ mkdocs-material = "^9.5.11" mkdocstrings = { version = "^0.24.0", extras = ["python"] } mkdocs-jupyter = "^0.24.0" mkdocs-open-in-new-tab = "^1.0.3" +black = "^24.3.0" [build-system] requires = ["poetry-core"] diff --git a/src/algoesup/magics.py b/src/algoesup/magics.py index 8371a67..81f3674 100644 --- a/src/algoesup/magics.py +++ b/src/algoesup/magics.py @@ -1,33 +1,43 @@ """Linting tools for Jupyter Notebook environments""" +import argparse import json import os import re import subprocess import tempfile from typing import Callable +from subprocess import CompletedProcess from IPython.core.inputtransformer2 import TransformerManager from IPython.core.magic import register_line_magic -from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring from IPython.display import display_markdown -def show_errors(checker: 
str, output: str, filename: str) -> None: - """Print the errors for the given file in the checker's output.""" - md = [f"**{checker}** found issues:"] - for line in output.split("\n"): - if "syntax" in line.lower() and "error" in line.lower(): - continue # syntax errors already reported when running the cell - if m := re.match(rf".*{filename}[^\d]*(\d+[^:]*:.*)", line): - md.append(f"- {m.group(1)}") - if len(md) > 1: - display_markdown("\n".join(md), raw=True) +def show_allowed_errors(checker: str, output: CompletedProcess, filename: str) -> None: + """Print the errors for the given file in allowed's output.""" + if output.returncode > 0: + display_markdown(f"**{checker}** didn't check code:", raw=True) + print(output.stderr if output.stderr else output.stdout) + else: + md = [f"**{checker}** found issues:"] + for line in output.stdout.split("\n"): + if "syntax" in line.lower() and "error" in line.lower(): + continue # syntax errors already reported when running the cell + if m := re.match(rf".*{filename}[^\d]*(\d+[^:]*:.*)", line): + md.append(f"- {m.group(1)}") + if len(md) > 1: + display_markdown("\n".join(md), raw=True) -def show_ruff_json(checker: str, output: str, filename: str) -> None: +def show_ruff_json(checker: str, output: CompletedProcess, filename: str) -> None: """Print the errors in ruff's JSON output for the given file.""" - if errors := json.loads(output): + if output.stderr: + # ignore syntax errors: they're reported on running the cell + if not "Failed to parse" in output.stderr: + display_markdown(f"**{checker}** didn't check code:", raw=True) + print(output.stderr) + elif errors := json.loads(output.stdout): md = [f"**{checker}** found issues:"] # the following assumes errors come in line order for error in errors: @@ -41,27 +51,33 @@ def show_ruff_json(checker: str, output: str, filename: str) -> None: display_markdown("\n".join(md), raw=True) -def show_pytype_errors(checker: str, output: str, filename: str) -> None: +def 
show_pytype_errors(checker: str, output: CompletedProcess, filename: str) -> None: """Print the errors in pytype's output for the given file.""" - md = [f"**{checker}** found issues:"] - for error in output.split("\n"): - if "syntax" in error.lower() and "error" in error.lower(): - continue # syntax errors already reported when running the cell - if m := re.match(rf".*{filename}[^\d]*(\d+)[^:]*:(.*)\[(.*)\]", error): - line = m.group(1) - msg = m.group(2) - code = m.group(3) - md.append( - rf"- {line}:{msg}\[[{code}](https://google.github.io/pytype/errors.html#{code})\]" - ) - if len(md) > 1: - display_markdown("\n".join(md), raw=True) + if output.stderr: + display_markdown(f"**{checker}** didn't check code:", raw=True) + print(output.stderr) + else: + md = [f"**{checker}** found issues:"] + for error in output.stdout.split("\n"): + if "syntax" in error.lower() and "error" in error.lower(): + continue # syntax errors already reported when running the cell + if "python-compiler-error" in error: + continue + if m := re.match(rf".*{filename}[^\d]*(\d+)[^:]*:(.*)\[(.*)\]", error): + line = m.group(1) + msg = m.group(2) + code = m.group(3) + md.append( + rf"- {line}:{msg}\[[{code}](https://google.github.io/pytype/errors.html#{code})\]" + ) + if len(md) > 1: + display_markdown("\n".join(md), raw=True) # register the supported checkers, their commands and the output processor checkers: dict[str, tuple[str, Callable]] = { "pytype": [["pytype"], show_pytype_errors], - "allowed": [["allowed"], show_errors], + "allowed": [["allowed"], show_allowed_errors], "ruff": [["ruff", "check", "--output-format", "json"], show_ruff_json], } # initially no checker is active @@ -80,153 +96,131 @@ def process_status(name: str, status: str) -> None: print(name, "was deactivated") -@magic_arguments() -@argument( - "status", - choices=["on", "off"], - type=str.lower, - help="Activate or deactivate the linter. 
If omitted, show the current status.", - nargs="?", - default=None, -) -@argument( - "-d", - "--disable", - default="name-error,import-error", - help="Comma or space-separated list of error names to ignore", -) @register_line_magic def pytype(line: str) -> None: """Activate/deactivate the [pytype linter](https://google.github.io/pytype). When active, the linter checks each code cell that is executed for type errors. - - `%pytype --disable ... on` activates the linter but does not check the given errors - (see the [list of errors](https://google.github.io/pytype/errors.html)) - - `%pytype on` is equal to `%pytype --disable name-error,import-error on` + - `%pytype on ...` activates the linter with the command options given after `on` + - `%pytype on` is equal to `%pytype on --disable name-error,import-error` - `%pytype off` deactivates the linter - `%pytype` shows the current status of the linter - `%pytype?` shows this documentation and the command's options + + For a list of possible options `...`, enter `!pytype -h` in a code cell. + Some options may not be appropriate when running pytype within a notebook. + + The `--disable` option expects a list of + [errors](https://google.github.io/pytype/errors.html) to ignore, without spaces. """ - args = parse_argstring(pytype, line) - checkers["pytype"][0] = ["pytype", "--disable", args.disable] - process_status("pytype", args.status) - - -@magic_arguments() -@argument( - "-c", - "--config", - default=None, - help="Use configuration file CONFIG (default: m269.json).", -) -@argument( - "status", - choices=["on", "off"], - type=str.lower, - help="Activate or deactivate the linter. If omitted, show the current status.", - nargs="?", - default=None, -) + parser = argparse.ArgumentParser("pytype") + parser.add_argument("status", + choices=["on", "off"], + type=str.lower, + help="Activate or deactivate the linter. 
If omitted, show the current status.", + nargs="?", + default=None, + ) + parser.add_argument("-d", "--disable", + default="name-error,import-error", + help="Comma or space-separated list of error names to ignore", + ) + known, unknown = parser.parse_known_args(line.split()) + if known.status != "on" and (known or unknown): + print("warning: ignoring additional options for %pytype") + else: + checkers["pytype"][0] = ["pytype", "--disable", known.disable] + unknown + process_status("pytype", known.status) + + @register_line_magic def allowed(line: str) -> None: """Activate/deactivate the [allowed linter](https://dsa-ou.github.io/allowed). - + When active, the linter checks each code cell that is executed for any Python constructs that are not listed in the given configuration file. - - `%allowed --config ... on` activates the linter with the given configuration, - which must be `m269.json`, `tm112.json` or - [one you defined](https://dsa-ou.github.io/allowed/docs/configuration.html) - - `%allowed on` is equal to `%allowed --config m269.json on` + - `%allowed on ...` activates the linter with any command options given after `on` + - `%allowed on` is equal to `%allowed on --config m269.json` - `%allowed off` deactivates the linter - `%allowed` shows the current status of the linter - `%allowed?` shows this documentation and the command's options + + For a list of possible options `...`, enter `!allowed -h` in a code cell. + Some options may not be appropriate when running `allowed` within a notebook. + + The `--config` option expects `m269.json`, `tm112.json` or the name of a JSON file + with your own [configuration](https://dsa-ou.github.io/allowed/docs/configuration.html). 
""" - args = parse_argstring(allowed, line) - if args.config: - checkers["allowed"][0] = ["allowed", "-c", f"{args.config}"] - process_status("allowed", args.status) - - -@magic_arguments() -@argument( - "status", - choices=["on", "off"], - type=str.lower, - help="Activate or deactivate the linter. If omitted, show the current status.", - nargs="?", - default=None, -) -@argument( - "--select", - help="Comma-separated list of rule codes to enable", - type=str, - default="A,B,C90,D,E,W,F,N,PL", -) -@argument( - "--ignore", - help="Comma-separated list of rule codes to ignore", - type=str, - default="D100,W292,F401,F821,D203,D213,D415", -) + parser = argparse.ArgumentParser("allowed") + parser.add_argument("status", + choices=["on", "off"], + type=str.lower, + help="Activate or deactivate the linter. If omitted, show the current status.", + nargs="?", + default=None, + ) + parser.add_argument("-c", "--config", + default=None, + help="Use configuration file CONFIG (default: m269.json).", + ) + known, unknown = parser.parse_known_args(line.split()) + if known.status != "on" and (known or unknown): + print("warning: ignoring additional options for %allowed") + else: + config = ["-c", known.config] if known.config else [] + checkers["allowed"][0] = ["allowed"] + config + unknown + process_status("allowed", known.status) + + @register_line_magic def ruff(line: str) -> None: """Activate/deactivate the [Ruff linter](https://docs.astral.sh/ruff). When active, the linter checks each code cell that is executed against the selected code style rules. - - - `%ruff --select ... --ignore ... 
on` activates the linter with the given rules - (see [the list of rules](https://docs.astral.sh/ruff/rules)) - - `%ruff on` is equal to `%ruff --select A,B,C90,D,E,W,F,N,PL --ignore D100,W292,F401,F821,D203,D213,D415 on` + + - `%ruff on ...` activates the linter with any command options given after `on` + (see [ruff's list of rules]) + - `%ruff on` is equal to `%ruff on --select A,B,C90,D,E,W,F,N,PL --ignore D100,W292,F401,F821,D203,D213,D415` - `%ruff off` deactivates the linter - `%ruff` shows the current status of the linter - - `%ruff?` shows this documentation and the command's options - """ - args = parse_argstring(ruff, line) - base = ["ruff", "check", "--output-format", "json"] - checkers["ruff"][0] = base + ["--select", args.select, "--ignore", args.ignore] - process_status("ruff", args.status) + - `%ruff?` shows this documentation + The command `%ruff on ...` will run `ruff check --output-format json ...` on each cell. + For a list of the possible options `...`, enter `!ruff help check` in a code cell. + Some options may not be appropriate when running Ruff within a notebook. 
-# TODO: add an option to set the output processor function -@register_line_magic -def checker(line: str) -> None: - """Define or turn on/off a given checker.""" - global checkers, active - - args = line.split() - if len(args) == 0: - print("Active checkers:", *active) - print("Inactive checkers:", *(set(checkers) - active)) - return - - name = args[0] - if len(args) == 1: - if name not in checkers: - print(f"Checker {name} isn't defined.") - else: - status = "active" if name in active else "inactive" - print(f"Checker {name} is {status} and defined as '{checkers[name]}'.") - elif args[1].lower() == "on": - if name not in checkers: - print(f"Error: checker {name} isn't defined.") - else: - active.add(name) - print(f"Checker {name} has been activated.") - elif args[1].lower() == "off": - if name not in checkers: - print(f"Error: checker {name} isn't defined.") - else: - active.discard(name) - print(f"Checker {name} has been deactivated.") + The `--select` and `--ignore` options expect a list + of [rule codes](https://docs.astral.sh/ruff/rules), without spaces. + """ + parser = argparse.ArgumentParser("ruff") + parser.add_argument("status", + choices=["on", "off"], + type=str.lower, + help="Activate or deactivate the linter. 
If omitted, show the current status.", + nargs="?", + default=None, + ) + parser.add_argument("--select", + help="Comma-separated list of rule codes to enable", + type=str, + default="A,B,C90,D,E,W,F,N,PL", + ) + parser.add_argument("--ignore", + help="Comma-separated list of rule codes to ignore", + type=str, + default="D100,W292,F401,F821,D203,D213,D415", + ) + known, unknown = parser.parse_known_args(line.split()) + if known.status != "on" and (known or unknown): + print("warning: ignoring additional options for %ruff") else: - command = " ".join(args[1:]) - status = "redefined" if name in checkers else "defined" - checkers[name] = command - active.add(name) - print(f"Checker {name} has been {status} and activated.") + base = ["ruff", "check", "--output-format", "json"] + rules = ["--select", known.select, "--ignore", known.ignore] + checkers["ruff"][0] = base + rules + unknown + process_status("ruff", known.status) def run_checkers(result) -> None: @@ -245,7 +239,7 @@ def run_checkers(result) -> None: try: output = subprocess.run( lint_file, capture_output=True, text=True, check=False, - ).stdout + ) display(checker, output, temp_name) except Exception as e: print(f"Error on executing {command[0]}:\n{e}") @@ -256,11 +250,11 @@ def run_checkers(result) -> None: def load_ipython_extension(ipython): - """Loads the ipython extension, and registers run_checkers with post_cell_run - + """Load the ipython extension, and register run_checkers with post_cell_run + This function hooks into the ipython extension system so the magic commands defined - in this module can be loaded with `load_ext algoesup.magics`. It also registers - `run_checkers` with the `post_run_cell` event so the linters are run with the + in this module can be loaded with `load_ext algoesup.magics`. It also registers + `run_checkers` with the `post_run_cell` event so the linters are run with the contents of each ipython cell after it has been executed. 
""" ipython.events.register("post_run_cell", run_checkers) # type: ignore[name-defined] diff --git a/src/docs/essays/example-1-to-n.ipynb b/src/docs/essays/example-1-to-n.ipynb index b299d0e..650fe79 100644 --- a/src/docs/essays/example-1-to-n.ipynb +++ b/src/docs/essays/example-1-to-n.ipynb @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -76,7 +76,7 @@ "# check the code for style violations\n", "%ruff on\n", "# check that only the subset of Python taught in our introductory course TM112 is used\n", - "%allowed --config tm112.json on" + "%allowed on --config tm112.json" ] }, { diff --git a/src/docs/writing.md b/src/docs/writing.md index e6845d5..214efa7 100644 --- a/src/docs/writing.md +++ b/src/docs/writing.md @@ -265,7 +265,7 @@ You may thus wish to initially turn off the type checking, with `%pytype off`, and only turn it on after all code is written and tested. You will have to run all cells of your notebook for the type checking to take place. -For a list of all the options for the `%pytype` command, +For a list of all the options for the `%pytype` command, see the [library reference](library.md#algoesup.magics.pytype). ### Formatting @@ -290,7 +290,7 @@ so you will see fewer warnings in the next stage. You should **lint** your code, which means to check it for style violations. #### Code style -Our library supports `ruff`, the fastest Python linter. +Our library supports `ruff`, the fastest Python linter. To turn it on, write the following after loading the `algoesup.magics` extension. ```python %ruff on @@ -381,7 +381,7 @@ if it has several constructs that weren't taught, none of them is reported. The `allowed` linter also includes the configuration for TM112, our introductory Computing course, in case you want to use even fewer constructs in your essay. -To use that configuration, write `%allowed --config tm112.json on`. 
+To use that configuration, write `%allowed on --config tm112.json`. For a list of all the options for the `%allowed` command, see the [library reference](library.md#algoesup.magics.allowed).