From 2aa419816ac160804308f265fd1a34732096008f Mon Sep 17 00:00:00 2001 From: Nick Ruest Date: Sun, 3 Mar 2019 15:25:46 -0500 Subject: [PATCH] Rename existing notebook and create generic notebook (#11) - Rename notebook files - Create example notebook with existing data - Create notebook with empty collection id, and help text - Update language in README and notebooks - Tweak Dockerfile - Resolves #9 --- Dockerfile | 10 ++++--- README.md | 24 ++++++++-------- ...tebook.ipynb => auk-notebook-example.ipynb | 26 +++++------------ ...otebook_OUTPUT.ipynb => auk-notebook.ipynb | 28 ++++++------------- 4 files changed, 35 insertions(+), 53 deletions(-) rename AUK_Notebook.ipynb => auk-notebook-example.ipynb (98%) rename AUK_Notebook_OUTPUT.ipynb => auk-notebook.ipynb (97%) diff --git a/Dockerfile b/Dockerfile index 6a54a46..f6a39f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,11 @@ # Ubuntu 18.04 +# https://github.com/jupyter/docker-stacks/blob/master/base-notebook/Dockerfile FROM jupyter/base-notebook # Metadata LABEL maintainer="Nick Ruest " LABEL description="Docker image for the Archives Unleashed Notebooks" -LABEL website="http://archivesunleashed.org/" +LABEL website="https://archivesunleashed.org/" RUN pip install ggplot==0.11.5 \ matplotlib==3.0.2 \ @@ -19,10 +20,11 @@ RUN sed -i 's/pandas.lib/pandas/g' /opt/conda/lib/python3.7/site-packages/ggplot # Make things cleaner in Notebook RUN rm -rf $HOME/work -# Copy auk-notebook stuff over. +# Copy auk-notebook files over. 
COPY data $HOME/data COPY nltk_data $HOME/nltk_data -COPY AUK_Notebook.ipynb $HOME -COPY AUK_Notebook_OUTPUT.ipynb $HOME +COPY auk-notebook.ipynb $HOME +COPY auk-notebook-example.ipynb $HOME +# Start Jupyter Notebook CMD ["start.sh", "jupyter", "notebook"] diff --git a/README.md b/README.md index d5628dc..1edfbe5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Archives Unleashed Cloud: Jupyter Notebooks -A prototype Jupyter notebook to assist in creating additional visualizations from Archives Unleashed Cloud derivatives. +[Jupyter](https://jupyter.org/) notebooks to assist in creating additional analysis and visualizations of Archives Unleashed Cloud derivatives. ![notebook screenshot](https://user-images.githubusercontent.com/3834704/53252943-1a89b880-368e-11e9-9a9a-31c43a045a55.png) @@ -8,7 +8,7 @@ A prototype Jupyter notebook to assist in creating additional visualizations fro Jupyter Notebook. Follow the installation instructions on [their website](https://jupyter.org). -Dependencies. Any version higher than below should work: +Dependencies. Any version higher than below _should_ work: * Python 3.7 * ggplot (0.11.5) @@ -18,13 +18,18 @@ Dependencies. Any version higher than below should work: * networkx (2.2) * nltk (3.4) -## Installation +## Usage -Download this notebook from the Archives Unleashed Cloud as a derivative (or from here). Place the Cloud derivatives in a directory labelled `data`, which should be housed in the same directory that you are running the notebook from. +We suggest using [Docker](https://www.docker.com/get-started): -This repository comes with sample data, you can swap out the sample data with your own Cloud data. +```bash +git clone https://github.com/archivesunleashed/auk-notebooks.git +cd auk-notebooks +docker build -t auk-notebook . 
+docker run --rm -it -p 8888:8888 auk-notebook +``` -To run this sample: +If you have the dependencies installed: ```bash git clone https://github.com/archivesunleashed/auk-notebooks.git @@ -32,13 +37,10 @@ cd auk-notebooks jupyter notebook ``` -## Docker +This repository comes with sample data. You can swap out the sample data with your own Cloud data; with Docker, mount your data directory over the sample data: ```bash -git clone https://github.com/archivesunleashed/auk-notebooks.git -cd auk-notebooks -docker build -t auk-notebook . -docker run --rm -it -p 8888:8888 auk-notebook +docker run --rm -it -p 8888:8888 -v "/path/to/own/data:/home/jovyan/data" auk-notebook ``` ## Contributing diff --git a/AUK_Notebook.ipynb b/auk-notebook-example.ipynb similarity index 98% rename from AUK_Notebook.ipynb rename to auk-notebook-example.ipynb index 01e18f7..1949e84 100644 --- a/AUK_Notebook.ipynb +++ b/auk-notebook-example.ipynb @@ -10,23 +10,23 @@ "\n", "# Welcome\n", "\n", - "Welcome to the Archives Unleashed Cloud Visualization Demo Jupyter Notebook. This demonstration takes the main derivatives from the Cloud and uses Python to analyze and produce information about your collection.\n", + "Welcome to the Archives Unleashed Cloud Jupyter Notebook. 
This demonstration takes the main derivatives from the Cloud and uses Python to analyze and produce information about your collection.\n", "\n", - "This product is in beta, so if you encounter any issues, please post an [issue in our Github repository](https://github.com/archivesunleashed/auk/issues) to let us know about any bugs you encountered or features you would like to see included.\n", + "Please feel free to create an [issue](https://github.com/archivesunleashed/auk/issues) to let us know about any bugs you encountered or improvements you would like to see.\n", "\n", - "If you have some basic Python coding experience, you can change the provided code to suit your own needs.\n", + "If you have Python experience, please feel free to change the provided code to suit your own needs.\n", "\n", "We recommend that you use `File > Make a Copy` first before changing the code in the repository. That way, you can always return to the basic visualizations we have offered here. Of course, you can also just re-download the Jupyter Notebook file from your Archives Unleashed Cloud account.\n", "\n", "### How Jupyter Notebooks Work:\n", "\n", - "If you have no previous experience of Jupyter Notebooks, the most important thing to understand is that that ` + ` will run the python code inside a window and output it to the site.\n", + "If you have no previous experience with Jupyter Notebooks, the most important thing to understand is that `Shift + Enter` will run the Python code inside a cell and display its output below the cell.\n", " \n", - "The cells that cover the required inputs, marked \"Setup\", need to be run before the rest of the notebook will work. These will import all the libraries and set basic variables (e.g. where your derivative files are located) for the notebook. After that, everything else should be able to run on its own.\n", + "The cells that cover the required inputs, marked \"Setup\", need to be run before the rest of the notebook will work. 
These cells will import all the libraries and set basic variables (e.g. where your derivative files are located) for the notebook. After that, everything else should be able to run on its own.\n", "\n", "If you just want to see all results for your collection, use `Cell > Run All`.\n" ] - }, + }, { "cell_type": "markdown", "metadata": {}, @@ -38,19 +38,7 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.7/site-packages/ggplot/utils.py:81: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.\n", - "You can access Timestamp as pandas.Timestamp\n", - " pd.tslib.Timestamp,\n", - "/usr/local/lib/python3.7/site-packages/nltk/draw/__init__.py:15: UserWarning: nltk.draw package not loaded (please install Tkinter library).\n", - " warnings.warn(\"nltk.draw package not loaded \" \"(please install Tkinter library).\")\n" - ] - } - ], + "outputs": [], "source": [ "# Required imports from sys\n", "\n", diff --git a/AUK_Notebook_OUTPUT.ipynb b/auk-notebook.ipynb similarity index 97% rename from AUK_Notebook_OUTPUT.ipynb rename to auk-notebook.ipynb index 01e18f7..46fd9d4 100644 --- a/AUK_Notebook_OUTPUT.ipynb +++ b/auk-notebook.ipynb @@ -10,19 +10,19 @@ "\n", "# Welcome\n", "\n", - "Welcome to the Archives Unleashed Cloud Visualization Demo Jupyter Notebook. This demonstration takes the main derivatives from the Cloud and uses Python to analyze and produce information about your collection.\n", + "Welcome to the Archives Unleashed Cloud Jupyter Notebook. 
This demonstration takes the main derivatives from the Cloud and uses Python to analyze and produce information about your collection.\n", "\n", - "This product is in beta, so if you encounter any issues, please post an [issue in our Github repository](https://github.com/archivesunleashed/auk/issues) to let us know about any bugs you encountered or features you would like to see included.\n", + "Please feel free to create an [issue](https://github.com/archivesunleashed/auk/issues) to let us know about any bugs you encountered or improvements you would like to see.\n", "\n", - "If you have some basic Python coding experience, you can change the provided code to suit your own needs.\n", + "If you have Python experience, please feel free to change the provided code to suit your own needs.\n", "\n", "We recommend that you use `File > Make a Copy` first before changing the code in the repository. That way, you can always return to the basic visualizations we have offered here. Of course, you can also just re-download the Jupyter Notebook file from your Archives Unleashed Cloud account.\n", "\n", "### How Jupyter Notebooks Work:\n", "\n", - "If you have no previous experience of Jupyter Notebooks, the most important thing to understand is that that ` + ` will run the python code inside a window and output it to the site.\n", + "If you have no previous experience with Jupyter Notebooks, the most important thing to understand is that `Shift + Enter` will run the Python code inside a cell and display its output below the cell.\n", " \n", - "The cells that cover the required inputs, marked \"Setup\", need to be run before the rest of the notebook will work. These will import all the libraries and set basic variables (e.g. where your derivative files are located) for the notebook. After that, everything else should be able to run on its own.\n", + "The cells that cover the required inputs, marked \"Setup\", need to be run before the rest of the notebook will work. 
These cells will import all the libraries and set basic variables (e.g. where your derivative files are located) for the notebook. After that, everything else should be able to run on its own.\n", "\n", "If you just want to see all results for your collection, use `Cell > Run All`.\n" ] @@ -38,19 +38,7 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.7/site-packages/ggplot/utils.py:81: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.\n", - "You can access Timestamp as pandas.Timestamp\n", - " pd.tslib.Timestamp,\n", - "/usr/local/lib/python3.7/site-packages/nltk/draw/__init__.py:15: UserWarning: nltk.draw package not loaded (please install Tkinter library).\n", - " warnings.warn(\"nltk.draw package not loaded \" \"(please install Tkinter library).\")\n" - ] - } - ], + "outputs": [], "source": [ "# Required imports from sys\n", "\n", @@ -69,7 +57,9 @@ "from nltk.sentiment.util import *\n", "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", "\n", - "coll_id = \"4656\"\n", + "# Add the collection id of your Archive-It collection:\n", + "coll_id = \"\"\n", + "# Change the path to your derivatives files if they are not in the data directory.\n", "auk_fp = \"./data/\"\n", "auk_full_text = auk_fp + coll_id + \"-fulltext.txt\"\n", "auk_gephi = auk_fp + coll_id + \"-gephi.gexf\"\n",