Skip to content

Commit

Permalink
Lab 4 working
Browse files Browse the repository at this point in the history
  • Loading branch information
lakshmanok committed Mar 24, 2017
1 parent 2b3535e commit 73c0efa
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 115 deletions.
204 changes: 89 additions & 115 deletions courses/unstructured/ML-Tests-Solution.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 7,
"metadata": {
"collapsed": false
},
Expand All @@ -11,23 +11,44 @@
"name": "stdout",
"output_type": "stream",
"text": [
"ENTER API KEY HERE\n",
"ENTER-PROJECT-ID-HERE\n"
"AIzaSyAZVBIe2yMB7TckGFRBvnoumMX_zMtM6LU\n",
"cloud-training-demos\n"
]
}
],
"source": [
"APIKEY=\"ENTER API KEY HERE\"\n",
"APIKEY=\"AIzaSyAZVBIe2yMB7TckGFRBvnoumMX_zMtM6LU\" # CHANGE\n",
"print APIKEY\n",
"\n",
"PROJECT_ID = \"cloud-training-demos\" # CHANGE\n",
"print PROJECT_ID \n",
"\n",
"PROJECT_ID = \"ENTER-PROJECT-ID-HERE\"\n",
"print PROJECT_ID "
"BUCKET = \"cloud-training-demos-ml\" # CHANGE"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\n",
"os.environ['BUCKET'] = BUCKET\n",
"os.environ['PROJECT'] = PROJECT_ID"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h2> Finding specific text in a corpus of scanned documents </h2>"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
Expand All @@ -38,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 35,
"metadata": {
"collapsed": false
},
Expand All @@ -47,30 +68,31 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{u'results': [{u'alternatives': [{u'confidence': 0.98267895, u'transcript': u'how old is the Brooklyn Bridge'}]}]}\n",
"how old is the Brooklyn Bridge\n"
"['gs://cloud-training-demos-ml/unstructured/photos/snapshot1.png', 'gs://cloud-training-demos-ml/unstructured/photos/snapshot2.png', 'gs://cloud-training-demos-ml/unstructured/photos/snapshot3.png', 'gs://cloud-training-demos-ml/unstructured/photos/snapshot4.png', 'gs://cloud-training-demos-ml/unstructured/photos/snapshot5.png', 'gs://cloud-training-demos-ml/unstructured/photos/snapshot6.png', 'gs://cloud-training-demos-ml/unstructured/photos/snapshot7.png', 'gs://cloud-training-demos-ml/unstructured/photos/snapshot8.png', '']\n"
]
}
],
"source": [
"sservice = build('speech', 'v1beta1', developerKey=APIKEY)\n",
"response = sservice.speech().syncrecognize(\n",
" body={\n",
" 'config': {\n",
" 'encoding': 'LINEAR16',\n",
" 'sampleRate': 16000\n",
" },\n",
" 'audio': {\n",
" 'uri': 'gs://cloud-training-demos/vision/audio.raw'\n",
" }\n",
" }).execute()\n",
"print response\n",
"print response['results'][0]['alternatives'][0]['transcript']"
"import subprocess\n",
"# List every object under the photos/ prefix of the bucket (one gs:// URI per output line).\n",
"images = subprocess.check_output([\"gsutil\", \"ls\", \"gs://{}/unstructured/photos\".format(BUCKET)])\n",
"# gsutil's output ends with a newline; drop blank entries so the list holds only real URIs\n",
"# (otherwise a trailing '' entry is carried into any later loop over images).\n",
"images = [uri for uri in images.splitlines() if uri]\n",
"print images"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are a few of the images we are going to search.\n",
"\n",
"<img src=\"https://storage.googleapis.com/cloud-training-demos-ml/unstructured/photos/snapshot1.png\" />\n",
"<img src=\"https://storage.googleapis.com/cloud-training-demos-ml/unstructured/photos/snapshot2.png\" />\n",
"<img src=\"https://storage.googleapis.com/cloud-training-demos-ml/unstructured/photos/snapshot5.png\" />"
]
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 37,
"metadata": {
"collapsed": false
},
Expand All @@ -79,117 +101,69 @@
"name": "stdout",
"output_type": "stream",
"text": [
"dog\n",
"mammal\n",
"vertebrate\n",
"dog breed group\n",
"dog like mammal\n",
"puppy\n",
"dog crossbreeds\n"
"gs://cloud-training-demos-ml/unstructured/photos/snapshot1.png\n",
"gs://cloud-training-demos-ml/unstructured/photos/snapshot2.png\n",
"image=gs://cloud-training-demos-ml/unstructured/photos/snapshot2.png contains the following text: ARR DELAY\n",
"DEP DELAY\n",
"count 45792.000000 46057.000000\n",
"mean 45.797650 50.822068\n",
"std 62.863612 61.079590\n",
"min -46.000000 10.000000\n",
"25%\n",
"11.000000\n",
"17.000000\n",
"5096 27000000 30.000000\n",
"75% 59000000 60.000000\n",
"max 1321.000000 1330.000000\n",
"\n",
"image=gs://cloud-training-demos-ml/unstructured/photos/snapshot2.png contains the following text: 1321.000000\n",
"gs://cloud-training-demos-ml/unstructured/photos/snapshot3.png\n",
"gs://cloud-training-demos-ml/unstructured/photos/snapshot4.png\n",
"gs://cloud-training-demos-ml/unstructured/photos/snapshot5.png\n",
"gs://cloud-training-demos-ml/unstructured/photos/snapshot6.png\n",
"gs://cloud-training-demos-ml/unstructured/photos/snapshot7.png\n",
"gs://cloud-training-demos-ml/unstructured/photos/snapshot8.png\n",
"\n"
]
}
],
"source": [
"\n",
"# Running Vision API\n",
"# Running Vision API to find images that have a specific search term\n",
"import base64\n",
"IMAGE=\"gs://BUCKET_NAME/unstructured/noirbree.jpg\"\n",
"vservice = build('vision', 'v1', developerKey=APIKEY)\n",
"request = vservice.images().annotate(body={\n",
"\n",
"SEARCH_TERM = u\"1321\"\n",
"\n",
"for IMAGE in images:\n",
" print IMAGE\n",
" vservice = build('vision', 'v1', developerKey=APIKEY)\n",
" request = vservice.images().annotate(body={\n",
" 'requests': [{\n",
" 'image': {\n",
" 'source': {\n",
" 'gcs_image_uri': IMAGE\n",
" }\n",
" },\n",
" 'features': [{\n",
" 'type': 'LABEL_DETECTION',\n",
" 'type': 'TEXT_DETECTION',\n",
" 'maxResults': 100,\n",
" }]\n",
" }],\n",
" })\n",
"outputs = request.execute(num_retries=3)\n",
"#print outputs\n",
"\n",
"for output in outputs['responses'][0]['labelAnnotations']:\n",
" print u\"{0}\".format(output['description'])"
" outputs = request.execute(num_retries=3)\n",
" # print outputs\n",
" if 'responses' in outputs and len(outputs['responses']) > 0 and 'textAnnotations' in outputs['responses'][0]:\n",
" for output in outputs['responses'][0]['textAnnotations']:\n",
" if SEARCH_TERM in output['description']:\n",
" print u\"image={} contains the following text: {}\".format(IMAGE, output['description'])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Added executeTranslate() function.\n"
]
}
],
"source": [
"def executeTranslate(inputs):\n",
" from googleapiclient.discovery import build\n",
" service = build('translate', 'v2', developerKey=APIKEY)\n",
" translator = service.translations()\n",
" outputs = translator.list(source='en', target='es', q=inputs).execute() \n",
" return outputs['translations'][0]['translatedText']\n",
"\n",
"print \"Added executeTranslate() function.\""
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[u'Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do', u'']\n",
"[u' Once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, \"and what is the use of a book,\" thought Alice, \"without pictures or conversations?\" So she was considering in her own mind (as well as she could, for the day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her', u'']\n",
"[u' There was nothing so very remarkable in that, nor did Alice think it so very much out of the way to hear the Rabbit say to itself, \"Oh dear! Oh dear! I shall be too late!\" But when the Rabbit actually took a watch out of its waistcoat-pocket and looked at it and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and, burning with curiosity, she ran across the field after it and was just in time to see it pop down a large rabbit-hole, under the hedge', u'']\n",
"[u' In another moment, down went Alice after it', u'']\n"
]
}
],
"source": [
"alice = sc.textFile(\"gs://BUCKET_NAME/unstructured/alice-short-transformed.txt\")\n",
"alice = alice.map(lambda x: x.split(\".\"))\n",
" \n",
"for eachSentence in alice.take(10):\n",
" print u\"{0}\".format(eachSentence)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Alice estaba empezando a cansarse de estar sentada por su hermana en la orilla, y de no tener nada que hacer\n",
"Una o dos veces se había asomado al libro que su hermana estaba leyendo, pero no tenía dibujos ni diálogos en ella, &quot;y lo que es el uso de un libro&quot;, pensó Alicia, &quot;sin dibujos ni diálogos?&quot; Así que estaba pensando en su propia mente (así como pudo, para el día hacía sentirse muy somnoliento y estúpido), si el placer de hacer una conexión en cadena valdría la pena el esfuerzo de levantarse y coger las margaritas, cuando de repente, un conejo blanco con ojos rosas pasó corriendo junto a ella\n",
"No había nada tan muy notable, ya que, ni le pareció a Alicia por lo mucho fuera del camino para escuchar el conejo se decía a sí mismo, &quot;Oh cielos! ¡Ay! Voy a ser demasiado tarde!&quot; Pero cuando el conejo sacó un reloj fuera de su bolsillo del chaleco y lo miró y luego apretó el paso, Alice empezó a ponerse en pie, ya que cruzó por su mente que ella nunca antes había visto un conejo, ya sea con un bolsillo del chaleco, o un reloj para sacar de ella, y, muerta de curiosidad, corrió a través del campo y después de que fue justo a tiempo para ver cómo se precipitaba en una gran madriguera bajo el seto\n",
"En otro momento, se fue hacia abajo después de que Alice\n"
]
}
],
"cell_type": "markdown",
"metadata": {},
"source": [
"aliceTranslated = alice.map(executeTranslate)\n",
"<h2> Running in parallel using Spark </h2>\n",
"\n",
"for eachSentance in aliceTranslated.take(10):\n",
" print u\"{0}\".format(eachSentance)"
"As the number of items increases, we need to parallelize the calls. Here, we do sentiment analysis on a bunch of text in parallel."
]
},
{
Expand Down Expand Up @@ -313,7 +287,7 @@
"sentiments = comments.map(executeSentimentAnalysis)\n",
"\n",
"for sentiment in sentiments.collect():\n",
" print u\"Score:{0} and Magnitde:{1}\".format(sentiment['documentSentiment']['score'], sentiment['documentSentiment']['magnitude'])"
" print u\"Score:{0} and Magnitude:{1}\".format(sentiment['documentSentiment']['score'], sentiment['documentSentiment']['magnitude'])"
]
},
{
Expand Down
Binary file added courses/unstructured/photos/snapshot1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added courses/unstructured/photos/snapshot2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added courses/unstructured/photos/snapshot3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added courses/unstructured/photos/snapshot4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added courses/unstructured/photos/snapshot5.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added courses/unstructured/photos/snapshot6.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added courses/unstructured/photos/snapshot7.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added courses/unstructured/photos/snapshot8.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions courses/unstructured/replace_and_upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@ done
# first the originals, then the modified
gsutil -m cp * gs://$BUCKET/unstructured
gsutil -m cp $TEMP/* gs://$BUCKET/unstructured

# photos ...
gsutil -m cp photos/* gs://$BUCKET/unstructured/photos
gsutil acl ch -g AllUsers:R gs://$BUCKET/unstructured/photos/*

0 comments on commit 73c0efa

Please sign in to comment.