-
Notifications
You must be signed in to change notification settings - Fork 1
/
Integrate with Machine Learning APIs: Challenge Lab
237 lines (97 loc) · 5.49 KB
/
Integrate with Machine Learning APIs: Challenge Lab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
FIRST AND SECOND TASK TOGETHER ---------
export SANAME=challenge
gcloud iam service-accounts create $SANAME
gcloud projects add-iam-policy-binding $DEVSHELL_PROJECT_ID --member=serviceAccount:$SANAME@$DEVSHELL_PROJECT_ID.iam.gserviceaccount.com --role=roles/bigquery.admin
gcloud projects add-iam-policy-binding $DEVSHELL_PROJECT_ID --member=serviceAccount:$SANAME@$DEVSHELL_PROJECT_ID.iam.gserviceaccount.com --role=roles/storage.admin
gcloud iam service-accounts keys create sa-key.json --iam-account $SANAME@$DEVSHELL_PROJECT_ID.iam.gserviceaccount.com
export GOOGLE_APPLICATION_CREDENTIALS=${PWD}/sa-key.json
THIRD TASK ----------------
gsutil cp gs://$DEVSHELL_PROJECT_ID/analyze-images.py .
REPLACE THE CONTENT OF "analyze-images.py" FILE WITH THIS --------
# Dataset: image_classification_dataset
# Table name: image_text_detail
import os
import sys
# Import Google Cloud Library modules
from google.cloud import storage, bigquery, language, vision, translate_v2
if ('GOOGLE_APPLICATION_CREDENTIALS' in os.environ):
if (not os.path.exists(os.environ['GOOGLE_APPLICATION_CREDENTIALS'])):
print ("The GOOGLE_APPLICATION_CREDENTIALS file does not exist.\n")
exit()
else:
print ("The GOOGLE_APPLICATION_CREDENTIALS environment variable is not defined.\n")
exit()
if len(sys.argv)<3:
print('You must provide parameters for the Google Cloud project ID and Storage bucket')
print ('python3 '+sys.argv[0]+ '[PROJECT_NAME] [BUCKET_NAME]')
exit()
project_name = sys.argv[1]
bucket_name = sys.argv[2]
# Set up our GCS, BigQuery, and Natural Language clients
storage_client = storage.Client()
bq_client = bigquery.Client(project=project_name)
nl_client = language.LanguageServiceClient()
# Set up client objects for the vision and translate_v2 API Libraries
vision_client = vision.ImageAnnotatorClient()
translate_client = translate_v2.Client()
# Setup the BigQuery dataset and table objects
dataset_ref = bq_client.dataset('image_classification_dataset')
dataset = bigquery.Dataset(dataset_ref)
table_ref = dataset.table('image_text_detail')
table = bq_client.get_table(table_ref)
# Create an array to store results data to be inserted into the BigQuery table
rows_for_bq = []
# Get a list of the files in the Cloud Storage Bucket
files = storage_client.bucket(bucket_name).list_blobs()
bucket = storage_client.bucket(bucket_name)
print('Processing image files from GCS. This will take a few minutes..')
# Process files from Cloud Storage and save the result to send to BigQuery
for file in files:
if file.name.endswith('jpg') or file.name.endswith('png'):
file_content = file.download_as_string()
# TBD: Create a Vision API image object called image_object
# Ref: https://googleapis.dev/python/vision/latest/gapic/v1/types.html#google.cloud.vision_v1.types.Image
from google.cloud import vision_v1
import io
client = vision.ImageAnnotatorClient()
# TBD: Detect text in the image and save the response data into an object called response
# Ref: https://googleapis.dev/python/vision/latest/gapic/v1/api.html#google.cloud.vision_v1.ImageAnnotatorClient.document_text_detection
image = vision_v1.types.Image(content=file_content)
response = client.text_detection(image=image)
# Save the text content found by the vision API into a variable called text_data
text_data = response.text_annotations[0].description
# Save the text detection response data in <filename>.txt to cloud storage
file_name = file.name.split('.')[0] + '.txt'
blob = bucket.blob(file_name)
# Upload the contents of the text_data string variable to the Cloud Storage file
blob.upload_from_string(text_data, content_type='text/plain')
# Extract the description and locale data from the response file
# into variables called desc and locale
# using response object properties e.g. response.text_annotations[0].description
desc = response.text_annotations[0].description
locale = response.text_annotations[0].locale
# if the locale is English (en) save the description as the translated_txt
if locale == 'en':
translated_text = desc
else:
# TBD: For non EN locales pass the description data to the translation API
# ref: https://googleapis.dev/python/translation/latest/client.html#google.cloud.translate_v2.client.Client.translate
# Set the target_language locale to 'en')
from google.cloud import translate_v2 as translate
client = translate.Client()
translation = translate_client.translate(text_data, target_language='en')
translated_text = translation['translatedText']
print(translated_text)
# if there is response data save the original text read from the image,
# the locale, translated text, and filename
if len(response.text_annotations) > 0:
rows_for_bq.append((desc, locale, translated_text, file.name))
print('Writing Vision API image data to BigQuery...')
# Write original text, locale and translated text to BQ
# TBD: When the script is working uncomment the next line to upload results to BigQuery
errors = bq_client.insert_rows(table, rows_for_bq)
assert errors == []
4TH TASK -------------
python3 analyze-images.py $DEVSHELL_PROJECT_ID $DEVSHELL_PROJECT_ID
Thankyou!
:)