run.sh
#!/bin/bash
#
# These are the S3 buckets the job uses. They must exist before the script is
# run. Change them to buckets that you own before running this file.
#
LOGS=logs
INPUT=analysisfiles
OUTPUT=results
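#
# If the buckets do not exist yet, they can be created with s3cmd. This is a
# hedged sketch, not part of the original workflow; uncomment to use it.
# Note that S3 bucket names must be globally unique.
#
# s3cmd mb s3://$LOGS s3://$INPUT s3://$OUTPUT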
#
# Copy the files to S3
#
zip -j cache.zip include/functions.R include/parameters.R include/plspm.R include/plspm-internal.R
# You must have s3cmd properly installed.
s3cmd put cache.zip input.txt map.R reduce.R bootstrap.sh s3://$INPUT/
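#
# Optionally verify the upload before launching the job (a sanity check, not
# part of the original script; uncomment to use it):
#
# s3cmd ls s3://$INPUT/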
#
# Sets up a MapReduce job on Amazon Elastic MapReduce. The embedded command
# substitution sets the number of reduce tasks equal to the number of input
# files (one per line of input.txt). This is not necessary, but it speeds up
# getting the first results.
#
elastic-mapreduce --create --stream \
  --input s3n://$INPUT/input.txt \
  --output s3n://$OUTPUT/ \
  --mapper s3n://$INPUT/map.R \
  --reducer s3n://$INPUT/reduce.R \
  --cache-archive s3n://$INPUT/cache.zip#include \
  --log-uri s3://$LOGS/ \
  --instance-type c1.medium \
  --instance-count 20 \
  --bootstrap-action s3n://$INPUT/bootstrap.sh \
  --bootstrap-action s3://elasticmapreduce/bootstrap-actions/configure-hadoop \
  --args "-m,mapred.reduce.tasks=`cat input.txt | wc -l | sed 's/^[^0-9]*//'`"
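#
# A hedged sketch for checking on the job and fetching results afterwards.
# These commands are not part of the original workflow; uncomment to use them.
#
# elastic-mapreduce --list --active
# s3cmd get --recursive s3://$OUTPUT/ ./results/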
#
# Instance types that can be used (these provide the most bang for the buck)
#
# m1.small
# c1.medium
# c1.xlarge
#