diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..47cdb03 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.DS_Store +CJlog +/src/external/firebase/t +/example/Python/pytorch/mnist/data diff --git a/.ssh_config.md5 b/.ssh_config.md5 new file mode 100644 index 0000000..2462c21 --- /dev/null +++ b/.ssh_config.md5 @@ -0,0 +1 @@ +MD5 (/Users/hatef/github_projects/CJ/clusterjob/ssh_config) = 2b4285a9e9bbc85dec08872803880058 diff --git a/README.md b/README.md index b650f1a..12adf50 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ Clusterjob, hereafter CJ, is an automation software system, written mainly in pe computational jobs to clusters in a hassle-free and reproducible manner. CJ produces 'reporoducible' computational packages for academic publications at no-cost. CJ project started in 2013 at Stanford University by Hatef Monajemi and his PhD advisor David L. Donoho with the goal of encouraging more efficient and reproducible research paradigm. -CJ is currently under development. Current implementation allows submission of MATLAB jobs. +CJ is currently under development. Current implementation allows submission of MATLAB and Python jobs. In the future versions, we hope to include other heavily used programming languages -such as Python and R. +such as R and Julia. 
You can read more about CJ on http://clusterjob.org diff --git a/cj_config b/cj_config index 7a30324..80892f0 100644 --- a/cj_config +++ b/cj_config @@ -1,4 +1,5 @@ CJID moosh CJKEY eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJhZG1pbiI6ZmFsc2UsImRlYnVnIjpmYWxzZSwiZCI6eyJ1aWQiOiJtb29zaCIsImNqcGFzc2NvZGUiOiIyYjM0ZTBiZTM2MDljYzE4MzQwMDg4MGZiNTEwMzhlNiJ9LCJ2IjowLCJpYXQiOjAsImV4cCI6MjE0NzQ4MzY0N30.kPWllSAJTLeZAYlkNW81JQaiKip2onnAxaL__z7FKRg SYNC_TYPE manual -SYNC_INTERVAL 300 \ No newline at end of file +SYNC_INTERVAL 300 + diff --git a/dep.pl b/dep.pl new file mode 100644 index 0000000..e72798a --- /dev/null +++ b/dep.pl @@ -0,0 +1 @@ +sudo cpan -i Data::Dumper Data::UUID FindBin File::chdir File::Basename File::Spec IO::Socket::INET IO::Socket::SSL Getopt::Declare Term::ReadLine JSON::PP JSON::XS Digest::SHA Time::Local Time::Piece Moo HTTP::Thin HTTP::Request::Common JSON URI diff --git a/example/MATLAB/.hatef b/example/MATLAB/.hatef new file mode 100644 index 0000000..e69de29 diff --git a/example/MATLAB/dev-cj b/example/MATLAB/dev-cj new file mode 100755 index 0000000..d9ed1ab --- /dev/null +++ b/example/MATLAB/dev-cj @@ -0,0 +1,14 @@ +#!/bin/bash +if [ "$1" == "run" ] && [ "$2" == "default" ];then +perl ../../src/CJ.pl run myMatlabJob.m sherlock -dep ./ -m "test of CJ run" -mem "2G" +elif [ "$1" == "rrun" ] && [ "$2" == "default" ];then +perl ../../src/CJ.pl rrun simpleExample.m sherlock -dep ./ -m "test of CJ simple rrun" -mem "2G" +elif [ "$1" == "parrun" ] && [ "$2" == "default" ];then +perl ../../src/CJ.pl parrun simpleExample.m sherlock -dep ./ -m "test of CJ simple parrun" -mem "2G" +elif [ "$1" == "parrun" ] && [ "$2" == "matlab" ];then +perl ../../src/CJ.pl parrun myMatlabJob.m sherlock -dep ./ -m "test of CJ parrun -- invoking Matlab for range" -mem "2G" +elif [ "$1" == "reduce" ] && [ "$2" == "default" ];then +perl ../../src/CJ.pl reduce Results.txt +else +perl ../../src/CJ.pl "$@" +fi diff --git a/example/MATLAB/example_reduce.m 
b/example/MATLAB/example_reduce.m index 82358a2..5be7cdf 100644 --- a/example/MATLAB/example_reduce.m +++ b/example/MATLAB/example_reduce.m @@ -7,13 +7,16 @@ clc + +L = [1/8,2/8,3/8,4,5, 7e-1/8] + % Always initiate your outputs -output.myStructCell = cell(5,5); -output.myCharCell = cell(5,5); -output.myMatrix = zeros(5,5); +% otherwise reduce will not work +output.myStructCell = cell(6,5); +output.myCharCell = cell(6,5); +output.myMatrix = zeros(6,5); -L = [1/8,2/8,3/8,4,5, 7e-1/8] for i = 1:length(L) for j = 1:5 diff --git a/example/MATLAB/myMatlabJob.m b/example/MATLAB/myMatlabJob.m index e3e3ef3..2810558 100644 --- a/example/MATLAB/myMatlabJob.m +++ b/example/MATLAB/myMatlabJob.m @@ -15,19 +15,19 @@ fprintf(fid, '%s, %s, %s, %s\n','SUID','counter1', 'counter2','random_number'); for i = 1:length(l) - for j = 1:length(k) - - counter = (i-1)*length(k) + j ; - % open a file for testing gather - file2 = sprintf('file_%i', counter); - fid2 = fopen(file2,'at'); - fprintf(fid2, '%i\n', counter); - fclose(fid2) - - - % File for testing reduce - fprintf(fid, '%s, %i,%i, %f\n', SUID,i,j,r(j)); - end +for j = 1:length(k) + +counter = (i-1)*length(k) + j ; +% open a file for testing gather +file2 = sprintf('file_%i', counter); +fid2 = fopen(file2,'at'); +fprintf(fid2, '%i\n', counter); +fclose(fid2) + + +% File for testing reduce +fprintf(fid, '%s, %i,%i, %f\n', SUID,i,j,r(j)); +end end fclose(fid); diff --git a/example/MATLAB/r_superres1d.m b/example/MATLAB/r_superres1d.m new file mode 100644 index 0000000..fdafa06 --- /dev/null +++ b/example/MATLAB/r_superres1d.m @@ -0,0 +1,58 @@ + +close all +clear all + + + +pd.fc=10; +pd.SRFid=30; % must be even + +pd.method='l2constr'; +pd.oracle='nooracle'; + +pd.fid=pd.SRFid*pd.fc; +pd.fhi=pd.fc:pd.fc:pd.fid; +pd.SRF=pd.fhi/pd.fc; +pd.N=2*pd.fid; %must be even + +pd.dpoints=2; % number of independent examples generated for each selection of parameters +pd.alpha=1; %spike dynamic range + +pd.n=2*pd.fc+1; % ?? do I need n? 
+ +P=[25,50,75,100,25e5,50e5,75e5,100e5]; +r=[1]; +d=[1,2]; + +output.param=cell(length(P),length(r),length(d)); +%output.result=cell(length(P),length(r),length(d)); + +%output=zeros(length(P),length(r),length(d)) + + + +for i = 1:length(P) + for j = 1:length(r) + for k = 1:length(d) + + pd.P=P(i); + pd.r=r(j); + pd.d=d(k); + + + output.param{i,j,k} = pd; + % output.result{i,j,k}=run_superres_1d_fixedparam(pd); + + filename='Results.mat'; + savestr = sprintf('save ''%s'' output', filename); + eval(savestr); + fprintf('CREATED OUTPUT FILE %s EXPERIMENT COMPLETE\n',filename); + + + end + end +end + + + + diff --git a/example/MATLAB/simpleExample.m b/example/MATLAB/simpleExample.m index 0c1591e..3705321 100644 --- a/example/MATLAB/simpleExample.m +++ b/example/MATLAB/simpleExample.m @@ -3,13 +3,16 @@ file = 'results.txt'; -for i = 1:3 - for j = 1:5 - % write to a text file for testing reduce - fid = fopen(file,'at'); - fprintf(fid, '%i,%i,%i\n', i,j,i+j); - fclose(fid) - end + +n_list = [2^5] + +for i = 1:length(n_list) + for j = 1:4 + % write to a text file for testing reduce + fid = fopen(file,'at'); + fprintf(fid, '%i,%i,%i\n', i,j,i+j); + fclose(fid) + end end - \ No newline at end of file + diff --git a/example/Python/DEP/dep.py b/example/Python/DEP/dep.py new file mode 100644 index 0000000..e69de29 diff --git a/example/Python/dev-cj b/example/Python/dev-cj new file mode 100755 index 0000000..a556a8c --- /dev/null +++ b/example/Python/dev-cj @@ -0,0 +1,14 @@ +#!/bin/bash +if [ "$1" == "run" ] && [ "$2" == "default" ];then +perl ../../src/CJ.pl run simpleExample.py sherlock -dep ./ -m "test of CJ run" -mem "2G" +elif [ "$1" == "rrun" ] && [ "$2" == "default" ];then +perl ../../src/CJ.pl rrun simpleExample.py sherlock -dep ./ -m "test of CJ simple rrun" -mem "2G" +elif [ "$1" == "parrun" ] && [ "$2" == "default" ];then +perl ../../src/CJ.pl parrun simpleExample.py sherlock -dep ./ -m "test of CJ simple parrun" -mem "2G" +elif [ "$1" == "parrun" ] && [ "$2" == 
"python" ];then +perl ../../src/CJ.pl parrun myMatlabJob.py sherlock -dep ./ -m "test of CJ parrun -- invoking Matlab for range" -mem "2G" +elif [ "$1" == "reduce" ] && [ "$2" == "default" ];then +perl ../../src/CJ.pl reduce Results.csv +else +perl ../../src/CJ.pl "$@" +fi diff --git a/example/Python/pytorch/mnist/mnist.py b/example/Python/pytorch/mnist/mnist.py new file mode 100644 index 0000000..6018aa5 --- /dev/null +++ b/example/Python/pytorch/mnist/mnist.py @@ -0,0 +1,172 @@ +# DCNN Tranining Example +# Data: MNIST +# Author: Hatef Monajemi (monajemi AT stanford DOT edu) +# Date: Aug 2017 +# Stanford, CA + +import numpy, os.path +#import matplotlib.pyplot as plt +import torch +import torchvision +import torchvision.transforms as transforms +from torch.autograd import Variable + +use_gpu = torch.cuda.is_available() + + +# Set the seed for pytorch +seed = 1915; +numpy.random.seed(seed) +torch.manual_seed(seed) +if use_gpu: + torch.cuda.manual_seed(seed) + print('using GPU') +else: + print('using CPUs only') + + +# load data using torchvision and do some transformations +batchSize=4; +transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ]) +training_data = torchvision.datasets.MNIST(root='./data', train=True , download=True, transform=transform); +test_data = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform); + + +# build a trainloader to sample data +trainloader = torch.utils.data.DataLoader(training_data , batch_size=batchSize, shuffle=True, num_workers=2) +testloader = torch.utils.data.DataLoader(test_data , batch_size=batchSize, shuffle=True, num_workers=2) + +############################################### +## Experiments with images to get familir with +## them +## functions to show image +#from torchvision.utils import make_grid; +#def imshow(img): +# img = img / 2 + 0.5 # unnormalize +# npimg = img.numpy() +# plt.imshow(numpy.transpose(npimg, (1, 2, 0))) 
+# +# +# +## get some random training images +#dataiter = iter(trainloader) +#images, labels = dataiter.next() +# +## show images +#imshow(make_grid(images)) +## print labels +#print(' '.join('%5s' % labels[j] for j in range(4))) +############################################### + + +# Define a CNN +class CNN(torch.nn.Module): + def __init__(self): + super(CNN,self).__init__(); + self.conv1 = torch.nn.Conv2d(1,10,5) # 1 input Channel, 10 output Channel, 5x5 filter (28 -> 24) + self.relu = torch.nn.ReLU(); + self.pool = torch.nn.MaxPool2d(2,stride=2); #(24 -> 12) + self.fc1 = torch.nn.Linear(10*12*12, 120); + self.fc2 = torch.nn.Linear(120,10); + + def forward(self,x): + x = self.pool(self.relu(self.conv1(x))) + x = x.view(-1,10*12*12); # reshape it to a row vector + x = self.relu(self.fc1(x)); + x = self.fc2(x) + return x; + +model = CNN(); + +if use_gpu: + model = model.cuda() + + +# initiate model parameters with the ones we have, if any +if os.path.exists('model_params.pt'): + model.load_state_dict(torch.load('model_params.pt')) + + + + + +loss_fn = torch.nn.CrossEntropyLoss() +optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) + + + +running_loss = 0.0; +for epoch in range(4): + + for i, data in enumerate(trainloader,0): + # read inputs and labels + + inputs, labels = data; + # wrap them in Variable + if use_gpu: + inputs = Variable(inputs.cuda()) + labels = Variable(labels.cuda()) + else: + inputs, labels = Variable(inputs), Variable(labels) + + # generate prediction + preds = model(inputs); + # compute loss + loss = loss_fn(preds,labels); + + # update the weights by backprop algo + optimizer.zero_grad() # zero the gradients from previous calls + loss.backward(); # compute gradient of loss w.r.t all parameters + optimizer.step(); # This updates all the parameters of the model + + # print some statistics of loss + running_loss += loss.data[0]; + if i % 2000 == 1999: + print('loss[%-2i,%6i] -> %3.2f' % (epoch+1,i+1,running_loss)) + 
running_loss = 0.0; + +print('Done training'); + + +############################################### +## Predict for 4 images +#dataiter = iter(testloader) +#images, labels = dataiter.next() +# +## print images +#imshow(torchvision.utils.make_grid(images)) +#print('GroundTruth: ', ' '.join('%5s' % labels[j] for j in range(4))) +# +#prediction = model(Variable(images)); +#_, predicted = torch.max(prediction.data, 1) +#print('Predicted: ', ' '.join('%5s' % predicted[j][0] for j in range(4))) +############################################### + + + +# Whole data set +correct = 0 +total = 0 +for data in testloader: + inputs, labels = data + # wrap them in Variable + if use_gpu: + inputs = Variable(inputs.cuda()) + else: + inputs = Variable(inputs) + prediction = model(inputs); + _, predicted = torch.max(prediction.data, 1) + total += labels.size(0) + correct += (predicted.cpu() == labels).sum() + +print('Accuracy of the network on %i test images of MNIST: %3.2f %%' % (total, 100 * correct / total)) + + + +# save the model params for future use: +torch.save(model.state_dict(), 'model_params.pt'); +# To reload later +#model = CNN(); +#model.load_state_dict(torch.load(PATH)) diff --git a/example/Python/simpleExample.py b/example/Python/simpleExample.py new file mode 100644 index 0000000..b96af73 --- /dev/null +++ b/example/Python/simpleExample.py @@ -0,0 +1,19 @@ +# This is a test Python script for CJ +# Author: Hatef Monajemi June 11 2017 +import numpy as np; +import csv; + +SUID = 'monajemi' +file = SUID+'_results.csv'; + +Var0 = np.array([1,2,3]); +Var1 = np.array([1,2,3,4,10,5]); +with open('file.txt','w') as myfile: + for i in range(len(Var0)): + for j in range(len(Var1)): # This is a comment + # write to a text file for testing reduce + with open(file,'a') as csvfile: + resultswriter = csv.writer(csvfile,delimiter=','); + resultswriter.writerow([i,j,i+j]); + + diff --git a/example/MATLAB/test b/example/R/test similarity index 100% rename from example/MATLAB/test rename 
to example/R/test diff --git a/example/R/touch b/example/R/touch new file mode 100644 index 0000000..e69de29 diff --git a/src/.app_list b/src/.app_list new file mode 100644 index 0000000..e9072ec --- /dev/null +++ b/src/.app_list @@ -0,0 +1,8 @@ +{ +"anaconda":{"version":"Anaconda3-4.4.0-Linux-x86_64","space":"6.9G", "install_time":"10-20 min"}, +"miniconda":{"version":"Miniconda3-latest-Linux-x86_64", "space":"392M", "install_time":" 1-5 min"}, +"cvx":{"version":"cvx-rd", "space":"108M", "install_time":"20-60 sec"}, +"matlab":{"version":"", "space":"", "install_time":""}, +"composer":{"version":"latest", "space":"", "install_time":""} + +} diff --git a/src/CJ.pl b/src/CJ.pl index 3b91543..2d0a18d 100644 --- a/src/CJ.pl +++ b/src/CJ.pl @@ -1,7 +1,8 @@ #/usr/bin/perl -w # # Copyright (c) 2015 Hatef Monajemi (monajemi@stanford.edu) -# visit www.clsuetrjob.org +# visit http://clsuetrjob.org +# use strict; use FindBin qw($Bin); @@ -16,15 +17,15 @@ use CJ::Matlab; # Contains Matlab related subs use CJ::Get; # Contains Get related subs use CJ::Scripts; # Contains shell scripts +use CJ::Run; # Contains run object and methods use Getopt::Declare; use Data::Dumper; use Term::ReadLine; use JSON::PP; -use Digest::SHA qw(sha1_hex); # generate hexa-decimal SHA1 PID #use Term::ANSIColor qw(:constants); # for changing terminal text colors #use Term::ReadKey; -use vars qw( $sync_status $message $mem $runtime $dep_folder $verbose $log_script $text_header_lines $show_tag $log_tag $qsub_extra $cmdline); # options +use vars qw( $submit_defaults $qSubmitDefault $sync_status $message $dep_folder $verbose $log_script $text_header_lines $show_tag $log_tag $force_tag $qsub_extra $cmdline); # options $::VERSION = &CJ::version_info(); @@ -48,21 +49,26 @@ -#==================================== +#========================================= # INITIALIZE VARIABLEs -#==================================== -$dep_folder = "."; -$mem = "8G"; # default memeory -$runtime = "40:00:00"; # default memeory 
-$message = ""; # default message -$verbose = 0; # default - redirect to CJlog +#========================================= + +$message = ""; +$dep_folder = undef; +$verbose = 0; # default - redirect to CJlog $text_header_lines = undef; $show_tag = "program"; $qsub_extra = ""; $log_tag = "all"; $log_script = undef; $sync_status = 0; +$qSubmitDefault = 1; + +#========================================= +# CJ SUMBMIT DEFAULTS +#========================================= +$submit_defaults = &CJ::submit_defaults(); @@ -70,7 +76,9 @@ #========================================= # refresh CJlog before declaring options. # it keeps updated for each new run -&CJ::my_system("rm $CJlog") unless (! -f $CJlog); +&CJ::my_system("rm $CJlog_out") unless (! -f $CJlog_out); +&CJ::my_system("rm $CJlog_error") unless (! -f $CJlog_error); + #========================================= @@ -78,9 +86,6 @@ my @nosync_cmds = qw ( init who help -help -h -Help -HELP prompt version -v install-update); my %nosync = map { $_ => 1 } @nosync_cmds; - - - if($CJKEY && (!exists($nosync{$cjcmd0})) ){ &CJ::add_agent_to_remote(); # if there is no agent, add it. 
$sync_status = &CJ::AutoSync(); @@ -91,11 +96,14 @@ my $spec = <<'EOSPEC'; - prompt opens CJ prompt command [undocumented] - {defer{cj_prompt}} + prompt opens CJ prompt command [undocumented] + {defer{cj_prompt}} + hi prints out CJ welcome [undocumented] + {defer{cj_heart}} + nihao [ditto] [undocumented] -help Show usage information [undocumented] {defer{&CJ::add_cmd($cmdline);$self->usage(0);exit;}} - help [ditto] [undocumented] + help [ditto] [undocumented] -Help [ditto] [undocumented] -HELP [ditto] [undocumented] @@ -107,80 +115,112 @@ -v [ditto] [undocumented] --v[erbose] verbose mode [nocase] {$verbose=1} + -vv[v[v[v]]] [ditto] [undocumented] + --clean show cleaned packages in log [nocase] [requires: log] + {$log_tag="showclean";} --err[or] error tag for show [nocase] [requires: show] {$show_tag="error"} + --no-submit-default turns off default submit parameters [nocase] + {$qSubmitDefault=0} + --json json tag for show [nocase] [requires: show] + {$show_tag="json";} --less less tag for show [nocase] [requires: show] {$show_tag="less";} + --runlog runlog tag for show [nocase] [requires: show] + {$show_tag="runlog";} --ls list tag for show [nocase] [requires: show] {$show_tag="ls";} - --clean show cleaned packages in log [nocase] [requires: log] - {$log_tag="showclean";} - --script [=] shows log of specific script [requires: log] + --script [=] shows log of specific script [requires: log] {$log_script=$pattern;} - --header [=] number of header lines for reducing text files [requires: reduce] + --header [=] number of header lines for reducing text files [requires: reduce] {$text_header_lines=$num_lines;} + -alloc[ate] machine specific allocation [nocase] + {$qsub_extra=$resources} -dep dependency folder path [nocase] {$dep_folder=$dep_path} -m reminder message {$message=$msg} -mem memory requested [nocase] - {$mem=$memory} - -runtime run time requested (default=40:00:00) [nocase] - {$runtime=$r_time} - -alloc[ate] machine specific allocation [nocase] - 
{$qsub_extra=$resources} - init initiates CJ installation [nocase] - {defer{CJ::init}} + {$submit_defaults->{'mem'}=$memory} + -runtime run time requested (default=48:00:00) [nocase] + {$submit_defaults->{'runtime'}=$r_time} + avail list available resources = cluster|app + { defer{ &CJ::avail($tag) } } sync force sync [nocase] - {defer{CJ::sync_forced($sync_status)}} + {defer{&CJ::sync_forced($sync_status)}} who prints out user and agent info [nocase] - install-update updates installation to the most recent commit on GitHub [nocase] - log [] log -n|all|pid [nocase] + update updates installation to the most recent commit on GitHub [nocase] + config[-update] [ [...]] list|update cluster configuration + {defer{ + &CJ::add_cmd($cmdline); + if ($_PUNCT_{"-update"}) { + &CJ::update_cluster_config($cluster,@keyval); + }else{ + &CJ::show_cluster_config($cluster); + } + } + } + + connect connect to a cluster + log [] log -n|all|pid [nocase] {defer{&CJ::add_cmd($cmdline); &CJ::show_log($argin,$log_tag,$log_script) }} - hist[ory] [] history of runs -n|all + hist[ory] [] history of runs -n|all {defer{&CJ::add_cmd($cmdline); &CJ::show_history($argin) }} - cmd [] command history -n|all [nocase] - {defer{ &CJ::show_cmd_history($argin) }} - clean [] clean certain package [nocase] + clean [] clean certain package [nocase] {defer{ &CJ::add_cmd($cmdline); &CJ::clean($pid,$verbose); }} - state [ [[/] []]] state of package [nocase] - {defer{ &CJ::add_cmd($cmdline);&CJ::get_print_state($pid,$counter) }} - info [] info of certain package [nocase] + cmd [] command history -n|all [nocase] + {defer{ &CJ::show_cmd_history($argin) }} + deploy deploy code on the cluster [nocase] [requires: -m] + {my $runflag = "deploy"; + {defer{&CJ::add_cmd($cmdline);run($cluster,$code,$runflag,$qsub_extra)}} + } + err[or] [ [[/] []] ] shortcut for '--err show' [nocase] + {defer{ &CJ::add_cmd($cmdline);&CJ::show($pid,$counter,"","error") }} + gather [] gather results of parrun [nocase] + 
{defer{&CJ::add_cmd($cmdline);&CJ::Get::gather_results($pid,$pattern,$dir_name,$verbose)}} + get [ [/] []] bring results (fully/partially) back to local machine [nocase] + {defer{&CJ::add_cmd($cmdline);&CJ::Get::get_results($pid,$subfolder,$verbose)}} + info [] info of certain package [nocase] {defer{ &CJ::add_cmd($cmdline);&CJ::show_info($pid); }} - show [ [[/] [] [[/] ]] ] show program/error of certain package [nocase] - {defer{ &CJ::add_cmd($cmdline);&CJ::show($pid,$counter,$file,$show_tag) }} - ls [ [[/] []] ] shortcut for '--ls show' [nocase] + init initiates CJ installation [nocase] + {defer{CJ::init}} + install [-f[]] install app on a remote machine + json [ [[/] [] [[/] ]] ] shortcut for '--json show' [nocase] + {defer{ &CJ::add_cmd($cmdline);&CJ::show($pid,$counter,$file,"json") }} + ls [ [[/] []] ] shortcut for '--ls show' [nocase] {defer{ &CJ::add_cmd($cmdline);&CJ::show($pid,$counter,"","ls") }} - less [ [[/] [] [[/] ]] ] shortcut for '--less show' [nocase] + less [ [[/] [] [[/] ]] ] shortcut for '--less show' [nocase] {defer{ &CJ::add_cmd($cmdline);&CJ::show($pid,$counter,$file,"less") }} - rerun [ [[/] [...]]] rerun certain (failed) job [nocase] - {defer{&CJ::add_cmd($cmdline);&CJ::rerun($pid,\@counter,$mem,$runtime,$qsub_extra,$verbose) }} - run run code on the cluster [nocase] [requires: -m] + rerun [ [[/] [...]]] rerun certain (failed) job [nocase] + {defer{&CJ::add_cmd($cmdline); + &CJ::rerun($pid,\@counter,$submit_defaults,$qSubmitDefault,$qsub_extra,$verbose) }} + run run code on the cluster [nocase] [requires: -m] {my $runflag = "run"; - {defer{&CJ::add_cmd($cmdline); run($cluster,$code,$runflag,$qsub_extra)}} + {defer{&CJ::add_cmd($cmdline); run($cluster,$code,$runflag,$qsub_extra)}} } - deploy deploy code on the cluster [nocase] [requires: -m] - {my $runflag = "deploy"; + pardeploy pardeploy code on the cluster [nocase] [requires: -m] + {my $runflag = "pardeploy"; {defer{&CJ::add_cmd($cmdline);run($cluster,$code,$runflag,$qsub_extra)}} - } - 
parrun parrun code on the cluster [nocase] [requires: -m] + } + parrun parrun code on the cluster [nocase] [requires: -m] {my $runflag = "parrun"; {defer{&CJ::add_cmd($cmdline);run($cluster,$code,$runflag,$qsub_extra)}} } - pardeploy pardeploy code on the cluster [nocase] [requires: -m] - {my $runflag = "pardeploy"; + reduce [-f[]] [...] reduce results of parrun [nocase] + rrun array run code on the cluster [nocase] [requires: -m][undocumented] + {my $runflag = "rrun"; {defer{&CJ::add_cmd($cmdline);run($cluster,$code,$runflag,$qsub_extra)}} - } - reduce [] reduce results of parrun [nocase] - {defer{&CJ::add_cmd($cmdline);&CJ::Get::reduce_results($pid,$filename,$verbose,$text_header_lines)}} - gather [] gather results of parrun [nocase] - {defer{&CJ::add_cmd($cmdline);&CJ::Get::gather_results($pid,$pattern,$dir_name,$verbose)}} - get [ [/] []] bring results (fully/partially) back to local machine [nocase] - {defer{&CJ::add_cmd($cmdline);&CJ::Get::get_results($pid,$subfolder,$verbose)}} - summary gives a summary of the number of jobs on particlur cluster with their states [nocase] - {defer{&CJ::add_cmd($cmdline); &CJ::get_summary($cluster)}} - save [] save a package in path [nocase] + } + runlog [ [[/] []] ] shows the run log of a script [nocase] + {defer{ &CJ::add_cmd($cmdline);&CJ::show($pid,$counter,"","runlog") }} + save [] save a package in path [nocase] {defer{&CJ::add_cmd($cmdline); &CJ::save_results($pid,$path,$verbose)}} + show [ [[/] [] [[/] ]] ] show program/error of certain package [nocase] + {defer{ &CJ::add_cmd($cmdline);&CJ::show($pid,$counter,$file,$show_tag) }} + state [ [[/] []]] state of package [nocase] + {defer{ &CJ::add_cmd($cmdline);&CJ::get_print_state($pid,$counter) }} + summary gives a summary of the number of jobs on particlur cluster with their states [nocase] + {defer{&CJ::add_cmd($cmdline); &CJ::CheckConnection($cluster);&CJ::get_summary($cluster)}} @ re-executes a previous command avaiable in command history [nocase] 
{defer{&CJ::reexecute_cmd($cmd_num,$verbose) }} @$ re-executes the last command avaiable in command history [nocase] @@ -192,24 +232,36 @@ my $opts = Getopt::Declare->new($spec); -if($opts->{'install-update'}){ + + + +if($opts->{'connect'}){ + CJ::message("connecting to $opts->{'connect'}"); + &CJ::connect2cluster($opts->{'connect'}, $verbose); +} + +if($opts->{'update'}){ + &CJ::add_cmd($cmdline); + update_install(); +} + +sub update_install { + my $star_line = '*' x length($install_dir); - # make sure s/he really want a deletion + # make sure s/he really wants a deletion CJ::message("This update results in cloning the newest version of ClusterJob in"); CJ::message("$star_line",1); CJ::message("$install_dir",1); CJ::message("$star_line",1); CJ::message("The newest version may not be compatible with your old data structure",1); CJ::message("It is recommended that you backup your old installation before this action.",1); - CJ::message("Are you sure you want to update your installation? Y/N",1); - my $yesno = ; chomp($yesno); + CJ::yesno("Are you sure you want to update your installation? 
Y/N",1); - exit unless (lc($yesno) eq "y" or lc($yesno) eq "yes"); CJ::message("Updating CJ installation..."); my $date = CJ::date(); my $datetag = $date->{year}-$date->{month}-$date->{day}; # update installation - my $cmd = "cd /tmp && curl -sL https://github.com/monajemi/clusterjob/tarball/master | tar -zx -"; + my $cmd = "cd /tmp && curl -sL https://github.com/monajemi/clusterjob/tarball/master | tar -zx "; $cmd .= "&& mv monajemi-clusterjob-* clusterjob-$datetag"; $cmd .= "&& cp -r /tmp/clusterjob-$datetag/src $install_dir/"; $cmd .= "&& cp -r /tmp/clusterjob-$datetag/example $install_dir/"; @@ -220,7 +272,7 @@ CJ::my_system($cmd,$verbose); CJ::message("Installation updated."); - exit; + exit 0; } if($opts->{who}) @@ -234,6 +286,70 @@ # print "$text_header_lines\n"; #$opts->usage(); +#mimi print out mimi [undocumented] +#{print $/;print $"x(15&ord), "Mimi", $/x/\D/ for'3h112a05e0n1l2j4f6b9'=~/./g; print $/;} + + + +if($opts->{'reduce'}) +{ + &CJ::add_cmd($cmdline); + my $force_tag = defined($opts->{'reduce'}{'-f'}) ? 1 : 0; + &CJ::Get::reduce_results($opts->{'reduce'}{''},$opts->{'reduce'}{''},$verbose,$text_header_lines, $force_tag); +} + +if($opts->{'install'}) +{ + &CJ::add_cmd($cmdline); + my $force_tag = defined($opts->{'install'}{'-f'}) ? 
1 : 0; + &CJ::install_software($opts->{'install'}{''},$opts->{'install'}{''}, $force_tag) +} + + + + + + + + + + + + + +sub cj_heart{ + + my @myString = split //,'cjccjjccjcjcjcjcjcjj' ; + my @myChr = split //, '4g143d07g0o1m2k4g6c8'; + + my $counter = 0 ; + print $/, " "x4; + foreach my $chr (@myChr){ + + my $space = ord($chr) % 16; + print $" x $space , $myString[$counter]; + print $/, " "x4 if ($chr =~ /\D/); + $counter = $counter + 1; + } + + print "\n"; + print "\n"; +} + + + + + + + + + + + + + + + @@ -251,9 +367,9 @@ sub cj_prompt{ my $COLOR = "\033[47;30m"; my $RESET = "\033[0m"; - #my $prompt = "${COLOR}[$localHostName:$localUserName] CJ>$RESET "; + #my $prompt = "${COLOR}[$localUserName] CJ>$RESET "; - my $prompt = "[$localHostName:$localUserName] CJ> "; + my $prompt = "[$localUserName] CJ> "; print "$::VERSION\n \n \n"; @@ -278,7 +394,7 @@ sub cj_prompt{ next; }elsif($input =~ m/\bcd\b/){ $input =~ s/cd//g; - $input =~ s/^\s|\s$//g; + $input =~ s/^\s*|\s*$//g; if (-d $input){ $CWD = $input }else{ @@ -307,390 +423,25 @@ sub run{ my ($machine,$program, $runflag,$qsub_extra) = @_; my $BASE = `pwd`;chomp($BASE); # Base is where program lives! 
- -#=================== -# Check connection -#=================== -my $ssh = &CJ::host($machine); -my $account = $ssh->{account}; -my $bqs = $ssh->{bqs}; -my $remotePrefix = $ssh->{remote_repo}; -my $date = &CJ::date(); - -# create remote directory using outText -my $check = $date->{year}.$date->{month}.$date->{min}.$date->{sec}; -my $sshres = `ssh $account 'mkdir CJsshtest_$check; rm -rf CJsshtest_$check; exit;' 2>&1`; -&CJ::err("Cannot connect to $account: $sshres") if($sshres); - - -#==================================== -# DATE OF CALL -#==================================== - -# PID -my $sha_expr = "$CJID:$localHostName:$program:$account:$date->{datestr}"; -my $pid = sha1_hex("$sha_expr"); -my $short_pid = substr($pid, 0, 8); # we use an 8 character abbrviation - - -# Check to see if the file and dep folder exists -&CJ::err("$BASE/$program not found") if(! -e "$BASE/$program" ); -&CJ::err("Dependency folder $BASE/$dep_folder not found") if(! -d "$BASE/$dep_folder" ); - - -#======================================= -# BUILD DOCSTRING -# WE NAME THE REMOTE FOLDERS -# BY PROGRAM AND PID -# EXAMPLE : MaxEnt/20dd3203e29ec29... -#======================================= -my ($program_name,$ext) = &CJ::remove_extension($program); -my $programType = CJ::getProgramType($program); -# -# my $programType; -# if(lc($ext) eq "m"){ -# $programType = "matlab"; -# }elsif(lc($ext) eq "r"){ -# $programType = "R"; -# }else{ -# CJ::err("Program type .$ext is not recognized"); -# } - -CJ::message("$runflag"."ing [$program] on [$machine]"); -&CJ::message("Sending from: $BASE"); - - - -my $localDir = "$localPrefix/"."$program_name"; -my $local_sep_Dir = "$localDir/" . 
"$pid" ; -my $saveDir = "$savePrefix"."$program_name"; - - -#==================================== -# CREATE LOCAL DIRECTORIES -#==================================== -# create local directories -if(-d $localPrefix){ - - mkdir "$localDir" unless (-d $localDir); - mkdir "$local_sep_Dir" unless (-d $local_sep_Dir); - -}else{ - # create local Prefix - mkdir "$localPrefix"; - mkdir "$localDir" unless (-d $localDir); - mkdir "$local_sep_Dir" unless (-d $local_sep_Dir); -} - - -# cp code -my $cmd = "cp $BASE/$program $local_sep_Dir/"; -&CJ::my_system($cmd,$verbose); -# cp dependencies -my $cmd = "cp -r $dep_folder/* $local_sep_Dir/"; -&CJ::my_system($cmd,$verbose); - - - -#===================== -# REMOTE DIRECTORIES -#===================== -my $remoteDir = "$remotePrefix/"."$program_name"; -my $remote_sep_Dir = "$remoteDir/" . "$pid" ; - -# for creating remote directory -my $outText; -if($bqs eq "SLURM"){ -$outText=<[0]; # there is only one in this case - my $numJobs = $#{$job_ids}+1; - CJ::message("$numJobs job(s) submitted ($job_id)"); - -#delete the local qsub.info after use -my $cmd = "rm $local_qsub_info_file"; -&CJ::my_system($cmd,$verbose); -}else{ - $job_id =""; -} - - - -my $runinfo={ -pid => ${pid}, -agent => ${AgentID}, -user => ${CJID}, -local_ip => ${localIP}, -local_host => ${localHostName}, -date => ${date}, -machine => ${machine}, -account => ${account}, -local_prefix => ${localPrefix}, -local_path => "${localDir}/${pid}", -remote_prefix => ${remotePrefix}, -remote_path => "${remoteDir}/${pid}", -job_id => $job_id, -bqs => $bqs, -save_prefix => ${savePrefix}, -save_path => "${saveDir}/${pid}", -runflag => $runflag, -program => $program, -message => $message, -}; - -# add_record locally -&CJ::add_record($runinfo); -# write runinfo to FireBaee as well -&CJ::write2firebase($pid,$runinfo,$date->{epoch},0); - - -}elsif($runflag eq "parrun" || $runflag eq "pardeploy"){ -#========================================== -# clusterjob parrun myscript.m DEP -# this 
implements parrallel for in perl -# so for each grid point, we will have -# one separate job -#========================================== - -# read the script, parse it out and -# find the for loops -my $matlab = CJ::Matlab->new($BASE,$program); -my $parser = $matlab->parse(); -my ($idx_tags,$ranges) = $matlab->findIdxTagRange($parser,$verbose); - - -#Check that user has initialized for loop vars -$matlab->check_initialization($parser,$idx_tags,$verbose); - -#============================================== -# MASTER SCRIPT -#============================================== - -my $nloops = $parser->{nloop}; -my $counter = 0; # counter gives the total number of jobs submited: (1..$counter) -my $extra={}; -$extra->{TOP}= $parser->{TOP}; -$extra->{FOR}= $parser->{FOR}; -$extra->{BOT}= $parser->{BOT}; -$extra->{local_sep_Dir}= $local_sep_Dir; -$extra->{remote_sep_Dir}= $remote_sep_Dir; -$extra->{runflag}= $runflag; -$extra->{program}= $program; -$extra->{date}= $date; -$extra->{pid}= $pid; -$extra->{bqs}= $bqs; -$extra->{mem}= $mem; -$extra->{qsub_extra}=$qsub_extra; -$extra->{runtime}=$runtime; -$extra->{ssh}=$ssh; - -# Recursive loop for arbitrary number of loops. 
-my $master_script = &CJ::Scripts::build_nloop_master_script($nloops, $idx_tags,$ranges,$extra); - -#=================================== -# write out master_script -#=================================== -my $local_master_path="$local_sep_Dir/master.sh"; -&CJ::writeFile($local_master_path, $master_script); - - -#================================== -# PROPAGATE THE FILES -# AND RUN ON CLUSTER -#================================== -my $tarfile="$pid".".tar.gz"; -my $cmd="cd $localDir; tar --exclude '.git' --exclude '*~' --exclude '*.pdf' -czf $tarfile $pid/ ; rm -rf $local_sep_Dir ; cd $BASE"; -&CJ::my_system($cmd,$verbose); - - -# create remote directory using outText -my $cmd = "ssh $account 'echo `$outText` ' "; -&CJ::my_system($cmd,$verbose); - -&CJ::message("Sending package \033[32m$short_pid\033[0m"); -# copy tar.gz file to remoteDir -my $cmd = "rsync -arvz ${localDir}/${tarfile} ${account}:$remoteDir/"; -&CJ::my_system($cmd,$verbose); - - -&CJ::message("Submitting job(s)"); -my $cmd = "ssh $account 'source ~/.bashrc;cd $remoteDir; tar -xzf ${tarfile} ; cd ${pid}; bash -l master.sh > $remote_sep_Dir/qsub.info; sleep 2'"; -&CJ::my_system($cmd,$verbose) unless ($runflag eq "pardeploy"); - - - -# bring the log file -my $qsubfilepath="$remote_sep_Dir/qsub.info"; -my $cmd = "rsync -avz $account:$qsubfilepath $info_dir/"; -&CJ::my_system($cmd,$verbose) unless ($runflag eq "pardeploy"); - - - -my $job_ids; -my $job_id; -if($runflag eq "parrun"){ - # read run info - my $local_qsub_info_file = "$info_dir/"."qsub.info"; - $job_ids = &CJ::read_qsub($local_qsub_info_file); - $job_id = join(',', @{$job_ids}); - my $numJobs = $#{$job_ids}+1; - - CJ::message("$numJobs job(s) submitted ($job_ids->[0]-$job_ids->[-1])"); - -#delete the local qsub.info after use -my $cmd = "rm $local_qsub_info_file"; -&CJ::my_system($cmd,$verbose); - -}else{ -$job_ids = ""; -$job_id = ""; + my $run = CJ::Run->new($BASE,$program,$machine,$runflag,$dep_folder,$message,$qsub_extra,$qSubmitDefault, 
$submit_defaults,$verbose); + + if ($runflag eq "deploy" || $runflag eq "run"){ + $run->SERIAL_DEPLOY_RUN(); + }elsif($runflag eq "parrun" || $runflag eq "pardeploy"){ + $run->PAR_DEPLOY_RUN(); + }elsif($runflag eq "rrun" || $runflag eq "rdeploy"){ + $run->SLURM_ARRAY_DEPLOY_RUN(); + }else{ + &CJ::err("Runflag $runflag was not recognized"); + } } - -my $runinfo={ -pid => ${pid}, -user => ${CJID}, # will be changed to CJusername later -agent => ${AgentID}, -local_ip => ${localIP}, -local_host => ${localHostName}, -date => ${date}, -machine => ${machine}, -account => ${account}, -local_prefix => ${localPrefix}, -local_path => "${localDir}/${pid}", -remote_prefix => ${remotePrefix}, -remote_path => "${remoteDir}/${pid}", -job_id => $job_id, -bqs => $bqs, -save_prefix => ${savePrefix}, -save_path => "${saveDir}/${pid}", -runflag => $runflag, -program => $program, -message => $message, -}; - - -&CJ::add_record($runinfo); - -# write runinfo to FB as well -&CJ::write2firebase($pid,$runinfo, $date->{epoch},0); - -}else{ -&CJ::err("Runflag $runflag was not recognized"); -} - - -exit 0; - - -} - - - - - - - - - - #==================================== # USEFUL SUBs #==================================== - - - - - + #sub matlab_var #{ @@ -703,18 +454,3 @@ sub run{ # } # #} - - - - - - - - - - - - - - - diff --git a/src/CJ.pm b/src/CJ.pm index e6bba06..31a4208 100644 --- a/src/CJ.pm +++ b/src/CJ.pm @@ -5,31 +5,31 @@ use strict; use warnings; use CJ::CJVars; use CJ::Sync; +use CJ::Install; use Term::ReadLine; use Time::Local; use Time::Piece; use JSON::PP; use Data::Dumper; use Data::UUID; +use Getopt::Declare; use feature 'say'; sub version_info{ -my $version_script="\n\n This is ClusterJob (CJ) version V0.0.3"; -$version_script .= "\n Copyright (c) 2015 Hatef Monajemi (monajemi\@stanford.edu)"; -$version_script .="\n CJ may be copied only under the terms and conditions of"; -$version_script .= "\n the GNU General Public License, which may be found in the CJ"; -$version_script 
.= "\n source code. For more info please visit"; -$version_script .= "\n https://github.com/monajemi/clusterjob"; -$version_script .= "\n https://clusterjob.org"; +my $version_script="\n\n This is ClusterJob (CJ) version V0.0.4"; +$version_script .= "\n Copyright (c) 2015 Hatef Monajemi (monajemi\@stanford.edu)"; +$version_script .= "\n CJ may be copied only under the terms and conditions of"; +$version_script .= "\n the BSD 3-clause License, which may be found in the CJ"; +$version_script .= "\n source code. For more info please visit"; +$version_script .= "\n https://github.com/monajemi/clusterjob"; +$version_script .= "\n https://clusterjob.org"; return $version_script ; } - - sub init{ @@ -58,10 +58,17 @@ sub init{ #my $cmd="sed -i '' 's|.*<>.*|$agent_line|' $CJVars_file"; #system($cmd); + mkdir "$CJlog_dir"; # this if for logging + mkdir "$info_dir"; &CJ::writeFile($AgentIDPATH, $AgentID); # Record the AgentID in a file. &CJ::create_info_files(); + + # record the md5 file of ssh_config + &CJ::create_ssh_config_md5(); + + if(defined($CJKEY)){ # Add this agent to the the list of agents eval{ @@ -71,7 +78,7 @@ sub init{ if($@->message eq '401 Unauthorized'){ CJ::warning("Your CJKEY is invalid. 
Please provide a valid one and then issue 'cj sync' "); }else{ - CJ::warning("Unable to connect to CJ database"); + CJ::warning("Unable to connect to CJ database $@"); } } &CJ::AutoSync() unless ($@); @@ -81,22 +88,113 @@ sub init{ } -sub getProgramType{ - my ($program) = @_; - - my ($program_name,$ext) = &CJ::remove_extension($program); - my $programType; - if(lc($ext) eq "m"){ - $programType = "matlab"; - }elsif(lc($ext) eq "r"){ - $programType = "R"; - }else{ - CJ::err("Program type .$ext is not recognized"); - } - - return $programType; +sub parse_qsub_extra{ + my ($qsub_extra) = @_; + + return undef if ($qsub_extra eq ""); + + my $specification = q{ + --partition [=] Partition + -p [=] [ditto] + --qos [=] Quality of Service + }; + + my $args = Getopt::Declare->new($specification,$qsub_extra); # parse a string + #print Dumper($args); +return $args; +} + + +sub CheckConnection{ + my ($cluster) = @_; + my $ssh = &CJ::host($cluster); + my $date = &CJ::date(); + + my $check = $date->{year}.$date->{month}.$date->{min}.$date->{sec}; + my $sshres = `ssh $ssh->{account} 'mkdir CJsshtest_$check; rm -rf CJsshtest_$check; exit;' 2>$CJlog_error`; + &CJ::err("Cannot connect to $ssh->{account}: $sshres") if($sshres); + + return 1; +} + + + + + +sub max_slurm_arraySize{ + + my($ssh) = @_; + + my $max_array_size = ` ssh $ssh->{account} 'scontrol show config | grep MaxArraySize' | awk \'{print \$3}\' `; + chomp($max_array_size); + $max_array_size = $max_array_size - 1; # last number not allowed + $max_array_size = int(1) unless &CJ::isnumeric($max_array_size); # default max size allowed! 
+ + return $max_array_size; + +} + + +sub max_jobs_allowed{ + my ($ssh, $qsub_extra) = @_; + + + + my $account = $ssh->{account}; + my $bqs = $ssh->{bqs}; + my $user = $ssh->{user}; + + my $qos; + +if($bqs eq "SLURM"){ + + # We need to parse it and get partitions out + # partitions are given with flag '-p, --partition=' + + my $alloc = &CJ::parse_qsub_extra($qsub_extra); + # print defined($alloc->{'-p'}) ? $alloc->{'-p'}->{''} . "\n" : "nothing\n"; + # print defined($alloc->{'--qos'}) ? $alloc->{'--qos'}->{''} . "\n" : "nothing\n"; + + if( defined($alloc->{'--qos'}->{''}) ){ + $qos = $alloc->{'--qos'}->{''}; + }elsif( defined( $alloc->{'-p'}->{''}) ){ + $qos = $alloc->{'-p'}->{''}; + }elsif( defined($alloc->{'--partition'}->{''}) ){ + $qos = $alloc->{'--partition'}->{''}; + }else{ + #$qos = `ssh $account 'sacctmgr -n list assoc where user=$user format=defaultqos'`; chomp($qos); + $qos = `ssh $account 'sacctmgr -n list assoc where user=$user format=qos'`; chomp($qos); + $qos = &CJ::remove_white_space($qos); + &CJ::message("no SLURM partition specified. 
CJ is using default partition: $qos"); + } + + $qos = (split(/,/, $qos))[0]; # if multiple get the first one + $qos = &CJ::remove_white_space($qos); +} + + my $max_u_jobs; + my $live_jobs; + if($bqs eq "SGE"){ + $max_u_jobs = `ssh $account 'qconf -sconf | grep max_u_jobs' | awk \'{print \$2}\' `; chomp($max_u_jobs); + $live_jobs = (`ssh ${account} 'qstat | grep "\\b$user\\b" | wc -l' 2>$CJlog_error`); chomp($live_jobs); + + }elsif($bqs eq "SLURM"){ + + $max_u_jobs = `ssh $account 'sacctmgr show qos -n format=Name,MaxSubmitJobs | grep "\\b$qos\\b"' | awk \'{print \$2}\' `; chomp($max_u_jobs); + #currently live jobs + $live_jobs = (`ssh ${account} 'qstat | grep "\\b$qos\\b" | grep "\\b$user\\b" | wc -l' 2>$CJlog_error`); chomp($live_jobs); + + }else{ + &CJ::err("Unknown batch queueing system"); + } + $live_jobs = int(0) unless &CJ::isnumeric($live_jobs); + $max_u_jobs = int(3000) unless &CJ::isnumeric($max_u_jobs); # default max allowed! + + my $jobs_allowed = int($max_u_jobs-$live_jobs); + + return $jobs_allowed; } @@ -107,18 +205,21 @@ sub check_hash { return unless @$keys; foreach my $key ( @$keys ) { - return unless eval { exists $hash->{$key} }; - $hash = $hash->{$key}; - } + return unless eval { exists $hash->{$key} }; + $hash = $hash->{$key}; + } return 1; - } +} sub write2firebase { my ($pid, $runinfo, $timestamp, $inform) = @_; + + $timestamp = 0+$timestamp; # treat time stamp as number for JSON. Has to be explicit. Otherwise you get qouted stuff in Firebase + return if not defined($CJKEY); my $firebase = Firebase->new(firebase => $firebase_name, auth_token => $CJKEY); @@ -168,7 +269,7 @@ sub add_agent_to_remote{ # This is the first time agent is added. 
my $firebase = Firebase->new(firebase => $firebase_name, auth_token => $CJKEY); # make sure agent doesnt exist already - return if defined($firebase->get("users/${CJID}/agents/$AgentID")); + return if eval {my $fb_get = $firebase->get("users/${CJID}/agents/$AgentID")}; my $agentHash = {"SyncReq" => "null", "last_instance" => "null", "push_timestamp" =>0 ,"pull_timestamp" => 0}; my $result = $firebase->patch("users/${CJID}/agents/$AgentID", $agentHash); } @@ -209,8 +310,6 @@ sub informOtherAgents{ - - sub sync_forced { my ($status) = @_; @@ -272,33 +371,9 @@ sub getLastSync - - - - - - - - - - - - - - - - - - - - - - - - sub rerun { - my ($pid,$counter,$mem,$runtime,$qsub_extra,$verbose) = @_; + my ($pid,$counter,$submit_defaults,$qSubmitDefault,$qsub_extra,$verbose) = @_; my $info; @@ -351,17 +426,17 @@ sub rerun my $master_script; if ($#job_ids eq 0) { # if there is only one job #run - $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$bqs,$mem,$runtime,$remote_path,$qsub_extra); + $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$info,$submit_defaults,$qSubmitDefault,$remote_path,$qsub_extra); }else{ #parrun if(@$counter){ foreach my $count (@$counter){ - $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$bqs,$mem,$runtime,$remote_path,$qsub_extra,$count); + $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$info,$submit_defaults,$qSubmitDefault,$remote_path,$qsub_extra,$count); } }else{ # Package is parrun, run the whole again! 
foreach my $i (0..$#job_ids){ - $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$bqs,$mem,$runtime,$remote_path,$qsub_extra,$i); + $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$info,$submit_defaults,$qSubmitDefault,$remote_path,$qsub_extra,$i); } } } @@ -386,7 +461,7 @@ my $cmd = "rsync -arvz $local_master_path ${account}:$remote_path/"; &CJ::message("Submitting job(s)"); - $cmd = "ssh $account 'source ~/.bashrc;cd $remote_path; bash -l rerun_master.sh > $remote_path/rerun_qsub.info; sleep 2'"; + $cmd = "ssh $account 'source ~/.bashrc && cd $remote_path && bash -l rerun_master.sh > $remote_path/rerun_qsub.info && sleep 2'"; &CJ::my_system($cmd,$verbose); @@ -399,25 +474,26 @@ my $cmd = "rsync -arvz $local_master_path ${account}:$remote_path/"; my $rerun_qsub_info_file = "$install_dir/.info/"."rerun_qsub.info"; - my $rerun_job_ids = &CJ::read_qsub($rerun_qsub_info_file); # array ref + my ($rerun_job_ids,$errors) = &CJ::read_qsub($rerun_qsub_info_file); # array ref #my $rerun_job_id = join(',', @{$rerun_job_ids}); - + foreach my $error (@{$errors}) { + CJ::warning($error); + } #======================================= # write changes to the run_history file #======================================= # - replace the old job_id's by the new one - - if($#job_ids eq 0){ - $job_id =~ s/$job_ids[0]/$rerun_job_ids->[0]/g; + if($#job_ids eq 0){ + $job_id =~ s/\b$job_ids[0]\b/$rerun_job_ids->[0]/g; &CJ::message("job-id: $rerun_job_ids->[0]"); }else{ &CJ::message("job-id: $rerun_job_ids->[0]-$rerun_job_ids->[-1]"); foreach my $i (0..$#{$counter}){ my $this = $counter->[$i] - 1; - $job_id =~ s/$job_ids[$this]/$rerun_job_ids->[$i]/g; + $job_id =~ s/\b$job_ids[$this]\b/$rerun_job_ids->[$i]/g; } } @@ -433,22 +509,21 @@ my $cmd = "rsync -arvz $local_master_path ${account}:$remote_path/"; } my $runinfo = join(',', @runinfo); -my $this_rerun = "$date -> $runinfo"; my $type = "rerun"; my 
$change = {new_job_id => $job_id, date => $date, - old_job_id => $runinfo + old_job_id => $runinfo, + submit_defaults => $submit_defaults, + alloc => $qsub_extra, }; - my $newinfo = &CJ::add_change_to_run_history($pid, $change, $type); &CJ::add_to_history($newinfo,$date,$type); - # write runinfo to FB as well my $timestamp = $date->{epoch}; my $inform = 1; @@ -667,7 +742,7 @@ sub show_log{ $log_argin = ""; }elsif( $log_argin =~ m/^\-?all$/ ){ my $pid_timestamp = &CJ::read_pid_timestamp(); - $num_show = keys $pid_timestamp; + $num_show = keys %$pid_timestamp; }elsif( $log_argin =~ m/^\-?\d*$/ ){ $log_argin =~ s/\D//g; #remove any non-digit $num_show = $log_argin; @@ -739,7 +814,7 @@ sub show_log{ print "script: $info->{program}\n"; #print "remote_path: $info->{remote_path}\n"; print "initial_flag: $info->{runflag}\n"; - print "reruned: ", (keys $info->{rerun}) . " times \n" if($info->{rerun}) ; + print "reruned: ", 0+keys(%{$info->{rerun}}) . " times \n" if($info->{rerun} && ref $info->{rerun} eq ref {}) ; print "cleaned: $info->{clean}->{date}->{datestr}\n" if($info->{clean}) ; print "\n"; print ' ' x 10; print "$info->{message}\n"; @@ -747,10 +822,6 @@ sub show_log{ } - - - - exit 0; @@ -803,6 +874,7 @@ sub clean my $remote_path; my $job_id; my $save_path; + my $bqs ; my $info; if((!defined $pid) || ($pid eq "") ){ @@ -824,7 +896,7 @@ sub clean } - + $bqs = $info->{'bqs'}; $account = $info->{'account'}; $local_path = $info->{'local_path'}; $remote_path = $info->{'remote_path'}; @@ -844,22 +916,51 @@ sub clean # make sure s/he really want a deletion - CJ::message("Are you sure you would like to clean $short_pid? 
Y/N"); - my $yesno = ; chomp($yesno); - - if(lc($yesno) eq "y" or lc($yesno) eq "yes"){ - + CJ::yesno("Are you sure you would like to clean $short_pid"); CJ::message("Cleaning $short_pid"); my $local_clean = "$local_path\*"; my $remote_clean = "$remote_path\*"; my $save_clean = "$save_path\*"; - if (defined($job_id) && $job_id ne "") { + + + my $avail_ids; + if($bqs eq "SGE"){ + + my $expr = "qstat -xml | tr \'\n\' \' \' | sed \'s#]*>#\\\n#g\' | sed \'s#<[^>]*>##g\' | grep \" \" | column -t"; + $avail_ids = `ssh ${account} $expr | grep CJ_$short_pid | awk \'{print \$1}\' | tr '\n' ' ' ` ; + #print $avail_ids . "\n"; + + }elsif($bqs eq "SLURM"){ + $avail_ids = `ssh ${account} ' sacct -n --format=jobid,jobname%15 | grep -v "^\\s*[0-9\_]*\\." | grep CJ_$short_pid ' | awk \'{print \$1}\' | tr '\n' ' ' `; + }else{ + &CJ::err("Unknown batch queueing system"); + } + + $avail_ids = $job_id if $info->{runflag} eq "rrun"; + + if (defined($avail_ids) && $avail_ids ne "") { CJ::message("Deleting jobs associated with package $short_pid"); - my @job_ids = split(',',$job_id); - $job_id = join(' ',@job_ids); - my $cmd = "rm -rf $local_clean; rm -rf $save_clean; ssh ${account} 'qdel $job_id; rm -rf $remote_clean' " ; + + #my @job_ids = split(',',$job_id); + #$job_id = join(' ',@job_ids); + + # make sure that all are deleted. Sometimes we dont catch a jobID locally because of a failure + # So this really cleans up the mess + + #print $job_id . 
"\n"; + + my $cmd; + if($bqs eq "SGE"){ + $cmd = "rm -rf $local_clean; rm -rf $save_clean; ssh ${account} 'qdel $avail_ids; rm -rf $remote_clean' " ; + }elsif($bqs eq "SLURM"){ + $cmd = "rm -rf $local_clean; rm -rf $save_clean; ssh ${account} 'scancel $avail_ids; rm -rf $remote_clean' " ; + }else{ + &CJ::err("Unknown batch queueing system"); + } + &CJ::my_system($cmd,$verbose); + }else { my $cmd = "rm -rf $local_clean;rm -rf $save_clean; ssh ${account} 'rm -rf $remote_clean' " ; &CJ::my_system($cmd,$verbose); @@ -889,9 +990,8 @@ my $timestamp = $date->{epoch}; my $inform = 1; &CJ::write2firebase($info->{'pid'},$newinfo, $timestamp, $inform); -} - exit 0; +exit 0; } @@ -904,11 +1004,9 @@ my $inform = 1; sub show { my ($pid, $num, $file, $show_tag) = @_; - - - + my $info; - if( (!defined $pid) || ($pid eq "") ){ + if( (!defined $pid) || ($pid eq "") || ($pid eq '$') ){ #read last_instance.info; $info = &CJ::retrieve_package_info(); $pid = $info->{'pid'}; @@ -959,7 +1057,13 @@ sub show }else{ $script = (`ssh ${account} 'cat $remote_path/logs/*stderr'`) ;chomp($script); } - + + }elsif($show_tag eq "runlog" ){ + if($num){ + $script = (`ssh ${account} 'less $remote_path/$num/logs/CJrun*.log'`) ;chomp($script); + }else{ + $script = (`ssh ${account} 'less $remote_path/logs/CJrun*.log'`) ;chomp($script); + } }elsif($show_tag eq "ls" ){ if($num){ $script = (`ssh ${account} 'ls -C1 $remote_path/$num/'`) ;chomp($script); @@ -980,10 +1084,27 @@ sub show $script = (`ssh ${account} 'less -C1 $remote_path/$file'`) ;chomp($script); } - } + }elsif($show_tag eq "json" ){ - print "$script \n"; + if(!defined($file)){ + $file=$num; + $num = ""; + } + + if($num){ + $script = (`ssh ${account} 'python -m json.tool $remote_path/$num/$file'`) ;chomp($script); + }else{ + $script = (`ssh ${account} 'python -m json.tool $remote_path/$file'`) ;chomp($script); + } + } + + + + + + print "$script \n"; + exit 0; } @@ -1035,7 +1156,7 @@ sub show_info print "script: $info->{program}\n"; print 
"remote_path: $info->{remote_path}\n"; print "initial_flag: $info->{runflag}\n"; - print "reruned: ",1+$#{$info->{rerun}} . " times \n" if($info->{rerun}) ; + print "reruned: ", 0+keys(%{ $info->{rerun} }) . " times \n" if($info->{rerun}) ; print "cleaned: $info->{clean}->{date}->{datestr}\n" if($info->{clean}) ; print "\n"; print ' ' x 10; print "$info->{message}\n"; @@ -1063,20 +1184,21 @@ sub show_info sub get_summary { my ($machine) = @_; - - - - my $ssh = &CJ::host($machine); my $account = $ssh->{'account'}; my $bqs = $ssh->{'bqs'}; - my $user = $ssh->{'user'}; + + #my $remoteinfo = &CJ::remote(); - + my $qstat = "qstat"; + $qstat = 'squeue --format="%.18i %.9P %.8j %.20u %.2t %.10M %.6D %R"' if($bqs eq "SLURM"); + + + my $live_jobs = (`ssh ${account} '$qstat | grep $user | wc -l' 2>$CJlog_error` ); chomp($live_jobs); #my $REC_STATES = ""; my $REC_PIDS_STATES = ""; @@ -1087,18 +1209,26 @@ sub get_summary # This now works for SGE my $expr = "qstat -xml | tr \'\n\' \' \' | sed \'s#]*>#\\\n#g\' | sed \'s#<[^>]*>##g\' | grep \" \" | column -t"; +<<<<<<< HEAD $REC_PIDS_STATES = (`ssh ${account} $expr | awk \'{print \$3,\$5}\' `) ;chomp($REC_PIDS_STATES); +======= + $REC_PIDS_STATES = (`ssh ${account} $expr | awk \'{print \$3,\$5}\' 2>$CJlog_error `) ;chomp($REC_PIDS_STATES); +>>>>>>> master #print $REC_PIDS_STATES . "\n"; #print $expr . "\n"; #my $expr = "qstat -xml | tr \'\\n\' \' \' | sed \'s#]*>#\\n#g\' | sed \'s#<[^>]*>##g\' | grep \" \" | column -t"; #$REC_PIDS_STATES = `ssh ${account} $expr | awk \'{print \$3,\$5}\'` ;chomp($REC_PIDS_STATES); +<<<<<<< HEAD +======= +>>>>>>> master + }elsif($bqs eq "SLURM"){ - # $REC_STATES = (`ssh ${account} 'sacct --format=state | grep -v "^[0-9]*\\."'`) ;chomp($REC_STATES); - $REC_PIDS_STATES = (`ssh ${account} 'sacct -n --format=jobname%15,state | grep -v "^[0-9]*\\."'`);chomp($REC_PIDS_STATES); + # $REC_STATES = (`ssh ${account} 'sacct --format=state | grep -v "^\\s*[0-9\_]*\\." 
'`) ;chomp($REC_STATES); + $REC_PIDS_STATES = (`ssh ${account} 'sacct -n --format=jobname%15,state | grep -v "^\\s*[0-9\_]*\\." ' 2>$CJlog_error`);chomp($REC_PIDS_STATES); }else{ &CJ::err("Unknown batch queueing system"); @@ -1114,7 +1244,7 @@ sub get_summary my ($longpid,$state) = split(' ',$rec_pids_states[$i]); #print $longpid . "\n"; if ( $longpid =~ m/^CJ\_.*/){ - push @rec_pids, substr($longpid,3,8); # remove the first 3 (CJ_), and read the firt 8 from the rest + push @rec_pids, substr($longpid,3,8); # remove the first 3 (CJ_), and read the first 8 from the rest push @rec_states, $state; } } @@ -1150,6 +1280,7 @@ sub get_summary my @this_states = values %$this_states; my @this_unique_states = do { my %seen; grep { !$seen{$_}++ } @this_states}; + push @unique_states, @this_unique_states; #print $this_unique_states[0] . "\n"; @@ -1170,12 +1301,12 @@ sub get_summary @unique_states = do { my %seen; grep { !$seen{$_}++ } @unique_states}; - + #print '-' x 35;print "\n"; print "\n"; print "\033[32m$user\@$machine \033[0m\n\n"; - print ' ' x 5; print "Total Jobs : ", 1+$#rec_states . "\n"; + print ' ' x 5; print "Live Jobs : ", $live_jobs . "\n"; print ' ' x 5;print '-' x 17;print "\n"; foreach my $i (0..$#unique_states){ @@ -1197,14 +1328,33 @@ sub get_summary +sub numeric_month(){ + my ($mon) = @_; + + # Given 3 character month, give the number. 
+ my $month_map = {"Jan" => 1, + "Feb" => 2, + "Mar" => 3, + "Apr" => 4, + "May" => 5, + "Jun" => 6, + "Jul" => 7, + "Aug" => 8, + "Sep" => 9, + "Oct" => 10, + "Nov" => 11, + "Dec" => 12 + }; + + return $month_map->{$mon}; +} + sub get_state { my ($pid,$num) = @_; - #print "$pid\n"; - my $info; if( (!defined $pid) || ($pid eq "") ){ #read last_instance.info; @@ -1228,21 +1378,34 @@ sub get_state } - - my $short_pid = substr($info->{pid},0,8); + &CJ::CheckConnection($info->{'machine'}); + my $short_pid = substr($info->{pid},0,8); my $account = $info->{'account'}; my $job_id = $info->{'job_id'}; my $bqs = $info->{'bqs'}; my $runflag = $info->{'runflag'}; + + + # This is a workaround for a bug in SLURM + # one must provide start time of the job + + my $yy = $info->{'date'}{year}; + my $dd = $info->{'date'}{day}; + my $mm = &CJ::check_hash( $info->{'date'}, ['numericmonth'] ) ? $info->{'date'}{numericmonth}:&CJ::numeric_month($info->{'date'}{month}); + + my $starttime = sprintf ("%04d-%02d-%02d",$yy,$mm,$dd); + + my $states={}; -if ( $runflag =~ m/^par*/ ){ +if ( $runflag =~ m/^parrun$/ ){ # par case my @job_ids = split(',',$job_id); my $jobs = join('|', @job_ids); + my $REC_STATES; my $REC_IDS; @@ -1250,31 +1413,61 @@ if ( $runflag =~ m/^par*/ ){ $REC_STATES = (`ssh ${account} 'qstat -u \\* | grep -E "$jobs" ' | awk \'{print \$5}\'`) ;chomp($REC_STATES); $REC_IDS = (`ssh ${account} 'qstat -u \\* | grep -E "$jobs" ' | awk \'{print \$1}\'`) ;chomp($REC_IDS); + my @rec_states = split('\n',$REC_STATES); + my @rec_ids = split('\n',$REC_IDS); + + foreach my $i (0..$#rec_ids){ + my $key = $rec_ids[$i]; + my $val = $rec_states[$i]; + $states->{$key} = $val; + } + + }elsif($bqs eq "SLURM"){ - $REC_STATES = (`ssh ${account} 'sacct -n --jobs=$job_id | grep -v "^[0-9]*\\." ' | awk \'{print \$6}\'`) ;chomp($REC_STATES); - $REC_IDS = (`ssh ${account} 'sacct -n --jobs=$job_id | grep -v "^[0-9]*\\." 
' | awk \'{print \$1}\'`) ;chomp($REC_IDS); + + my $REC_IS_STATE= (`ssh ${account} 'sacct -S $starttime -n --jobs=$job_id --format=jobid%20,state%10 | grep -v "^\\s*[0-9\_]*\\." ' | awk \'{print \$1,\$2}\'`) ; + + chomp($REC_IS_STATE); - #$states = (`ssh ${account} 'sacct -n --format=state --jobs=$job_id'`) ;chomp($state); + my @REC_IS_STATE = split /^/, $REC_IS_STATE; + + + foreach my $i (0..$#REC_IS_STATE){ + chomp($REC_IS_STATE[$i]); + my ($key, $val) = split(/\s/,$REC_IS_STATE[$i],2); + $states->{$key} = $val if ($key); + } + }else{ &CJ::err("Unknown batch queueing system"); } - my @rec_states = split('\n',$REC_STATES); - my @rec_ids = split('\n',$REC_IDS); + +}elsif ($runflag =~ m/^rrun$/){ - foreach my $i (0..$#rec_ids){ - my $key = $rec_ids[$i]; - my $val = $rec_states[$i]; - $states->{$key} = $val; + + # SLURM ONLY + my $REC_IS_STATE= (`ssh ${account} 'sacct -S $starttime -n --jobs=$job_id --format=jobid%20,state%10 | grep -v "^\\s*[0-9\_]*\\." ' | awk \'{print \$1,\$2}\'`) ; + chomp($REC_IS_STATE); + + my @REC_IS_STATE = split /^/, $REC_IS_STATE; + + + foreach my $i (0..$#REC_IS_STATE){ + chomp($REC_IS_STATE[$i]); + my ($key, $val) = split(/\s/,$REC_IS_STATE[$i],2); + $states->{$key} = $val; } - + + }else{ my $state; if($bqs eq "SGE"){ $state = (`ssh ${account} 'qstat | grep $job_id' | awk \'{print \$5}\'`) ;chomp($state); }elsif($bqs eq "SLURM"){ - $state = (`ssh ${account} 'sacct | grep $job_id | grep -v "^[0-9]*\\." ' | awk \'{print \$6}\'`) ;chomp($state); + + $state = (`ssh ${account} 'sacct -S $starttime -n --jobs=$job_id | grep -v "^\\s*[0-9\_]*\\." 
' | awk \'{print \$6}\'`) ;chomp($state); }else{ &CJ::err("Unknown batch queueing system"); } @@ -1284,7 +1477,7 @@ if ( $runflag =~ m/^par*/ ){ } my $key = $job_id; my $val = $state; - $states->{$key} = $val; + $states->{$key} = $val if ($key); } return $states; @@ -1348,14 +1541,10 @@ sub get_print_state my $runflag = $info->{'runflag'}; - - - - my $states = &CJ::get_state($pid,$num); my $size = scalar keys %$states; -if($size eq 1){ +if($runflag =~ m/^run$/){ my ($job_id) = keys %$states; my $state = $states->{$job_id};chomp($state); @@ -1366,6 +1555,46 @@ if($size eq 1){ print "state: $state\n"; +}elsif($runflag =~ m/^rrun$/){ + my @job_ids = keys %$states; + + + my @allocated; + my @not_allocated; + my $array_id; + foreach my $i (0..$#job_ids) + { + ($array_id, my $task) = split('_',$job_ids[$i],2);chomp($task); + $task =~ m/^\d+$/ ? push @allocated , $task : push @not_allocated , $task; + + } + + + my @sorted_counter = sort { $a <=> $b } @allocated if(@allocated); + + print "\n"; + print "\033[32mpid $info->{'pid'}\033[0m\n"; + print "remote_account: $account\n"; + + foreach my $i (0..$#sorted_counter) + { + my $key = "$array_id\_$sorted_counter[$i]"; + my $state = $states->{$key}; chomp($state); + $state =~ s/[^A-Za-z]//g; + printf "%-20s%-10s\n", $key, $state; + } + + foreach my $i (0..$#not_allocated) + { + my $key = "$array_id\_$not_allocated[$i]"; + my $state = $states->{$key}; chomp($state); + $state =~ s/[^A-Za-z]//g; + printf "%-20s%-10s\n", $key, $state; + } + + + print '-' x 35;print "\n"; + }else{ my @job_ids = split(',',$job_id); @@ -1384,19 +1613,19 @@ if($size eq 1){ } #$state = s/^\s+|\s+$/; $state =~ s/[^A-Za-z]//g; - print "$counter " . "$job_ids[$i] " . "$state" . 
"\n"; + printf "%-10i%-20s%-10s\n",$counter , $job_ids[$i],$state; } }elsif(&CJ::isnumeric($num) && $num <= $#job_ids+1){ print '-' x 50;print "\n"; print "\033[32mpid $info->{'pid'}\033[0m\n"; print "remote_account: $account\n"; - my $tmp = $num -1; + my $tmp = $num-1; my $val = $states->{$job_ids[$tmp]}; if (! $val){ $val = "unknwon"; } - print "$num " . "$job_ids[$tmp] " . "$val" . "\n"; - + printf "%-10i%-20s%-10s\n",$num , $job_ids[$tmp] ,$val; + }else{ my $lim =1+$#job_ids; @@ -1474,7 +1703,6 @@ sub grep_var_line sub add_record{ my ($info) = @_; - # Find the last number &CJ::add_to_history($info, $info->{date}, $info->{runflag}); &CJ::add_to_run_history($info); &CJ::add_to_pid_timestamp( { $info->{pid} => $info->{date}{epoch} } ); @@ -1483,69 +1711,287 @@ sub add_record{ } -sub host{ - my ($machine_name) = @_; - - my $ssh_config = {}; + + + + +sub show_cluster_config{ + my ($cluster) = @_; - my $lines; - open(my $FILE, $ssh_config_file) or die "could not open $ssh_config_file: $!"; - local $/ = undef; - $lines = <$FILE>; - close ($FILE); - - my $this_host ; - if($lines =~ /\[$machine_name\](.*?)\[$machine_name\]/isg) - { - $this_host = $1; + if (!defined $cluster || $cluster eq ""){ + my $cmd = "less $ssh_config_file 2>$CJlog_error"; + system($cmd); }else{ - &CJ::err(".ssh_config:: Machine $machine_name not found. "); + CJ::err("No such cluster found. 
add $cluster to ssh_config (you may use 'CJ config-update $cluster') .") if !is_valid_machine($cluster); + my $ssh_config_hashref = &CJ::read_ssh_config(); + my $fieldsize = 20; + while ( my ($key, $value) = each %{ $ssh_config_hashref->{$cluster} } ){ + printf "\n\033[32m%-${fieldsize}s\033[0m%s", $key, $value; + } + print "\n\n"; } - my ($user) = $this_host =~ /User[\t\s]*(.*)/;$user =~ s/^\s+|\s+$//g; - my ($host) = $this_host =~ /Host[\t\s]*(.*)/;$host =~ s/^\s+|\s+$//g; - my ($bqs) = $this_host =~ /Bqs[\t\s]*(.*)/ ;$bqs =~ s/^\s+|\s+$//g; - my ($remote_repo) = $this_host =~ /Repo[\t\s]*(.*)/ ;$remote_repo =~ s/^\s+|\s+$//g; - my ($remote_matlabpath) = $this_host =~ /MATlib[\t\s]*(.*)/;$remote_repo =~ s/^\s+|\s+$//g; - my $account = $user . "@" . $host; - - - $ssh_config->{'account'} = $account; - $ssh_config->{'bqs'} = $bqs; - $ssh_config->{'remote_repo'} = $remote_repo; - $ssh_config->{'matlib'} = $remote_matlabpath; - $ssh_config->{'user'} = $user; - return $ssh_config; + return 1; } -sub retrieve_package_info{ + + +sub cluster_config_template{ + # for sorting purposes + my @config_keys=('Host','User','Bqs','Repo','MAT','MATlib','Python','Pythonlib'); + my $cluster_config = { + 'Host' => {example=>'35.185.238.124', default=>undef}, + 'User' => {example=>$CJID, default=>undef}, + 'Bqs' => {example=>undef, default=>'SLURM'}, + 'Repo' => {example=>'/home/ubuntu/CJRepo_Remote',default=>undef}, + 'MAT' => {example=>undef,default=>'matlab/r2016b'}, + 'MATlib' => {example=>undef,default=>'CJinstalled/cvx:CJinstalled/mosek/7/toolbox/r2013a'}, + 'Python' => {example=>undef,default=>'python3.4'},, + 'Pythonlib' => {example=>undef,default=>'pytorch:torchvision:cuda80:pandas:matplotlib:-c soumith'} + }; - my ($pids) = @_; - #### EVERY THING IS DONE LOCALLY NOW. - # From commit 87ec10b - - if(!$pids){ - $pids =`sed -n '1{p;q;}' $last_instance_file`; chomp($pids); - } - - - my $is_scalar = is_valid_pid($pids) ? 
1 : 0; - $pids = [$pids] if $is_scalar; #change the single pid to be a array ref - - # Make sure all PIDs are valid - foreach my $pid (@{$pids}){ - &CJ::err("No valid PID detected") unless &CJ::is_valid_pid($pid); - } - - my $records = &CJ::read_record($pids); # pids can be a scalar or a array ref + return ($cluster_config,\@config_keys); +} - + +sub update_cluster_config{ + + my ($cluster, @keyval) = @_; + + + my $file_content = &CJ::readFile($ssh_config_file); + + # read the contents + + my %machine_hash = $file_content =~ /\[($cluster)\](.*?)\[\g{-2}\]/isg; + my $size = keys %machine_hash; + + if ($size lt 1){ + my $yesno = &CJ::yesno("machine $cluster is not found in ssh_config. Do you want to add it"); + if ($yesno){ + + my ($cluster_config,$config_keys) = &CJ::cluster_config_template(); + my $new_config = "\n[$cluster]\n"; + foreach my $key (@{$config_keys}){ + my $yesno = "no"; + my $new_value = undef; + while ( $yesno !~ m/y[\t\s]*|yes[\t\s]*/i ){ + my $prompt = defined($cluster_config->{$key}{default}) ? "enter '$key' (press Enter key for default value '$cluster_config->{$key}{default}'):":"Enter $key (e.g., $cluster_config->{$key}{example}):"; + my $default_entry = defined($cluster_config->{$key}) ? '':undef; + ($new_value, $yesno)=getuserinput($prompt, $default_entry); + + if ($new_value eq $default_entry){ + if (defined($cluster_config->{$key}{default}) ){ + $new_value = $cluster_config->{$key}{default} + }else{ + $yesno="no"; + } + } + } + $new_config .= "$key\t$new_value" . 
"\n"; + } + $new_config .= "[$cluster]\n"; + + $file_content .= $new_config ; + &CJ::writeFile($ssh_config_file, $file_content); + &CJ::message("added $cluster to ssh_config."); + &CJ::show_cluster_config($cluster); + return 1; + } + } + + #print $machine_hash{$cluster}; + my @lines = split '\n', $machine_hash{$cluster}; + + #print Dumper @lines; + + + #print Dumper $ssh_config; + + my $num_changes = 0; + + if ( not @keyval){ + + foreach my $i (0..$#lines){ + if ($lines[$i] !~ /^\s*$/){ + my ($old_key,$old_value) = split(/\s/, $lines[$i], 2); + $old_key =remove_white_space($old_key); + $old_value =remove_white_space($old_value); + my $yesno = "no"; + my $new_value = undef; + while ( $yesno !~ m/y[\t\s]*|yes[\t\s]*/i ){ + ($new_value, $yesno)=getuserinput("press Enter $old_key (Enter to keep $old_value):", ''); + } + if (not $new_value eq ''){ + $lines[$i] = "$old_key\t$new_value"; + $num_changes += 1; + } + } + } + + }else{ + # just update those keys that exists + + + #print Dumper @keyval; + my $new_keyval = {}; + foreach (@keyval){ + my ($new_key, $new_val) = split( /=|:/ , $_); + $new_keyval->{$new_key} = $new_val; + } + + + my %lc_new_keyval = map { lc $_ => { name => $_, value => $new_keyval->{$_} } + } keys %$new_keyval; + + foreach my $i (0..$#lines){ + if ($lines[$i] !~ /^\s*$/){ + my ($old_key,$old_value) = split(/\s/, $lines[$i], 2); + $old_key =remove_white_space($old_key); + $old_value =remove_white_space($old_value); + #print $key . " => " . $value . "\n"; + my $lc_old_key = lc $old_key; + + if ( exists $lc_new_keyval{$lc_old_key} ){ + $lines[$i] = "$lc_new_keyval{$lc_old_key}{name}\t$lc_new_keyval{$lc_old_key}{value}" ; + $num_changes += 1; + } + } + } + + } + + + if ($num_changes > 0 ){ + my $new_config = "[$cluster]"; + foreach (@lines){ + $new_config .= $_ . 
"\n"; + } + $new_config .= "[$cluster]"; + $file_content =~ s/\[$cluster\](.*?)\[$cluster\]/$new_config/isg ; + &CJ::writeFile($ssh_config_file, $file_content); + &CJ::message("updated ssh_config file with $num_changes changes."); + &CJ::show_cluster_config($cluster); + }else{ + &CJ::message("no change applied to ssh_config."); + } + +} + + + +sub read_ssh_config{ + + my $ssh_config = {}; + + my $file_content = &CJ::readFile($ssh_config_file); + + # read the contents + + my %machine_hash = $file_content =~ /\[([\w\-]+)\](.*?)\[\g{-2}\]/isg; + + foreach my $machine (keys %machine_hash){ + $ssh_config->{$machine} = &CJ::parse_ssh_config($machine_hash{$machine}); + } + return $ssh_config; +} + + + +sub host{ + my ($machine_name) = @_; + my $ssh_config_hashref = &CJ::read_ssh_config(); + &CJ::err(".ssh_config:: machine $machine_name not found. ") unless &CJ::check_hash($ssh_config_hashref, [$machine_name]) ; + return $ssh_config_hashref->{$machine_name}; +} + + + + + +sub parse_ssh_config{ + my ($this_machine_string) = @_; + + my $ssh_config = {}; + + my ($user) = $this_machine_string =~ /User[\t\s]*(.*)/i; + $user =remove_white_space($user); + + my ($host) = $this_machine_string =~ /Host[\t\s]*(.*)/i; + $host =remove_white_space($host); + + my ($bqs) = $this_machine_string =~ /Bqs[\t\s]*(.*)/i ; + $bqs =remove_white_space($bqs); + + my ($remote_repo) = $this_machine_string =~ /Repo[\t\s]*(.*)/i ; + $remote_repo = remove_white_space($remote_repo); + + my ($remote_matlab_lib) =$this_machine_string =~ /MATlib[\t\s]*(.*)/i; + $remote_matlab_lib =remove_white_space($remote_matlab_lib); + + my ($remote_matlab_module) = $this_machine_string =~ /\bMAT\b[\t\s]*(.*)/i; + $remote_matlab_module =remove_white_space($remote_matlab_module); + + my ($remote_python_lib) = $this_machine_string =~ /Pythonlib[\t\s]*(.*)/i; + $remote_python_lib =remove_white_space($remote_python_lib); + + my ($remote_python_module) = $this_machine_string =~ /\bPython\b[\t\s]*(.*)/i; + 
$remote_python_module =remove_white_space($remote_python_module); + + + + my $account = $user . "@" . $host; + + $ssh_config->{'user'} = $user; + $ssh_config->{'host'} = $host; + $ssh_config->{'account'} = $account; + $ssh_config->{'bqs'} = $bqs; + $ssh_config->{'remote_repo'} = $remote_repo; + $ssh_config->{'matlib'} = $remote_matlab_lib; + $ssh_config->{'mat'} = $remote_matlab_module; + $ssh_config->{'user'} = $user; + $ssh_config->{'py'} = $remote_python_module; + $ssh_config->{'pylib'} = $remote_python_lib; + + return $ssh_config; + +} + + + + + + + + +sub retrieve_package_info{ + + my ($pids) = @_; + #### EVERY THING IS DONE LOCALLY NOW. + # From commit 87ec10b + + if(!$pids){ + $pids =`sed -n '1{p;q;}' $last_instance_file`; chomp($pids); + } + + + my $is_scalar = is_valid_pid($pids) ? 1 : 0; + $pids = [$pids] if $is_scalar; #change the single pid to be a array ref + + # Make sure all PIDs are valid + foreach my $pid (@{$pids}){ + &CJ::err("No valid PID detected") unless &CJ::is_valid_pid($pid); + } + + my $records = &CJ::read_record($pids); # pids can be a scalar or a array ref + + my $info_hash; foreach my $pid ( @$pids ){ @@ -1671,9 +2117,12 @@ my ($gmt_offset_hour, $remainder_in_second) = (int($abs_offset/3600), $abs_offse my $offset = sprintf("%s%02d:%02d:%02d", $sign,$gmt_offset_hour,$gmt_offset_min,$remainder_in_second); my $datestr = sprintf ("%04d-%03s-%02d %02d:%02d:%02d \(GMT %s\)", $year, $month_abbr[$mon], $mday, $hour,$min, $sec, $offset); +my $numeric_month = sprintf ("%02d", 1+$mon); + my $date = { year => $t->year, month => $month_abbr[$mon], + numericmonth => 1+$mon, day => $mday, hour => $hour, min => $min, @@ -1686,6 +2135,28 @@ my $date = { return $date; } + + +##################### +sub is_valid_machine{ +##################### + my ($machine) = @_; + my $ssh_config_all = CJ::read_ssh_config(); + return &CJ::check_hash($ssh_config_all, [$machine]) ? 
1:0; +} + + +##################### +sub is_valid_app{ +##################### + my ($app) = @_; + my $app_all = decode_json CJ::readFile($app_list_file); + my $lc_app = lc $app; + return (&CJ::check_hash($app_all, [$lc_app]) and $app_all->{$lc_app}->{'version'} ne "") ? 1:0; +} + + + # Check the package name given is valid sub is_valid_pid { @@ -1714,6 +2185,7 @@ if($bqs eq "SGE"){ $HEADER=<; +PROGRAM=""; +PID=""; +cd $DIR; + #mkdir scripts + #mkdir logs +SHELLSCRIPT=${DIR}/scripts/CJrun.${PID}.sh; +LOGFILE=${DIR}/logs/CJrun.${PID}.log; +MID + +my ($program_name,$ext)=remove_extension($program); + +$shell_neck =~ s||$pid|; +$shell_neck =~ s||$remote_path|; +if (&CJ::program_type($program) eq 'python') { +$shell_neck =~ s||$program_name|; +} else{ +$shell_neck =~ s||$program| ; +} + return $shell_neck; +} + + + + +##################################### +sub par_shell_neck{ +##################################### +my ($program,$pid,$counter,$remote_path) = @_; + +my $shell_neck; +$shell_neck = <<'MID'; +DIR=; +PROGRAM=""; +PID=""; +COUNTER=; +cd $DIR; + #mkdir scripts + #mkdir logs +SHELLSCRIPT=${DIR}/scripts/CJrun.${PID}.${COUNTER}.sh; +LOGFILE=${DIR}/logs/CJrun.${PID}.${COUNTER}.log; +MID + + +my ($program_name,$ext)=remove_extension($program); + +$shell_neck =~ s||$pid|; +$shell_neck =~ s||$counter|; +$shell_neck =~ s||$remote_path|; +if (&CJ::program_type($program) eq 'python') { + $shell_neck =~ s||$program_name|; +} else{ + $shell_neck =~ s||$program| ; +} + + + return $shell_neck; +} + + + + + + + + + + + + + + + # Check Numeric sub isnumeric { @@ -1761,16 +2377,60 @@ sub message{ } +sub yesno{ + my ($question,$noBegin) = @_; + my $prompt = $question . 
"?(Y/N)"; + CJ::message($prompt,$noBegin); + my $yesno = ; chomp($yesno); + exit 0 unless (lc($yesno) eq "y" or lc($yesno) eq "yes"); +} + + +sub getuserinput{ + my ($question,$default) = @_; + print $question; + my $user_input = ; + chomp($user_input); + $user_input = remove_white_space($user_input); + my $yesno; + if ( !defined($default) || not $user_input eq $default){ + print ' ' x 5 . "You have entered \'$user_input\'. Is this correct? (Y/N)"; + $yesno = ; chomp($yesno); + }else{ + $yesno = 'yes'; + } + return ($user_input, $yesno); +} + + + + + + + + + + + + + + + + + + sub my_system { my($cmd,$verbose) = @_; if($verbose){ - print("system: ",$cmd,"\n"); + &CJ::message("system:$cmd",1); system("$cmd"); }else{ - system("touch $CJlog") unless (-f $CJlog); - system("$cmd >> $CJlog 2>&1") ;#Error messages get sent to same place as standard output. + system("touch $CJlog_out") unless (-f $CJlog_out); + system("touch $CJlog_error") unless (-f $CJlog_error); + &CJ::writeFile($CJlog_out,"system: $cmd\n", "-a"); + system("$cmd >> $CJlog_out 2>$CJlog_error") ; } } @@ -1791,18 +2451,32 @@ sub touch sub writeFile { - # it should generate a bak up later! - my ($path, $contents) = @_; - open(FILE,">$path") or die "can't create file $path"; - print FILE $contents; - close FILE; + my ($path, $contents, $flag) = @_; + + if( -e "$path" ){ + #bak up + my $bak= "$path" . 
".bak"; + my $cmd="cp $path $bak"; + system($cmd); + } + + my $fh; + open ( $fh , '>', "$path" ) or die "can't create file $path" if not defined($flag); + + if(defined($flag) && $flag eq '-a'){ + open( $fh ,'>>',"$path") or die "can't create file $path"; + } + + print $fh $contents; + close $fh ; } + sub readFile { my ($filepath) = @_; - + my $content; open(my $fh, '<', $filepath) or die "cannot open file $filepath"; { @@ -1824,9 +2498,17 @@ sub readFile +######################### +sub short_pid(){ +######################### + my ($pid) = @_; + return substr($pid,0,8); +} -sub add_to_history -{ + +########################## +sub add_to_history{ +########################## my ($info, $date, $flag) = @_; # create if it doesnt exist; &CJ::create_history_file(); @@ -1903,21 +2585,12 @@ if(lc($type) eq "clean"){ #say Dumper($info); }elsif(lc($type) eq "rerun"){ + $info->{rerun} = {} if (! $info->{'rerun'}); + $info->{'job_id'} = $change->{new_job_id}; #firt time calling rerun + #$info->{rerun}->{"$change->{date}->{epoch}"} = $change->{old_job_id}; + $info->{rerun}->{"$change->{date}->{epoch}"} = $change; - if($info->{'rerun'}){ - $info->{'job_id'} = $change->{new_job_id}; - $info->{rerun}->{"$change->{date}->{epoch}"} = $change->{old_job_id}; - - #say Dumper($info); - }else{ - #firt time calling rerun - $info->{'job_id'} = $change->{new_job_id}; - $info->{rerun} = {}; - $info->{rerun}->{"$change->{date}->{epoch}"} = $change->{old_job_id}; - #say Dumper($info); - } # -# }else{ &CJ::err("Change of type '$type' is not recognized"); } @@ -1933,7 +2606,8 @@ if(lc($type) eq "clean"){ sub update_record{ my ($pid,$new_info) = @_; my $new_record = encode_json($new_info); - my $cmd="sed -i '' 's|.*$pid.*|$new_record|' $run_history_file"; + #backup run history file with -i flag + my $cmd="sed -i '.bak' 's|.*$pid.*|$new_record|' $run_history_file"; &CJ::my_system($cmd,0); } @@ -1960,7 +2634,7 @@ sub read_record{ $regex .= join "|", @pids; $regex .= ")"; - #print $regex . 
"\n"; + #print $regex . "\n"; my $remaining = scalar @pids; my $record_hash; @@ -1968,8 +2642,14 @@ sub read_record{ my $i=$#records; while ($i ge 0 & $remaining gt 0 ) { my $record = $records[$i]; - #print $record . "\n"; - if ($record =~ m/$regex/){ + + # make sure that we don't pick up PIDs in messages + my $record_json = decode_json $record; + + #print Dumper $record_json->{pid} . "\n"; + + + if ( $record_json->{pid} =~ m/$regex/){ my $matched_pid = $1; $record_hash->{$matched_pid} = $record; # $1 is the captured PID # delete this PID from the array @@ -1990,7 +2670,16 @@ sub read_record{ } +sub submit_defaults { + my $submit_defaults={}; + + $submit_defaults->{mem} = "8G"; # default memeory + $submit_defaults->{runtime} = "48:00:00"; # default memeory + $submit_defaults->{numberTasks} = 1 ; # default value for number of task + + return $submit_defaults; +} sub read_qsub{ my ($qsub_file) = @_; @@ -1998,17 +2687,30 @@ sub read_qsub{ open my $FILE, '<', $qsub_file or CJ::err("Job submission failed. 
Try --verbose for error explanation."); my @job_ids; +my @errors; while(<$FILE>){ my $job_id_info = $_;chomp($job_id_info); - my ($this_job_id) = $job_id_info =~/(\d+)/; # get the first string of integer, i.e., job_id - push @job_ids, $this_job_id; + push @errors, $job_id_info if ($job_id_info =~ m/.*[eE]rror.*/ ); + my ($this_job_id) = $job_id_info =~/job\D*(\d+)/i; # get the first string of integer, i.e., job_id + push @job_ids, $this_job_id unless !defined($this_job_id); + } close $FILE; -return \@job_ids; +return (\@job_ids,\@errors); } + + + + +sub remove_white_space +{ + my ($string) = @_; + $string =~ s/^\s+|\s+$//g unless not defined($string); + return $string; +} sub remove_extension { my ($program) = @_; @@ -2022,6 +2724,30 @@ sub remove_extension } +sub program_type +{ + my ($program) = @_; + + my ($program_name,$ext) = &CJ::remove_extension($program); + + my $type; + if(lc($ext) eq "m"){ + $type = "matlab"; + }elsif(lc($ext) eq "r"){ + $type = "R"; + }elsif(lc($ext) eq "py"){ + $type = "python"; + }else{ + CJ::err("Code type .$ext is not recognized $!"); + } + + return $type; +} + + + + + sub reexecute_cmd{ my ($cmd_num,$verbose) = @_; if (!$cmd_num){ @@ -2124,7 +2850,6 @@ sub create_history_file{ if( ! 
-f $history_file ){ &CJ::touch($history_file); - #my $header = sprintf("%-15s%-15s%-21s%-10s%-15s%-20s%-30s", "count", "date", "pid", "action", "machine", "job_id", "message"); my $header = sprintf("%-15s%-15s%-15s%-10s%-15s%-40s", "count", "date", "pid", "action", "machine","message"); @@ -2158,6 +2883,219 @@ sub create_pid_timestamp_file{ sub create_run_history_file{ &CJ::touch($run_history_file) unless ( -f $run_history_file); } + + + +sub create_ssh_config_md5{ + + &CJ::touch($ssh_config_md5) unless ( -f $ssh_config_md5); + + # Keep track of file changes for next time + if( -f $ssh_config_file ){ + ssh_config_md5('update') + } +} + + + + +sub ssh_config_md5{ + my ($mode) = @_; + + if ( $mode eq 'update' ){ + &CJ::message("updating CJ_python_venv",1); + my $cmd = `md5 $ssh_config_file > $ssh_config_md5`; + return 1; + }elsif($mode eq 'check'){ + # check whether things are modified + my $cmd = `grep \"\$(md5 $ssh_config_file)\" $ssh_config_md5 || echo 1`;chomp($cmd); # find or else exit 1. + return ($cmd eq "1") ? 1:0; + } + +} + + + + +sub install_software{ + + my ($app, $machine, $force_tag, $q_yesno) = @_; + #set the default to 1 + $q_yesno = defined($q_yesno) ? 
$q_yesno : 1; + + my $lc_app = lc($app); + # Sanity checks + &CJ::err('Incorrect specification \'install \'.') if ($machine =~ /^\s*$/ || $app =~ /^\s*$/); + &CJ::err("Application <$app> is not available.") unless &CJ::is_valid_app($app); + &CJ::err("Machine <$machine> is not valid.") unless &CJ::is_valid_machine($machine); + &CJ::yesno("Are you sure you would like to install '$lc_app' on '$machine'") if ($q_yesno eq 1); + + + &CJ::message("Installing $app on $machine."); + + my $installObj = CJ::Install->new($app,$machine,undef); + $installObj->anaconda($force_tag) if $lc_app eq 'anaconda'; + $installObj->miniconda($force_tag) if $lc_app eq 'miniconda'; + $installObj->cvx($force_tag) if $lc_app eq 'cvx'; + $installObj->composer($force_tag) if $lc_app eq 'composer'; + +} + + + + + + +sub CodeObj{ + +my ($path,$program,$dep_folder) = @_; + + $dep_folder ||= ''; # default +my $program_type = &CJ::program_type($program); + +my $code; +if($program_type eq 'matlab'){ + $code = CJ::Matlab->new($path,$program,$dep_folder); +}elsif($program_type eq 'r'){ + $code = CJ::R->new($path,$program,$dep_folder); +}elsif($program_type eq 'python'){ + $code = CJ::Python->new($path,$program,$dep_folder); +}else{ + CJ::err("ProgramType $program_type is not recognized.$!") +} + return $code; +} + + + + + +sub getExtension{ + my ($filename) = @_; + #print "$filename\n"; + + my ($ext) = $filename =~ /\.([^.]+)$/; + return $ext; +} + + +sub connect2cluster{ + my ($machine, $verbose) = @_; + my $ssh = &CJ::host($machine); + my $cmd = "ssh $ssh->{account}"; + &CJ::message("system:$cmd",1) if $verbose; + system("$cmd"); + return 1; +} + + + + + +sub avail{ + my ($tag) = @_; + + if( $tag =~ /^machine[s]?$|^cluster[s]?$/i ){ + my $ssh_config_hashref = &CJ::read_ssh_config(); + + # find max size of strings + my @length; + for (keys %{$ssh_config_hashref} ){ + push @length, length($_); + } + my $fieldsize = &CJ::max(@length) + 4; + + #print + foreach my $machine ( keys %{$ssh_config_hashref}){ + 
my $account = $ssh_config_hashref->{$machine}->{'account'}; + printf "\n\033[32m%-${fieldsize}s\033[0m%s", $machine, $account; + } + print "\n\n"; + + }elsif($tag =~ /^app[s]?$/) { + # read the .app_list + my $app_all = decode_json CJ::readFile($app_list_file); + + # find max size of app name + my @length_0; + my @length_1; + + for (keys %{$app_all} ){ + push @length_0, length($_); + push @length_1, length($app_all->{$_}->{'version'}); + } + my $fieldsize_0 = &CJ::max(@length_0) + 4; + my $fieldsize_1 = &CJ::max(@length_1) + 4; + + + #print + for (keys %{$app_all} ){ + my $version = $app_all->{$_}->{'version'}; + my $space = $app_all->{$_}->{'space'}; + my $time = $app_all->{$_}->{'install_time'}; + printf "\n\033[32m%-${fieldsize_0}s\033[0m%-${fieldsize_1}s%-10s%s", $_, $version, $space, $time unless $version eq ""; + } + print "\n\n"; + + + }else{ + &CJ::err("unknown tag $tag"); + } + + + + exit 0; +} + + + +sub max { + my (@vars) = @_; + + my $max = shift @vars; + + for (@vars) { + $max = $_ if $_ > $max; + } + + return $max; +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/CJ/CJVars.pm b/src/CJ/CJVars.pm index e8e6ecf..2bd3a0b 100644 --- a/src/CJ/CJVars.pm +++ b/src/CJ/CJVars.pm @@ -35,11 +35,15 @@ our $install_dir = join '/', @CJ_dir_array; our $info_dir = "$install_dir/.info"; our $HOME = $ENV{"HOME"}; -our $localPrefix = "$HOME/RunRepo_local/"; -our $savePrefix = "$HOME/Dropbox/clusterjob_saveRepo/"; +our $localPrefix = "$HOME/CJRepo_Local/"; +our $savePrefix = "$HOME/CJRepo_Save/"; our $last_instance_file = "$install_dir/.info/last_instance.info"; -our $CJlog = "$install_dir/.info/CJcall.log"; + +our $CJlog_dir = "$install_dir/CJlog"; +our $CJlog_out = "$CJlog_dir/call.log"; +our $CJlog_error = "$CJlog_dir/errors.log"; + our $AgentIDPATH = "$install_dir/.info/agent_id"; # The UUID of installation our $get_tmp_dir = "$HOME/CJ_get_tmp"; @@ -52,7 +56,10 @@ our $lastSync_file = "$info_dir/last_sync"; our 
$save_info_file = "$info_dir/save.info"; our $ssh_config_file = "$install_dir/ssh_config"; our $remote_config_file = "$install_dir/cj_config"; -our $firebase_name = "clusterjob-78552"; +our $firebase_name = "clusterjob-78552"; +our $app_list_file = "$src_dir/.app_list"; +our $ssh_config_md5 = "$install_dir/.ssh_config.md5"; + # Read AgentID our $AgentID= undef; @@ -97,7 +104,7 @@ if($KEY){ # Export global variables -our @EXPORT = qw( $lastSync_file $local_push_timestamp_file $pid_timestamp_file $firebase_name $AgentIDPATH $AgentID $CJID $CJKEY $info_dir $src_dir $install_dir $remotePrefix $localPrefix $savePrefix $last_instance_file $get_tmp_dir $history_file $cmd_history_file $run_history_file $save_info_file $ssh_config_file $remote_config_file $CJlog $localIP $localHostName $localUserName); +our @EXPORT = qw( $lastSync_file $local_push_timestamp_file $pid_timestamp_file $firebase_name $AgentIDPATH $AgentID $CJID $CJKEY $info_dir $src_dir $install_dir $localPrefix $savePrefix $last_instance_file $get_tmp_dir $history_file $cmd_history_file $run_history_file $save_info_file $ssh_config_file $remote_config_file $CJerrorlog $CJlog_dir $CJlog_out $CJlog_error $localIP $localUserName $app_list_file $ssh_config_md5); diff --git a/src/CJ/Get.pm b/src/CJ/Get.pm index caaebed..2e754e3 100644 --- a/src/CJ/Get.pm +++ b/src/CJ/Get.pm @@ -6,6 +6,7 @@ use strict; use warnings; use CJ; use CJ::CJVars; +use Data::Dumper; @@ -53,6 +54,12 @@ sub gather_results{ my $program = $info->{'program'}; + + # Check Connection; + &CJ::CheckConnection($machine); + + + # gather IS ONLY FOR PARRUN if(! $runflag =~ m/^par*/){ CJ::err("GATHER must be called for a 'parrun' package. 
Please use GET instead."); @@ -124,19 +131,19 @@ GATHER &CJ::message("Gathering $pattern in $dir_name..."); - $cmd = "ssh $account 'cd $remote_path; bash -l $gather_name 2> cj_gather.out'"; + $cmd = "ssh $account 'cd $remote_path; bash -l $gather_name 2> cj_gather.stderr'"; &CJ::my_system($cmd,1); # Get the feedback - $cmd = "scp $account:$remote_path/cj_gather.out /tmp/"; + $cmd = "scp $account:$remote_path/cj_gather.stderr /tmp/"; &CJ::my_system($cmd,$verbose); my $short_pid = substr($info->{'pid'},0,8); - if ( ! -s "/tmp/cj_gather.out" ){ + if ( -z "/tmp/cj_gather.stderr" ){ &CJ::message("Gathering results done! Please use \"CJ get $short_pid \" to get your results."); }else{ - my $error = `cat "/tmp/cj_gather.out"`; + my $error = `cat "/tmp/cj_gather.stderr"`; &CJ::err("$error"); } @@ -149,13 +156,39 @@ GATHER - - sub reduce_results{ - my ($pid,$res_filename,$verbose, $text_header_lines) = @_; + my ($pids,$res_filename,$verbose, $text_header_lines, $force_tag) = @_; + + my $yesno = undef; # wether they need to reduce with qsub (for big reduction) or not; + + if(! @$pids){ + # just the last instance + my $info = &CJ::retrieve_package_info(); + my $pid = $info->{'pid'}; + CJ::message("$pid"); + + reduce_one_pid($pid,$res_filename,$verbose, $text_header_lines,$force_tag,$yesno); + }else{ + # here $pids is a reference to an array + foreach my $pid (@$pids){ + CJ::message("$pid"); + $yesno = reduce_one_pid($pid,$res_filename,$verbose, $text_header_lines,$force_tag,$yesno); + + } + } + +} + + +########################### +sub reduce_one_pid{ +########################### + my ($pid,$res_filename,$verbose, $text_header_lines,$force_tag,$yesno) = @_; + + my $info; if( (!defined $pid) || ($pid eq "") ){ #read last_instance.info; @@ -188,16 +221,34 @@ sub reduce_results{ my $job_id = $info->{'job_id'}; my $program = $info->{'program'}; - # REDUCE IS ONLY FOR PARRUN - if(! 
$runflag =~ m/^par*/){ + + + # Check Connection; + &CJ::CheckConnection($machine); + + + + # REDUCE IS ONLY FOR PARRUN + if(! $runflag =~ m/^par*/){ CJ::err("REDUCE must be called for a 'parrun' package. Please use GET instead."); - } + } + + # Check that job has been actually submitted. + my @job_ids = split(',', $job_id); + my $num_res = 1+$#job_ids; + my $short_pid = &CJ::short_pid($pid); + + if ( $num_res < 1 ){ + CJ::message(" Nothing to reduce. no job id found. try 'cj rerun $short_pid' to resubmit this PID."); + exit 0; + } + # Get current remote directory from .ssh_config - # user might wanna rename, copy to anothet place, + # user might wanna rename, copy to another place, # etc. We consider the latest one , and if the # saved remote is different, we issue a warning # for the user. @@ -223,6 +274,18 @@ sub reduce_results{ &CJ::err("The result filename must be provided for Reduce with parrun packages, eg, 'clusterjob reduce Results.mat' "); } + + + # clear everything if force + if($force_tag == 1){ + ### You need to delete all the previous files generated by reduce in PID top directory. + my $cmd = "ssh $account 'rm -f $remote_path/$res_filename $remote_path/*.cjr'"; + &CJ::my_system($cmd,$verbose); + } + + + + my $check_runs = &CJ::Get::make_parrun_check_script($info,$res_filename); my $check_name = "check_complete.sh"; my $check_path = "/tmp/$check_name"; @@ -232,69 +295,67 @@ sub reduce_results{ my $cmd = "rsync $check_path $account:$remote_path/;ssh $account 'source ~/.bashrc;cd $remote_path; bash $check_name'"; &CJ::my_system($cmd,$verbose); # Run a script to gather all files of the same name. 
- my $completed_filename = "completed_list.txt"; - my $remaining_filename = "remaining_list.txt"; - - my $ext = lc(getExtension($res_filename)); + my $completed_filename = "completed_list.cjr"; + my $remaining_filename = "remaining_list.cjr"; + my $ext = lc(&CJ::getExtension($res_filename)); + #print "$res_filename\n"; my $collect_bash_script; if( $ext =~ m/mat/){ - $collect_bash_script = &CJ::Matlab::make_MAT_collect_script($res_filename, $completed_filename,$bqs); - + $collect_bash_script = &CJ::Get::make_MAT_collect_script($res_filename, $completed_filename,$bqs,$ssh); }elsif ($ext =~ m/txt|csv/){ - $collect_bash_script = &CJ::Get::make_TEXT_collect_script($res_filename, $remaining_filename,$completed_filename,$bqs, $text_header_lines); + $collect_bash_script = &CJ::Get::make_TEXT_collect_script($res_filename,$remaining_filename,$completed_filename,$bqs, $text_header_lines); }else{ &CJ::err("File extension not recognized"); } #print "$collect_bash_script"; - - my $CJ_reduce_matlab = "$install_dir/CJ/CJ_reduce.m"; + + my $CJ_reduce_matlab = "$src_dir/CJ/CJ_reduce.m"; my $collect_name = "cj_collect.sh"; my $collect_bash_path = "/tmp/$collect_name"; &CJ::writeFile($collect_bash_path,$collect_bash_script); $cmd = "scp $collect_bash_path $CJ_reduce_matlab $account:$remote_path/"; &CJ::my_system($cmd,$verbose); - - - - - - - my $short_pid=substr($info->{'pid'},0,8); - - &CJ::message("Reducing results..."); + &CJ::message("Reducing $res_filename"); if($bqs eq "SLURM"){ - - CJ::message("Do you want to submit the reduce script to the queue via srun?(recommneded for big jobs) Y/N?"); - my $input = ; chomp($input); - if(lc($input) eq "y" or lc($input) eq "yes"){ + + if(not defined($yesno) ){ + CJ::message("Do you want to submit the reduce script to the queue via srun? 
(recommneded for big jobs) Y/N?"); + $yesno = ; chomp($yesno); + } + + + if(lc($yesno) eq "y" or lc($yesno) eq "yes"){ &CJ::message("Reducing results..."); my $cmd = "ssh $account 'cd $remote_path; srun bash -l $collect_name'"; #my $cmd = "ssh $account 'cd $remote_path; qsub $collect_name'"; &CJ::my_system($cmd,1); - &CJ::message("Reducing results done! Please use \"CJ get $short_pid \" to get your results."); + &CJ::message("Reducing results done! Use \"CJ get $short_pid \" to get your results."); - }elsif(lc($input) eq "n" or lc($input) eq "no"){ + }elsif(lc($yesno) eq "n" or lc($yesno) eq "no"){ my $cmd = "ssh $account 'cd $remote_path; bash -l $collect_name'"; &CJ::my_system($cmd,1); - &CJ::message("Reducing results done! Please use \"CJ get $short_pid \" to get your results."); + &CJ::message("Reducing results done! Use \"CJ get $short_pid \" to get your results."); }else{ &CJ::message("Reduce Canceled!"); exit 0; } }else{ - my $cmd = "ssh $account 'cd $remote_path; bash -l $collect_name'"; - &CJ::my_system($cmd,1); - &CJ::message("Reducing results done! Please use \"CJ get $short_pid \" to get your results."); + my $cmd = "ssh $account 'cd $remote_path; bash -l $collect_name'"; + &CJ::my_system($cmd,1); + &CJ::message("Reducing results done! 
Please use \"CJ get $short_pid \" to get your results."); + } - + +return $yesno; + } @@ -347,7 +408,9 @@ sub get_results{ my $program = $info->{'program'}; - + # Check Connection; + &CJ::CheckConnection($machine); + # Get current remote directory from .ssh_config @@ -412,53 +475,27 @@ sub get_results{ - - - - - - -sub getExtension{ - my ($filename) = @_; - #print "$filename\n"; - - my ($ext) = $filename =~ /\.([^.]+)$/; - return $ext; -} - - - - - - - - - - - - - - sub make_parrun_check_script{ my ($info,$res_filename) = @_; -my $machine = $info->{'machine'}; -my $pid = $info->{'pid'}; -my $account = $info->{'account'}; +my $machine = $info->{'machine'}; +my $pid = $info->{'pid'}; +my $account = $info->{'account'}; my $remote_path = $info->{'remote_path'}; -my $runflag = $info->{'runflag'}; -my $bqs = $info->{'bqs'}; -my $job_id = $info->{'job_id'}; -my $program = $info->{'program'}; - -my $collect_filename = "collect_list.txt"; -my $alljob_filename = "job_list.txt"; -my $remaining_filename = "remaining_list.txt"; -my $completed_filename = "completed_list.txt"; +my $runflag = $info->{'runflag'}; +my $bqs = $info->{'bqs'}; +my $job_id = $info->{'job_id'}; +my $program = $info->{'program'}; + +my $collect_filename = "collect_list.cjr"; +my $alljob_filename = "job_list.cjr"; +my $remaining_filename = "remaining_list.cjr"; +my $completed_filename = "completed_list.cjr"; #find the number of folders with results in it my @job_ids = split(',', $job_id); my $num_res = 1+$#job_ids; + # header for bqs's my $HEADER = &CJ::bash_header($bqs); # check which jobs are done. @@ -492,10 +529,9 @@ done TEXT - + ### IMPROVE THIS LATER: COMPLETED JOBS MUST BE CHECK BY STATUS OF THE JOB NOT BY THE PRESENCE OF THE RESULTS. + ### RESULTS FILE MIGHT BE EXTENDING OVER TIME. 
return $check_runs; - - } @@ -509,11 +545,7 @@ sub make_TEXT_collect_script { my ($res_filename, $remaining_filename, $completed_filename, $bqs, $text_header_lines) = @_; - - - - my $collect_filename = "collect_list.txt"; - + my $collect_filename = "collect_list.cjr"; my $num_header_lines; if(defined($text_header_lines)){ @@ -529,9 +561,10 @@ my $HEADER = &CJ::bash_header($bqs); my $text_collect_script=< "$res_filename"; # Pop the first line of remaining_list and add it to collect_list @@ -584,10 +617,10 @@ else # Remove header-lines! startline=\$(($num_header_lines+1)); - sed -n "\$startline,\\\$p" < "\$COUNTER/$res_filename" >> "$res_filename"; #simply append (no header modification yet) + sed -n "\$startline,\\\$p" < "\$COUNTER/$res_filename" >> "$res_filename"; #simply append # Pop the first line of remaining_list and append it to collect_list -#sed -i '1d' $completed_filename + #sed -i '1d' $completed_filename if [ -f $collect_filename ];then echo \$COUNTER >> $collect_filename else @@ -626,4 +659,120 @@ BASH -1; \ No newline at end of file +############################# +sub make_MAT_collect_script{ +############################# + +my ($res_filename, $completed_filename, $bqs, $ssh) = @_; + +my $collect_filename = "collect_list.cjr"; + +my $matlab_collect_script=< +unset _JAVA_OPTIONS +matlab -nosplash -nodisplay <|$ssh->{mat}|; + +return $script; +} + + + + +1; diff --git a/src/CJ/Install.pm b/src/CJ/Install.pm new file mode 100644 index 0000000..60bfeda --- /dev/null +++ b/src/CJ/Install.pm @@ -0,0 +1,467 @@ +package CJ::Install; +# This class takes care of Installation +# Copyright 2017 Hatef Monajemi (monajemi@stanford.edu) + +use strict; +use warnings; +use CJ; +use CJ::CJVars; +use Data::Dumper; +use feature 'say'; + + + +#################### +# class constructor +sub new { +#################### + my $class= shift; + my ($app,$machine,$path) = @_; + + $path //= "CJinstalled"; #SOME_DEFAULT # This path relative to ~/ + + my $self = bless { + app => 
$app, + machine => $machine, + path => $path + }, $class; + return $self; +} + + + + +sub composer{ + my $self = shift; + my($force_tag) = @_; + +my $distro="https://composer.github.io/installer.sig"; +my $composer = "composer-setup.php"; +my $installer = "https://getcomposer.org/installer"; +my $installpath = "\$HOME/$self->{path}/PHP/composer"; + +# ------------------- +my $install_bash_script =<<'BASH'; + + # INSTALL PHP if not installed + if [ -n "$(which php)" ] ; then + sudo apt-get update + sudo apt-get install php + fi + + + if [ -n "$(which composer)" ] ; then + echo "composer is already installed in $(which composer)"; + exit 0; + elif [ -n "$(command -v composer)" ] ; then + echo "composer is already installed in $(command -v composer)"; + exit 0; + else + START=`date +%s` + + + echo "GETTING composer from "; + if [ -f ]; then rm -f ; fi; + EXPECTED_SIGNATURE=$(wget -q -O - "") + + echo "INSTALLING composer"; + if [ -d ]; then + printf "ERROR: directory exists. Aborting install. \ + \nYou may use 'cj install -f ...' to remove this directory for a fresh install\n"; + exit 1; + else + mkdir -p + fi + + php -r "copy('', '');" + ACTUAL_SIGNATURE=$(php -r "echo hash_file('SHA384', '');") + + + if [ "$EXPECTED_SIGNATURE" != "$ACTUAL_SIGNATURE" ];then + printf "ERROR: Invalid installer signature" + rm + exit 1 + fi + + php composer-setup.php --install-dir= --filename=composer + rm + echo 'export PATH=":$PATH" ' >> $HOME/.bashrc + echo 'export PATH=":$PATH" ' >> $HOME/.bash_profile + + + if [ -f "$HOME/.bashrc" ]; then source $HOME/.bashrc; fi + if [ -f "$HOME/.bash_profile" ] ; then source $HOME/.bash_profile; fi + + composer self-update + if [ $? -eq 0 ]; then + END=`date +%s`; + RUNTIME=$((END-START)); + echo "INSTALL SUCCESSFUL ($RUNTIME seconds)" + exit 0; + else + echo "****INSTALL FAILED***** $? 
" + exit 1 + fi + + fi + + +BASH + +$install_bash_script =~ s||$distro|g; +$install_bash_script =~ s||$installer|g; +$install_bash_script =~ s||$composer|g; +$install_bash_script =~ s||$installpath|g; + + +#--------------------- +my $ssh = CJ::host($self->{'machine'}); + + +# if forced clear the previous installation if any +if($force_tag == 1){ + &CJ::message("(forced) removing prior installation of $self->{app} in $installpath"); + my $cmd = "ssh $ssh->{account} 'rm -rf $installpath' "; + &CJ::my_system($cmd,0); +} + + + + +my $filename = "CJ_install_". $self->{app} . ".sh"; +my $filepath = "/tmp/$filename"; +&CJ::writeFile($filepath, $install_bash_script); +my $cmd = "scp $filepath $ssh->{account}:."; +&CJ::my_system($cmd,1); + +&CJ::message("----- START BASH ON $self->{'machine'}-----",1); +$cmd = "ssh $ssh->{account} 'cd \$HOME && bash -l $filename' "; +system($cmd); + +$cmd = "ssh $ssh->{account} 'if [ -d \$HOME/$self->{path} ] ; then mv \$HOME/$filename \$HOME/$self->{path}/; fi' "; +system($cmd); + +&CJ::message("----- END BASH ON $self->{'machine'}-----",1); + +return 1; + +} + + + +sub miniconda{ + +my $self = shift; +my ($force_tag) = @_; + + +my $miniconda = "Miniconda3-latest-Linux-x86_64"; +my $distro = "https://repo.continuum.io/miniconda/${miniconda}.sh"; +my $installpath = "\$HOME/$self->{path}/miniconda"; + + +# ------------------- +my $install_bash_script =<<'BASH'; +#!/bin/bash -l + +#module load anaconda + +if [ -n "$(which conda)" ]; then +echo "conda is already installed in $(which conda)"; +exit 0; + +else + START=`date +%s` + + echo "GETTING miniconda from "; + if [ -f .sh ]; then rm -f .sh; fi; + wget "" + + echo "INSTALLING Miniconda"; + if [ -d ]; then + printf "ERROR: directory exists. Aborting install. \ + \nYou may use 'cj install -f ...' 
to remove this directory for a fresh install\n"; + exit 1; + fi + + bash .sh -b -p ; + + rm .sh + echo 'export PATH="/bin:$PATH" ' >> $HOME/.bashrc + echo 'export PATH="/bin:$PATH" ' >> $HOME/.bash_profile + + + if [ -f "$HOME/.bashrc" ]; then source $HOME/.bashrc; fi + if [ -f "$HOME/.bash_profile" ] ; then source $HOME/.bash_profile; fi + + conda update --yes conda + + if [ $? -eq 0 ]; then + END=`date +%s`; + RUNTIME=$((END-START)); + echo "INSTALL SUCCESSFUL ($RUNTIME seconds)" + exit 0; + else + echo "****INSTALL FAILED***** $? " + exit 1 + fi + +fi + +BASH + +$install_bash_script =~ s||$distro|g; +$install_bash_script =~ s||$miniconda|g; +$install_bash_script =~ s||$installpath|g; +# ----------------- + + +my $ssh = CJ::host($self->{'machine'}); + + +# if forced clear the previous installation if any +if($force_tag == 1){ + &CJ::message("(forced) removing prior installation of miniconda in $installpath"); + my $cmd = "ssh $ssh->{account} 'rm -rf $installpath' "; + &CJ::my_system($cmd,0); +} + + + + +my $filename = "CJ_install_miniconda.sh"; +my $filepath = "/tmp/$filename"; +&CJ::writeFile($filepath, $install_bash_script); +my $cmd = "scp $filepath $ssh->{account}:."; +&CJ::my_system($cmd,0); + + +&CJ::message("----- START BASH ON $self->{'machine'}-----",1); +$cmd = "ssh $ssh->{account} 'cd \$HOME && bash -l CJ_install_miniconda.sh' "; +system($cmd); + +$cmd = "ssh $ssh->{account} 'if [ -d \$HOME/$self->{path} ] ; then mv \$HOME/CJ_install_miniconda.sh \$HOME/$self->{path}/; fi' "; +system($cmd); + + +&CJ::message("----- END BASH ON $self->{'machine'}-----",1); + + return 1; +} + + + + +################ +sub anaconda{ + my $self = shift; + my ($force_tag) = @_; + +my $anaconda = "Anaconda3-4.4.0-Linux-x86_64"; +my $distro = "https://repo.continuum.io/archive/${anaconda}.sh"; +my $installpath = "\$HOME/$self->{path}/anaconda"; + + +# ------------------- +my $install_bash_script =<<'BASH'; + +#module load anaconda + +if [ -n "$(which conda)" ]; then + + echo 
"Anaconda is already installed in $(which conda)"; + exit 0; + +else + START=`date +%s` + + echo "GETTING anaconda from "; + if [ -f .sh ]; then rm -f .sh; fi; + wget "" + + echo "INSTALLING anaconda"; + if [ -d ]; then + printf "ERROR: directory exists. Aborting install. \ + \nYou may use 'cj install -f ...' to remove this directory for a fresh install\n"; + exit 1; + fi + + bash .sh -b -p ; + + rm .sh + echo 'export PATH="/bin:$PATH" ' >> $HOME/.bashrc + source $HOME/.bashrc + yes | conda update conda + + if [ $? -eq 0 ]; then + END=`date +%s`; + RUNTIME=$((END-START)); + echo "INSTALL SUCCESSFUL ($RUNTIME seconds)" + exit 0; + else + echo "****INSTALL FAILED*****"; + exit 1; + fi + +fi + + + +BASH + +$install_bash_script =~ s||$distro|g; +$install_bash_script =~ s||$anaconda|g; +$install_bash_script =~ s||$installpath|g; +# ----------------- + + +my $ssh = CJ::host($self->{'machine'}); + + + +# if forced clear the previous installation if any + if($force_tag == 1){ + &CJ::message("(forced) removing prior installation of anaconda in $installpath"); + my $cmd = "ssh $ssh->{account} 'rm -rf $installpath' "; + &CJ::my_system($cmd,0); + } + + + + +my $filename = "CJ_install_anaconda.sh"; +my $filepath = "/tmp/$filename"; +&CJ::writeFile($filepath, $install_bash_script); +my $cmd = "scp $filepath $ssh->{account}:."; +&CJ::my_system($cmd,0); + + +&CJ::message("----- START BASH ON $self->{'machine'}-----",1); +$cmd = "ssh $ssh->{account} 'cd \$HOME && bash -l CJ_install_anaconda.sh' "; +system($cmd); + +$cmd = "ssh $ssh->{account} 'if [ -d \$HOME/$self->{path} ] ; then mv \$HOME/CJ_install_anaconda.sh \$HOME/$self->{path}/; fi' "; +system($cmd); + + +&CJ::message("----- END BASH ON $self->{'machine'}-----",1); + + return 1; +} + + + +################### +sub cvx { +################### + + my $self = shift; + my ($force_tag) = @_; + +my $cvx = "cvx-rd"; +my $distro = "http://web.cvxr.com/cvx/${cvx}.tar.gz"; +my $installpath = "\$HOME/$self->{path}"; + + +# 
------------------- +my $install_bash_script =<<'BASH'; + +START=`date +%s` + +echo "GETTING CVX from "; +if [ -f .tar.gz ]; then rm -f .tar.gz; fi; +wget "" + +echo "INSTALLING in /cvx"; +if [ -d "/cvx" ]; then +printf "ERROR: directory /cvx exists. Aborting install. \ +\nYou may use 'cj install -f ...' to remove this directory for a fresh install\n"; +exit 1; +fi + +if [ ! -d "" ] ; then + mkdir ; +fi + +cp .tar.gz /. +cd +tar -xzvf .tar.gz +rm -f .tar.gz + + END=`date +%s`; + RUNTIME=$((END-START)); + echo "INSTALL SUCCESSFUL ($RUNTIME seconds)" + exit 0; + +BASH + +$install_bash_script =~ s||$distro|g; +$install_bash_script =~ s||$cvx|g; +$install_bash_script =~ s||$installpath|g; +# ----------------- + + +my $ssh = CJ::host($self->{'machine'}); + + + +# if forced clear the previous installation if any +if($force_tag == 1){ + &CJ::message("(forced) removing prior installation of cvx in $installpath"); + my $cmd = "ssh $ssh->{account} 'rm -rf $installpath' "; + &CJ::my_system($cmd,0); +} + + + + +my $filename = "CJ_install_cvx.sh"; +my $filepath = "/tmp/$filename"; +&CJ::writeFile($filepath, $install_bash_script); +my $cmd = "scp $filepath $ssh->{account}:."; +&CJ::my_system($cmd,0); + + +&CJ::message("----- START BASH ON $self->{'machine'}-----",1); +$cmd = "ssh $ssh->{account} 'cd \$HOME && bash -l CJ_install_cvx.sh' "; +system($cmd); + +$cmd = "ssh $ssh->{account} 'if [ -d \$HOME/$self->{path} ] ; then mv \$HOME/CJ_install_cvx.sh \$HOME/$self->{path}/; fi' "; +system($cmd); + + +&CJ::message("----- END BASH ON $self->{'machine'}-----",1); + +return 1; +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1; diff --git a/src/CJ/Matlab.pm b/src/CJ/Matlab.pm index 59b2579..9e9906b 100644 --- a/src/CJ/Matlab.pm +++ b/src/CJ/Matlab.pm @@ -14,11 +14,12 @@ use feature 'say'; # class constructor sub new { my $class= shift; - my ($path,$program) = @_; + my ($path,$program,$dep_folder) = @_; my $self= bless { path => $path, - program => $program + program => 
$program, + dep_folder => $dep_folder }, $class; return $self; @@ -44,8 +45,9 @@ sub parse { close $fh; # this includes fors on one line - my @lines = split('\n|;\s*(?=for)', $script_lines); + my @lines = split('\n|[;,]\s*(?=for)', $script_lines); + my @forlines_idx_set; foreach my $i (0..$#lines){ my $line = $lines[$i]; @@ -54,8 +56,8 @@ sub parse { } } # ============================================================== - # complain if the size of for loops is more than three or - # if they are not consecutive. We do not allow it in clusterjob. + # complain if for loops are not + # consecutive. We do not allow it in clusterjob. # ============================================================== &CJ::err(" 'parrun' does not allow less than 1 parallel loops inside the MAIN script.") if($#forlines_idx_set+1 < 1); @@ -269,7 +271,7 @@ sub findIdxTagRange push @idx_tags, $idx_tag; # This will keep order. if(defined($range)){ - $ranges->{$idx_tag} = $range; + $ranges->{$idx_tag} = $range; }else{ push @tags_to_matlab_interpret, $idx_tag; push @forlines_to_matlab_interpret, $this_forline; @@ -289,6 +291,7 @@ sub findIdxTagRange } } + return (\@idx_tags,$ranges); } @@ -341,22 +344,20 @@ sub read_matlab_index_set #extract the range my @this_array = split(/\s*=\s*/,$this_line); - my $numbers; if($this_array[1] =~ /\[\s*([^:]+?)\s*\]/){ ($numbers) = $this_array[1] =~ /\[\s*(.+?)\s*\]/; - my $floating_pattern = "[-+]?[0-9]*[\.]?[0-9]+(?:[eE][-+]?[0-9]+)?"; - my $fractional_pattern = "(?:${floating_pattern}\/)?${floating_pattern}"; + my $float_pattern = "[-+]?[0-9]*[\.]?[0-9]+(?:[eE][-+]?[0-9]+)?"; + my $power_pattern = "(?:${float_pattern}[\\^])?${float_pattern}"; + my $fractional_pattern = "(?:${power_pattern}\/)?${power_pattern}"; my @vals = $numbers =~ /[\;\,]?($fractional_pattern)[\;\,]?/g; - + my $high = 1+$#vals; my @range = ($low..$high); $range = join(',',@range); } - - }elsif($rightarray[1] =~ /\s*(\D+)\s*/) { #print "$rightarray[1]"."\n"; # CASE i = 1:L @@ -409,16 +410,63 
@@ sub read_matlab_index_set sub run_matlab_index_interpreter{ my $self = shift; my ($TOP,$tag_list,$for_lines,$verbose) = @_; + + &CJ::message("Invoking MATLAB to find range of indices. Please be patient..."); + # Check that the local machine has MATLAB (we currently build package locally!) + # Open matlab and eval + +my $test_name= "/tmp/CJ_matlab_test"; +my $test_file = "\'$test_name\'"; + +my $matlab_check_script = <$junk; +CHECK_BASH + + + +&CJ::message("Checking command 'matlab' is available...",1); + +CJ::my_system("source ~/.bash_profile; source ~/.bashrc; printf '%s' $matlab_check_bash",$verbose); # this will generate a file test_file + +eval{ + my $check = &CJ::readFile($test_name); # this causes error if there is no file which indicates matlab were not found. + #print $check . "\n"; +}; +if($@){ + #print $@ . "\n"; +&CJ::err("CJ requires 'matlab' but it cannot access it. Consider adding alias 'matlab' in your ~/.bashrc or ~/.bash_profile"); +}else{ +&CJ::message("matlab available.",1); +}; + + + # + # my $check_matlab_installed = `source ~/.bashrc ; source ~/.profile; source ~/.bash_profile; command -v matlab`; + # if($check_matlab_installed eq ""){ + # &CJ::err("I require matlab but it's not installed: The following check command returned null. \n `source ~/.bashrc ; source ~/.profile; command -v matlab`"); + # }else{ + # &CJ::message("Test passed, Matlab is installed on your machine."); + # } + # # build a script from top to output the range of index @@ -433,7 +481,7 @@ foreach my $i (0..$#{$for_lines}){ # print "$tag: $forline\n"; - my $tag_file = "\'/tmp/$tag\.tmp\'"; + my $tag_file = "\'/tmp/$tag\.tmp\'"; $matlab_interpreter_script .=<{path}/$name",$matlab_interpreter_script); +#FIXME if this is not successful and doesnt give index.tmp, we need to issue error. 
my $matlab_interpreter_bash = </tmp/matlab.output # dump matlab output +cd $self->{'path'} +matlab -nodisplay -nodesktop -nosplash <$junk; +addpath('$self->{path}/$self->{dep_folder}'); +run('$self->{path}/$name') +HERE BASH + #my $bash_name = "CJ_matlab_interpreter_bash.sh"; #my $bash_path = "/tmp"; #&CJ::writeFile("$bash_path/$bash_name",$matlab_interpreter_bash); #&CJ::message("$bash_name is built in $bash_path"); -&CJ::message("Invoking matlab to find range of indecies. Please be patient..."); -&CJ::my_system("echo $matlab_interpreter_bash", $verbose); -&CJ::message("Closing Matlab session!"); + + +&CJ::message("finding range of indices...",1); +CJ::my_system("source ~/.bash_profile; source ~/.bashrc; printf '%s' $matlab_interpreter_bash",$verbose); +&CJ::message("Closing Matlab session!",1); + # Read the files, and put it into $numbers # open a hashref @@ -477,19 +529,17 @@ foreach my $tag (@$tag_list){ my $tmp_array = &CJ::readFile("$tag_file"); my @tmp_array = split /\n/,$tmp_array; $range->{$tag} = join(',', @tmp_array); - #print $range->{$tag} . "\n"; -} - return $range; - + # print $range->{$tag} . "\n"; + &CJ::my_system("rm -f $tag_file", $verbose) ; #clean /tmp } +# remove the files you made in /tmp +&CJ::my_system("rm -f $test_name $junk $check_path/$check_name $self->{path}/$name"); - - - - - + return $range; + +} @@ -500,6 +550,7 @@ sub uncomment_matlab_line{ my ($line) = @_; $line =~ s/^(?:(?!\').)*\K\%(.*)//; + return $line; } @@ -511,146 +562,184 @@ sub uncomment_matlab_line{ -sub make_MAT_collect_script -{ - my $self = shift; - -my ($res_filename, $completed_filename, $bqs) = @_; - -my $collect_filename = "collect_list.txt"; +######################## +sub CJrun_body_script{ +######################## + my $self = shift; + my ($ssh) = @_; -my $matlab_collect_script=<{'mat'}; +my $script =<<'BASH'; - \% delete the line from remaining_filename and add it to collected. - \%fid = fopen('$completed_filename', 'r') ; \% Open source file. 
- \%fgetl(fid) ; \% Read/discard line. - \%buffer = fread(fid, Inf) ; \% Read rest of the file. - \%fclose(fid); - \%delete('$completed_filename'); \% delete the file - \%fid = fopen('$completed_filename', 'w') ; \% Open destination file. - \%fwrite(fid, buffer) ; \% Save to file. - \%fclose(fid) ; +module load +unset _JAVA_OPTIONS +matlab -nosplash -nodisplay < +my $script =<<'BASH'; - if(~exist('$collect_filename','file')); - fid = fopen('$collect_filename', 'a+'); - fprintf ( fid, '%d\\n', completed_list(1) ); - fclose(fid); - end +module load +unset _JAVA_OPTIONS +matlab -nosplash -nodisplay < + +% make sure each run has different random number stream +myversion = version; +mydate = date; +RandStream.setGlobalStream(RandStream('mt19937ar','seed', sum(100*clock))); +globalStream = RandStream.getGlobalStream; +CJsavedState = globalStream.State; +fname = sprintf('CJrandState.mat'); +save(fname,'myversion','mydate', 'CJsavedState'); +cd $DIR +run('${PROGRAM}'); +quit; +HERE - percent_done = 1/length(completed_list) * 100; - fprintf('\\n SubPackage %d Collected (%3.2f%%)', completed_list(1), percent_done ); +BASH +my $pathText.=<{matlib} -begin + +% generate recursive path +addpath(genpath('.')); + +try +cvx_setup; +cvx_quiet(true) +% Find and add Sedumi Path for machines that have CVX installed +cvx_path = which('cvx_setup.m'); +oldpath = textscan( cvx_path, '%s', 'Delimiter', '/'); +newpath = horzcat(oldpath{:}); +sedumi_path = [sprintf('%s/', newpath{1:end-1}) 'sedumi']; +addpath(sedumi_path) + +catch +warning('CVX not enabled. 
Please set CVX path in .ssh_config if you need CVX for your jobs'); end -flds = fields(res); +MATLAB + +$script =~ s||$pathText|; +$script =~ s||$ssh->{mat}|; -for idx = start:length(completed_list) - count = completed_list(idx); - newres = load([num2str(count),'/$res_filename']); + return $script; - for i = 1:length(flds) \% for all variables - res.(flds{i}) = CJ_reduce( res.(flds{i}) , newres.(flds{i}) ); - end +} -\% save after each packgae -save('$res_filename','-struct', 'res'); -percent_done = idx/length(completed_list) * 100; +########################## +sub CJrun_par_body_script{ +########################## + my $self = shift; + my ($ssh) = @_; -\% delete the line from remaining_filename and add it to collected. -\%fid = fopen('$completed_filename', 'r') ; \% Open source file. -\%fgetl(fid) ; \% Read/discard line. -\%buffer = fread(fid, Inf) ; \% Read rest of the file. -\%fclose(fid); -\%delete('$completed_filename'); \% delete the file -\%fid = fopen('$completed_filename', 'w') ; \% Open destination file. -\%fwrite(fid, buffer) ; \% Save to file. -\%fclose(fid) ; +&CJ::err("Matlab module not defined in ssh_config file.") if not defined $ssh->{'mat'}; -if(~exist('$collect_filename','file')); - error(' CJerr::File $collect_filename is missing. 
CJ stands in AWE!'); -end +my $script =<<'BASH'; + +module load +unset _JAVA_OPTIONS +matlab -nosplash -nodisplay < + + +% add path for parrun +oldpath = textscan('$DIR', '%s', 'Delimiter', '/'); +newpath = horzcat(oldpath{:}); +bin_path = sprintf('%s/', newpath{1:end-1}); +addpath(genpath(bin_path)); + + +% make sure each run has different random number stream +myversion = version; +mydate = date; +% To get different Randstate for different jobs +rng(${COUNTER}) +seed = sum(100*clock) + randi(10^6); +RandStream.setGlobalStream(RandStream('mt19937ar','seed', seed)); +globalStream = RandStream.getGlobalStream; +CJsavedState = globalStream.State; +fname = sprintf('CJrandState.mat'); +save(fname,'myversion', 'mydate', 'CJsavedState'); +cd $DIR +run('${PROGRAM}'); +quit; +HERE -fid = fopen('$collect_filename', 'a+'); -fprintf ( fid, '%d\\n', count ); -fclose(fid); +BASH -fprintf('\\n SubPackage %d Collected (%3.2f%%)', count, percent_done ); -end + + +my $pathText.=<{matlib} -begin +% generate recursive path +addpath(genpath('.')); + +try +cvx_setup; +cvx_quiet(true) +% Find and add Sedumi Path for machines that have CVX installed +cvx_path = which('cvx_setup.m'); +oldpath = textscan( cvx_path, '%s', 'Delimiter', '/'); +newpath = horzcat(oldpath{:}); +sedumi_path = [sprintf('%s/', newpath{1:end-1}) 'sedumi']; +addpath(sedumi_path) + +catch +warning('CVX not enabled. Please set CVX path in .ssh_config if you need CVX for your jobs'); end MATLAB + + +$script =~ s||$pathText|; +$script =~ s||$ssh->{mat}|; + + + return $script; +} -my $HEADER= &CJ::bash_header($bqs); -my $script; -if($bqs eq "SGE"){ -$script=< $path, + program => $program, + dep_folder => $dep_folder + }, $class; + + return $self; +} + + +##################### +sub parse { +##################### + my $self = shift; + + # script lines will have blank lines or comment lines removed; + # ie., all remaining lines are effective codes + # that actually do something. 
+ my @CJbang; + my $script_lines; + open my $fh, "$self->{path}/$self->{program}" or CJ::err("Couldn't open file: $!"); + while(<$fh>){ + + #if line starts with CJbang, keep them in CJbang! + + if($_ =~ /^\#CJ\s*(.*)$/){ + push @CJbang, $1; + }else{ + + $_ = $self->uncomment_python_line($_); + if (!/^\s*$/){ + $script_lines .= $_; + } + } + } + close $fh; + + # this includes fors on one line + my @lines = split('\n|[;,]\s*(?=for)', $script_lines); + + + my @forlines_idx_set; + foreach my $i (0..$#lines){ + my $line = $lines[$i]; + if ($line =~ /^[\t\s]*(for.*)/ ){ + push @forlines_idx_set, $i; + } + } + + # ============================================================== + # complain if for loops are not + # consecutive. We do not allow it in clusterjob. + # ============================================================== + &CJ::err(" 'parrun' does not allow less than 1 parallel loops inside the MAIN script.") if ($#forlines_idx_set+1 < 1); + + foreach my $i (0..$#forlines_idx_set-1){ + &CJ::err("CJ does not allow anything between the parallel for's. 
try rewriting your loops.") if($forlines_idx_set[$i+1] ne $forlines_idx_set[$i]+1); + } + + + my $TOP; + my $FOR; + my $BOT; + + foreach my $i (0..$forlines_idx_set[0]-1){ + $TOP .= "$lines[$i]\n"; + } + foreach my $i ($forlines_idx_set[0]..$forlines_idx_set[0]+$#forlines_idx_set){ + $FOR .= "$lines[$i]\n"; + } + foreach my $i ($forlines_idx_set[0]+$#forlines_idx_set+1..$#lines){ + $BOT .= "$lines[$i]\n"; + } + + + my $parser ={}; + $parser->{TOP} = $TOP; + $parser->{FOR} = $FOR; + $parser->{BOT} = $BOT; + $parser->{nloop} = $#forlines_idx_set+1; + $parser->{CJbang} = \@CJbang; + + + return $parser; + +} + + + + +################################## +sub build_reproducible_script{ +################################## +my $self = shift; +my ($runflag) = @_; + +my $program_script = CJ::readFile("$self->{path}/$self->{program}"); + +my $rp_program_script =<<'RP_PRGRAM'; + +# CJ has its own randState upon calling +# to reproduce results one needs to set +# the internal State of the global stream +# to the one saved when ruuning the code for +# the fist time; +import os,sys,pickle,numpy,random; +CJsavedState = pickle.load(open('CJrandState.pickle','rb')); +numpy.random.set_state(CJsavedState['numpy_CJsavedState']); +random.setstate(CJsavedState['CJsavedState']); + +RP_PRGRAM + + +if($runflag =~ /^par.*/){ + $rp_program_script .= "sys.path.append('../.');\n" +}else{ + $rp_program_script .= "sys.path.append('.');\n" +} + +$rp_program_script .= $program_script ; + +my $rp_program = "reproduce_$self->{program}"; +CJ::writeFile("$self->{path}/$rp_program", $rp_program_script); + +} + + + +################################### +sub getPIDJobCountExpr{ +# This is used only for +# CJrun_body_script +# and CJrun_par_body_script +################################### + my ($ssh) = @_; + + my $WordCountExpr; + if($ssh->{'bqs'} =~ /^SGE$/i ){ + $WordCountExpr = "qstat -xml | tr \'\n\' \' \' | sed \'s#]*>#\\\n#g\' | sed \'s#<[^>]*>##g\' | grep \" \" | column -t | grep -c \${PID}"; + 
}elsif($ssh->{'bqs'} =~ /^SLURM$/i){ + $WordCountExpr = 'sacct -n --format=jobname%44 | grep -v "^[0-9]*\\." | grep -c ${PID}'; + }else{ + &CJ::err("Unknown batch queueing system."); + } + + return $WordCountExpr; + +} + +####################### +sub CJrun_body_script{ +####################### + my $self = shift; + my ($ssh) = @_; + + +#my $WordCountExpr = getPIDJobCountExpr($ssh); + +my $script =<<'BASH'; + +# activate python venv +source activate + +python < ${DIR}/${PID}_py_conda_req.txt + +# Get out of virtual env and remove it +source deactivate + +BASH + +my $venv_name = "CJ_python_venv"; + +$script =~ s||$venv_name|; + +return $script; + +} + + + + + + + +########################## +sub CJrun_par_body_script{ +########################## + + my $self = shift; + my ($ssh) = @_; + + #my $WordCountExpr = getPIDJobCountExpr($ssh); + + # Determine easy_install version + my $python_version_tag = ""; + &CJ::err("python module not defined in ssh_config file.") if not defined $ssh->{'py'}; + + if( $ssh->{'py'} =~ /python\D?((\d.\d).\d)/i ) { + $python_version_tag = "-".$2; + }elsif( $ssh->{'py'} =~ /python\D?(\d.\d)/i ){ + $python_version_tag = "-".$1; + }else{ + CJ::err("Cannot decipher pythonX.Y.Z version"); + } + + my $user_required_pyLib = join (" ", split(":",$ssh->{'pylib'}) ); + +my $script =<<'BASH'; + +# activate python venv +source activate + +python <|$venv_name|; + + + + +return $script; +} + + + +################################ +sub read_python_array_values{ +################################ + my $self = shift; + my ($string) = @_; + + my $floating_pattern = "[-+]?[0-9]*[\.]?[0-9]+(?:[eE][-+]?[0-9]+)?"; + my $fractional_pattern = "(?:${floating_pattern}\/)?${floating_pattern}"; + my @vals = undef; + + if($string =~ /(.*array\(\[)?\s*($fractional_pattern)+\s*(\]\))?/){ + my ($numbers) = $string =~ /(?:.*array\(\[)?\s*(.+)\s*(?:\]\))?/; + @vals = $numbers =~ /[\;\,]?($fractional_pattern)[\;\,]?/g; + return \@vals; + }else{ + return undef; + } +} + + 
+############################################################# +# This function is used for parsing the content of _for_ line +# low and high limits of the loop +sub read_python_lohi{ +############################################################# + my $self = shift; + my ($input,$TOP) = @_; + + my $lohi = undef; + + if( &CJ::isnumeric($input) ) { + $lohi = $input; + + }elsif ($input =~ /\s*len\(\s*(.+)\s*\)/) { + my $this_line = &CJ::grep_var_line($1,$TOP); + + #extract the range + my @this_array = split(/\s*=\s*/,$this_line); + + my $vals = $self->read_python_array_values($this_array[1]); # This reads the vals; + $lohi = 1+$#{ $vals } unless not defined($vals); + + }elsif($input =~ /\s*(\D+)\s*:/){ + # CASE var + my $this_line = &CJ::grep_var_line($1,$TOP); + + #extract the range + my @this_array = split(/\s*=\s*/,$this_line); + my $vals = $self->read_python_array_values($this_array[1]); + $lohi = $vals->[0]; # This reads a number; + $lohi = undef if (!&CJ::isnumeric($lohi)); + } + + return $lohi; +} + + + + +########################## +sub read_python_index_set{ +########################## + my $self = shift; + + my ($forline, $TOP, $verbose) = @_; + + chomp($forline); + + + # split at 'in' keyword. 
+ my @myarray = split(/\s*\bin\b\s*/,$forline); + my @tag = split(/\s/,$myarray[0]); + + my $idx_tag = (split(/,/, $tag[-1]))[0]; # to cover -> for i,d in enumerate(V) + + my $range = undef; # This will be defined below + # The right of in keyword + my $right = $myarray[1]; + + + # see if the for line contains range + if($right =~ /\s*x?range\(\s*(.+)\s*\)/){ + + my @rightarray = split( /\s*,\s*/, $1); + + if($#rightarray == 0){ + #CASE i in range(stop); + my $low = 0; + my $high = $self->read_python_lohi($rightarray[0],$TOP); + $range = join(',',($low..$high-1)) if defined($high); + + }elsif($#rightarray == 1){ + #CASE i in range(start,stop); + my $low = $self->read_python_lohi($rightarray[0],$TOP); + my $high = $self->read_python_lohi($rightarray[1],$TOP); + $range = join(',',($low..$high-1)) if defined($high); + + }elsif($#rightarray == 2){ + #CASE i in range(start,stop, step); + my $low = $self->read_python_lohi($rightarray[0],$TOP); + my $high = $self->read_python_lohi($rightarray[1],$TOP); + my $step = $self->read_python_lohi($rightarray[2],$TOP); + + if( defined($low) && defined($high) && defined($step)){ + + my @range; + for (my $i = $low; $i < $high; $i += $step) { + push @range, $i; + } + $range = join(',',@range); + } + }else{ + &CJ::err("invalid argument to range(start, stop[, step]). $!"); + } + + }elsif($right =~ /^\s*(\w+)\s*:$/){ + print "Its here $right\n"; + #CASE: for i in array; + print $1 . "\n"; + my $this_line = &CJ::grep_var_line($1,$TOP); + #extract the range + my @this_array = split(/\s*=\s*/,$this_line); + my $range = $self->read_python_array_values($this_array[1]); + my @range = @{$range}; + $range = join(',',@range); + }else{ + + $range = undef; + #&CJ::err("strcuture of for loop not recognized by clusterjob. 
try rewriting your for loop using 'i = 1:10' structure"); + + } + return ($idx_tag, $range); +} + + + + +################################## +sub run_python_index_interpreter{ +################################## +my $self = shift; +my ($TOP,$tag_list,$for_lines,$verbose) = @_; + +&CJ::message("Invoking Python to find range of indices. Please be patient..."); + + +# Check that the local machine has Python (we currently build package locally!) +# Open python and eval + +my $test_name= "/tmp/CJ_python_test"; +my $test_file = "\'$test_name\'"; + +my $python_check_script = <$junk; +CHECK_BASH + + + +&CJ::message("Checking command 'python' is available...",1); + +CJ::my_system("source ~/.bash_profile; source ~/.bashrc; printf '%s' $python_check_bash",$verbose); # this will generate a file test_file + +eval{ +my $check = &CJ::readFile($test_name); # this causes error if there is no file which indicates Python were not found. + #print $check . "\n"; +}; + +if($@){ +#print $@ . "\n"; +&CJ::err("CJ requires 'python' but it cannot access it. 
Check 'python' command."); +}else{ +&CJ::message("python available.",1); +}; + + +# build a script from top to output the range of index + +# Add top +my $python_interpreter_script=$TOP; + + +# Add for lines +foreach my $i (0..$#{$for_lines}){ +my $tag = $tag_list->[$i]; +my $forline = $for_lines->[$i]; +chomp($forline); + +my ($level) = $forline =~ m/^(\s*).+/ ; # determin our level of indentation + + + +$forline = &CJ::remove_white_space($forline); +# print "$tag: $forline\n"; + + +my @top_lines = split /^/, $TOP; + my $last_top_line = $top_lines[$#top_lines]; + +my $tag_file = "\'/tmp/$tag\.tmp\'"; + +$python_interpreter_script .= "${level}pass" if ( $i==0 && $last_top_line =~ /^[^:]*:\s*$/ ); + +$python_interpreter_script .=<{path}/$name.py",$python_interpreter_script); +#&CJ::message("$name is built in $path",1); + + +my $python_interpreter_bash = <{'path'} +python -B <$junk; +import sys; +sys.path.append('$self->{path}/$self->{dep_folder}'); +import $name +HERE +BASH + + +&CJ::message("finding range of indices...",1); + +CJ::my_system("source ~/.bash_profile; source ~/.profile; source ~/.bashrc; printf '%s' $python_interpreter_bash",$verbose); + +&CJ::message("Closing Python session!",1); + +# Read the files, and put it into $numbers +# open a hashref +my $range={}; +foreach my $tag (@$tag_list){ +my $tag_file = "/tmp/$tag\.tmp"; +my $tmp_array = &CJ::readFile("$tag_file"); +my @tmp_array = split /\n/,$tmp_array; +$range->{$tag} = join(',', @tmp_array); +# print $range->{$tag} . 
"\n"; +&CJ::my_system("rm -f $tag_file", $verbose) ; #clean /tmp +} + + + + + +# remove the files you made in /tmp +&CJ::my_system("rm -f $test_name $junk $check_path/$check_name $self->{path}/$name.py"); + + +return $range; + +} + + +##################### +sub findIdxTagRange{ +##################### + + my $self = shift; + my ($parser,$verbose) = @_; + + my $FOR = $parser->{FOR}; + my $TOP = $parser->{TOP}; + + # Determine the tags and ranges of the + # indecies + my @idx_tags; + my $ranges={}; # This is a hashref $range->{tag} + my @tags_to_python_interpret; + my @forlines_to_python_interpret; + + + my @forline_list = split /^/, $FOR; + + for my $this_forline (@forline_list) { + + my ($idx_tag, $range) = $self->read_python_index_set($this_forline, $TOP,$verbose); + + + #print $idx_tag; + #FIX + + CJ::err("Index tag cannot be established for $this_forline") unless ($idx_tag); + push @idx_tags, $idx_tag; # This will keep order. + + + + + + if(defined($range)){ + $ranges->{$idx_tag} = $range; + }else{ + push @tags_to_python_interpret, $idx_tag; + push @forlines_to_python_interpret, $this_forline; + } + + } + + + + + + + + if ( @tags_to_python_interpret ) { + + # if we need to run python + my $range_run_interpret = $self->run_python_index_interpreter($TOP,\@tags_to_python_interpret,\@forlines_to_python_interpret, $verbose); + + + for (keys %$range_run_interpret){ + $ranges->{$_} = $range_run_interpret->{$_}; + #print"$_:$range_run_interpret->{$_} \n"; + } + } + + + + return (\@idx_tags,$ranges); +} + + + + +############################ +sub uncomment_python_line{ +############################ + my $self = shift; + my ($line) = @_; + # This uncomments useless comment lines. 
+ $line =~ s/^(?:(?![\"|\']).)*\K\#(.*)//; + return $line; +} + + + +############################# +sub buildParallelizedScript{ +############################# +my $self = shift; +my ($TOP,$FOR,$BOT,@tag_idx) = @_; + +my @str; +while(@tag_idx){ + my $tag = shift @tag_idx; + my $idx = shift @tag_idx; + push @str , " $tag != $idx "; +} + +my $str = join('or',@str); + +my $INSERT = "if ($str): continue;"; +my @BOT_lines = split /^/, $BOT; +my ($level) = $BOT_lines[0] =~ m/^(\s*).+/ ; # determin our level of indentation + +my $new_script = "$TOP\n$FOR\n$level$INSERT\n$BOT"; + +# if there is #CJ -s directive do the substitute +# This is good for including remote data for parrun +$new_script = $self->_CJbang_substitute($new_script); + +undef $INSERT; +return $new_script; + +} + + + +####################### +sub _CJbang_substitute{ +####################### + my $self = shift; + my ($script) = @_; + + my @CJbang=$self->get_CJbang(); + + foreach my $bang (@CJbang){ + # subs + if($bang =~ m/^-s\s*(.*)/){ + my @tmp = split(/\s/, $1); + &CJ::err('I expected 2 inputs but got 1 in #CJ -s directive.') if ($#tmp < 1 ); + my $first = shift @tmp; + my $second = shift @tmp; + eval{$script =~ s/$first/$second/g;}; + &CJ::err('$bang generated invalid regexp $sub') if $@; + }else{ + CJ::err("I don't recognize option '#CJ $bang'."); + } + + } + + + return $script; + +} + + + + + + +##################### +sub get_CJbang { +##################### + my $self = shift; + + my @CJbang; + open my $fh, "$self->{path}/$self->{program}" or CJ::err("Couldn't open file: $!"); + while(<$fh>){ + + #if line starts with CJbang, keep them in CJbang! 
+ + if($_ =~ /^\#CJ\s*(.*)$/){ + push @CJbang, $1; + } + } +close $fh; + + +return @CJbang; + +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +############################## UP TO HERE EDITED FOR PY ##################### + + + + + +########################## +sub check_initialization{ +########################## + my $self = shift; + + my ($parser,$tag_list,$verbose) = @_; + + my $BOT = $parser->{BOT}; + my $TOP = $parser->{TOP}; + + + + my @BOT_lines = split /\n/, $BOT; + + + my @pattern; + foreach my $tag (@$tag_list){ + # grep the line that has this tag as argument + push @pattern, "\\(.*\\b$tag\\b\.*\\)\|\\{.*\\b$tag\\b\.*\\}"; + } + my $pattern = join("\|", @pattern); + + my @vars; + foreach my $line (@BOT_lines) { + + if($line =~ /(.*)(${pattern})\s*\={1}/){ + my @tmp = split "\\(|\\{", $line; + my $var = $tmp[0]; + #print "$line\n${pattern}: $var\n"; + $var =~ s/^\s+|\s+$//g; + push @vars, $var; + } + } + + foreach(@vars) + { + my $line = &CJ::grep_var_line($_,$TOP); + } + +} + + + + +1; diff --git a/src/CJ/R.pm b/src/CJ/R.pm index ba8de0d..399b120 100644 --- a/src/CJ/R.pm +++ b/src/CJ/R.pm @@ -13,11 +13,12 @@ use feature 'say'; # class constructor sub new { my $class = shift; - my ($path,$program) = @_; + my ($path,$program,$dep_folder) = @_; my $self= bless { path => $path, - program => $program + program => $program, + dep_folder=>$dep_folder }, $class; return $self; diff --git a/src/CJ/Run.pm b/src/CJ/Run.pm new file mode 100644 index 0000000..87e5d27 --- /dev/null +++ b/src/CJ/Run.pm @@ -0,0 +1,789 @@ +package CJ::Run; +# This is the Matlab class of CJ +# Copyright 2015 Hatef Monajemi (monajemi@stanford.edu) + +use strict; +use warnings; +use CJ; +use CJ::CJVars; +use Data::Dumper; +use feature 'say'; +use Digest::SHA qw(sha1_hex); # generate hexa-decimal SHA1 PID + + + +#################### +# class constructor +sub new { +#################### + my $class= shift; + my ($path,$program,$machine, $runflag,$dep_folder,$message,$qsub_extra, 
$qSubmitDefault, $submit_defaults, $verbose) = @_; + + my $self = bless { + path => $path, + program => $program, + machine => $machine, + runflag => $runflag, + dep_folder => $dep_folder, + qsub_extra => $qsub_extra, + verbose => $verbose, + qSubmitDefault => $qSubmitDefault, + submit_defaults => $submit_defaults, + message => $message + }, $class; + return $self; +} + + + + + +########################################### +# This should be called at the beginning of +# run for all run options. Common to all +sub run_common{ +########################################### + my ($self) = @_; + +# Check connection +&CJ::CheckConnection($self->{machine}); + +# CREATE PID +my $ssh = &CJ::host($self->{machine}); +my $date = &CJ::date(); + +# PID +my $sha_expr = "$CJID:$localIP:$self->{program}:$ssh->{account}:$date->{datestr}"; +my $pid = sha1_hex("$sha_expr"); +my $short_pid = &CJ::short_pid($pid); # we use an 8 character abbrviation + + +# Check to see if the file and dep folder exists +&CJ::err("$self->{path}/$self->{program} not found") if(! -e "$self->{path}/$self->{program}" ); +if(defined($self->{dep_folder})){ +&CJ::err("Dependency folder $self->{path}/$self->{dep_folder} not found") if(! -d "$self->{path}/$self->{dep_folder}" ); +} + +#======================================= +# BUILD DOCSTRING +# WE NAME THE REMOTE FOLDERS +# BY PROGRAM AND PID +# EXAMPLE : MaxEnt/20dd3203e29ec29... +#======================================= + +my ($program_name,$ext) = &CJ::remove_extension($self->{program}); +my $program_type = CJ::program_type($self->{program}); + +CJ::message("$self->{runflag}"."ing [$self->{program}] on [$self->{machine}]"); +&CJ::message("Sending from: $self->{path}"); + + + +my $localDir = "$localPrefix/"."$program_name"; +my $local_sep_Dir = "$localDir/" . 
"$pid" ; +my $saveDir = "$savePrefix"."$program_name"; + + +#==================================== +# CREATE LOCAL DIRECTORIES +#==================================== +# create local directories +if(-d $localPrefix){ + +mkdir "$localDir" unless (-d $localDir); +mkdir "$local_sep_Dir" unless (-d $local_sep_Dir); + +}else{ +# create local Prefix +mkdir "$localPrefix"; +mkdir "$localDir" unless (-d $localDir); +mkdir "$local_sep_Dir" unless (-d $local_sep_Dir); +} + + +# cp code +my $cmd = "cp $self->{path}/$self->{program} $local_sep_Dir/"; +&CJ::my_system($cmd,$self->{verbose}); +# cp dependencies +$cmd = "cp -r $self->{dep_folder}/* $local_sep_Dir/" unless not defined($self->{dep_folder}); +&CJ::my_system($cmd,$self->{verbose}); + + + +#===================== +# REMOTE DIRECTORIES +#===================== +my $remoteDir = "$ssh->{remote_repo}/"."$program_name"; +my $remote_sep_Dir = "$remoteDir/" . "$pid" ; + +# for creating remote directory +my $outText; +if($ssh->{bqs} eq "SLURM"){ +$outText=<{remote_repo}" ]; then +mkdir $ssh->{remote_repo} +fi +mkdir $remoteDir +TEXT +}elsif($ssh->{bqs} eq "SGE"){ +$outText=<{remote_repo}" ]; then +mkdir $ssh->{remote_repo} +fi +mkdir $remoteDir +TEXT +}else{ +&CJ::err("unknown BQS"); +} + +return ($date,$ssh,$pid,$short_pid,$program_type,$localDir,$local_sep_Dir,$remoteDir,$remote_sep_Dir,$saveDir,$outText); +} + + + + + + + + + + + +######################################################### +# clusterjob run myscript.m -dep DEP -m "message" +# Serial run +sub SERIAL_DEPLOY_RUN{ +######################################################### + +my $self = shift; + +# create directories etc. 
+my ($date,$ssh,$pid,$short_pid,$program_type,$localDir,$local_sep_Dir,$remoteDir,$remote_sep_Dir,$saveDir,$outText) = run_common($self); + + +# for python only; check conda exists on the cluster and setup env +$self->setup_conda_venv($pid,$ssh) if($program_type eq 'python'); + + + + +&CJ::message("Creating reproducible script(s) reproduce_$self->{program}"); +my $codeobj = &CJ::CodeObj($local_sep_Dir,$self->{program},$self->{dep_folder}); + + +$codeobj->build_reproducible_script($self->{runflag}); + +#=========================================== +# BUILD A BASH WRAPPER +#=========================================== +my $sh_script = &CJ::Scripts::make_shell_script($ssh,$self->{program},$pid,$ssh->{bqs}, $remote_sep_Dir); +my $local_sh_path = "$local_sep_Dir/bashMain.sh"; +&CJ::writeFile($local_sh_path, $sh_script); + +# Build master-script for submission +my $master_script; + +$master_script = &CJ::Scripts::make_master_script($master_script,$self->{runflag},$self->{program},$date,$pid,$ssh,$self->{submit_defaults},$self->{qSubmitDefault},$remote_sep_Dir,$self->{qsub_extra}); + + +my $local_master_path="$local_sep_Dir/master.sh"; +&CJ::writeFile($local_master_path, $master_script); + + + + + +#============================================== +# PROPAGATE THE FILES AND RUN ON CLUSTER +#============================================== +&CJ::message("Compressing files to propagate..."); + +my $tarfile="$pid".".tar.gz"; +my $cmd="cd $localDir; tar --exclude '.git' --exclude '*~' --exclude '*.pdf' -czf $tarfile $pid/ ; rm -rf $local_sep_Dir ; cd $self->{path}"; +&CJ::my_system($cmd,$self->{verbose}); + +# create remote directory using outText +$cmd = "ssh $ssh->{account} 'echo `$outText` ' "; +&CJ::my_system($cmd,$self->{verbose}); + + + + +&CJ::message("Sending package \033[32m$short_pid\033[0m"); +# copy tar.gz file to remoteDir +$cmd = "rsync -avz ${localDir}/${tarfile} $ssh->{account}:$remoteDir/"; +&CJ::my_system($cmd,$self->{verbose}); + +&CJ::message("Extracting 
package..."); +$cmd = "ssh $ssh->{account} 'source ~/.bashrc; cd $remoteDir; tar -xzf ${tarfile} --exclude=\"._*\";exit 0'"; +&CJ::my_system($cmd,$self->{verbose}); + + +$self->{runflag} eq "deploy" ? &CJ::message("Deployed.") : &CJ::message("Submitting job..."); +$cmd = "ssh $ssh->{account} 'source ~/.bashrc && cd $remoteDir/${pid} && bash -l master.sh > $remote_sep_Dir/qsub.info && sleep 3'"; +&CJ::my_system($cmd,$self->{verbose}) unless ($self->{runflag} eq "deploy"); + + + +# bring the log file +my $qsubfilepath="$remote_sep_Dir/qsub.info"; +$cmd = "rsync -avz $ssh->{account}:$qsubfilepath $info_dir"; +&CJ::my_system($cmd,$self->{verbose}) unless ($self->{runflag} eq "deploy"); + + + + + + +my $job_id=""; +if($self->{runflag} eq "run"){ + # read run info + my $local_qsub_info_file = "$info_dir/"."qsub.info"; + my ($job_ids,$errors) = &CJ::read_qsub($local_qsub_info_file); + + $self->_checkSubmitSuccess($job_ids,$ssh,$local_sep_Dir,$remote_sep_Dir,$errors); + + $job_id = $job_ids->[0]; # there is only one in this case + my $numJobs = $#{$job_ids}+1; + CJ::message("$numJobs job(s) submitted ($job_id)"); + + foreach my $error (@{$errors}) { + CJ::warning($error); + } + + #delete the local qsub.info after use + #my $cmd = "rm $local_qsub_info_file"; + #&CJ::my_system($cmd,$self->{verbose}); +}else{ + $job_id =""; +} + + + +my $runinfo={ + pid => ${pid}, + agent => ${AgentID}, + user => ${CJID}, + local_ip => ${localIP}, + local_un => ${localUserName}, + date => ${date}, + machine => $self->{machine}, + account => $ssh->{account}, + local_prefix => ${localPrefix}, + local_path => "${localDir}/${pid}", + remote_prefix => $ssh->{remote_repo}, + remote_path => "${remoteDir}/${pid}", + job_id => $job_id, + bqs => $ssh->{bqs}, + save_prefix => ${savePrefix}, + save_path => "${saveDir}/${pid}", + runflag => $self->{runflag}, + program => $self->{program}, + message => $self->{message}, + submit_defaults => $self->{'submit_defaults'}, + alloc => $self->{'qsub_extra'}, +}; 
+ +# add_record locally +&CJ::add_record($runinfo); +# write runinfo to FireBaee as well +&CJ::write2firebase($pid,$runinfo,$date->{epoch},0); +} + + + +#======================================================== +# clusterjob parrun myscript.m -dep DEP -m "message" +# this implements parrallel for in perl +# so for each grid point, we will have +# one separate job +#======================================================== +sub PAR_DEPLOY_RUN{ +my $self = shift; + +# create directories etc. +my ($date,$ssh,$pid,$short_pid,$program_type,$localDir,$local_sep_Dir,$remoteDir,$remote_sep_Dir,$saveDir,$outText) = run_common($self); + + + +# for python only; check conda exists on the cluster and setup env +$self->setup_conda_venv($pid,$ssh) if($program_type eq 'python'); + + + + + +# read the script, parse it out and +# find the for loops +my $codeobj = &CJ::CodeObj($self->{path},$self->{program},$self->{dep_folder}); +my $parser = $codeobj->parse(); +my ($idx_tags,$ranges) = $codeobj->findIdxTagRange($parser,$self->{verbose}); + + +# Check that number of jobs doesnt exceed Maximum jobs for user on chosen cluster +# later check all resources like mem, etc. +my @keys = keys %$ranges; +my $totalJobs = 1; +foreach my $i (0..$parser->{nloop}-1){ + my @range = split(',', $ranges->{$keys[$i]}); + $totalJobs = (0+@range) * ($totalJobs); +} + + +my $max_jobs = &CJ::max_jobs_allowed($ssh,$self->{qsub_extra}); +&CJ::err("Maximum jobs allowed on $self->{machine} ($max_jobs) exceeded by your request ($totalJobs). 
Rewrite FOR loops to submit in smaller chunks.") unless ($max_jobs >= $totalJobs); + + +#Check that user has initialized for loop vars +$codeobj->check_initialization($parser,$idx_tags,$self->{verbose}); + +#============================================== +# MASTER SCRIPT +#============================================== + +my $nloops = $parser->{nloop}; +my $counter = 0; # counter gives the total number of jobs submited: (1..$counter) +my $extra={}; +$extra->{TOP}= $parser->{TOP}; +$extra->{FOR}= $parser->{FOR}; +$extra->{BOT}= $parser->{BOT}; +$extra->{local_sep_Dir}= $local_sep_Dir; +$extra->{remote_sep_Dir}= $remote_sep_Dir; +$extra->{runflag}= $self->{runflag}; +$extra->{path} = $self->{path}; #This is directory from which the code is being called +$extra->{program}= $self->{program}; +$extra->{date}= $date; +$extra->{pid}= $pid; +$extra->{bqs}= $ssh->{bqs}; +$extra->{submit_defaults}=$self->{submit_defaults}; +$extra->{qsub_extra}=$self->{qsub_extra}; +$extra->{runtime}=$self->{submit_defaults}->{runtime}; +$extra->{ssh}=$ssh; +$extra->{qSubmitDefault}=$self->{qSubmitDefault}; + +# Recursive loop for arbitrary number of loops. 
+my $master_script = &CJ::Scripts::build_nloop_master_script($nloops, $idx_tags,$ranges,$extra); + +#=================================== +# write out master_script +#=================================== +my $local_master_path="$local_sep_Dir/master.sh"; +&CJ::writeFile($local_master_path, $master_script); + + +#================================== +# PROPAGATE THE FILES +# AND RUN ON CLUSTER +#================================== +&CJ::message("Compressing files to propagate..."); +my $tarfile="$pid".".tar.gz"; +my $cmd="cd $localDir; tar --exclude '.git' --exclude '*~' --exclude '*.pdf' -czf $tarfile $pid/ ; rm -rf $local_sep_Dir ; cd $self->{path}"; +&CJ::my_system($cmd,$self->{verbose}); + +# create remote directory using outText +$cmd = "ssh $ssh->{account} 'echo `$outText` ' "; +&CJ::my_system($cmd,$self->{verbose}); + +&CJ::message("Sending package \033[32m$short_pid\033[0m"); +# copy tar.gz file to remoteDir +$cmd = "rsync -arvz ${localDir}/${tarfile} $ssh->{account}:$remoteDir/"; +&CJ::my_system($cmd,$self->{verbose}); + + +&CJ::message("Extracting package..."); +$cmd = "ssh $ssh->{account} 'source ~/.bashrc; cd $remoteDir; tar -xzf ${tarfile} --exclude=\"._*\";exit 0'"; +&CJ::my_system($cmd,$self->{verbose}); + + + +$self->{runflag} eq "pardeploy" ? &CJ::message("Deployed.") : &CJ::message("Submitting job(s)"); +my $wait = int($totalJobs/300) + 2 ; # add more wait time for large jobs. +$wait = $wait > 5 ? 
$wait: 5; +$cmd = "ssh $ssh->{account} 'source ~/.bashrc && cd $remoteDir/${pid} && bash -l master.sh > $remote_sep_Dir/qsub.info && sleep $wait'"; +&CJ::my_system($cmd,$self->{verbose}) unless ($self->{runflag} eq "pardeploy"); + + + +# bring the log file +my $qsubfilepath="$remote_sep_Dir/qsub.info"; +$cmd = "rsync -avz $ssh->{account}:$qsubfilepath $info_dir/"; +&CJ::my_system($cmd,$self->{verbose}) unless ($self->{runflag} eq "pardeploy"); + + + +my $job_ids; +my $job_id; +if($self->{runflag} eq "parrun"){ + # read run info + my $errors; + my $local_qsub_info_file = "$info_dir/"."qsub.info"; + ($job_ids,$errors) = &CJ::read_qsub($local_qsub_info_file); + + + $self->_checkSubmitSuccess($job_ids,$ssh,$local_sep_Dir,$remote_sep_Dir,$errors); + + + $job_id = join(',', @{$job_ids}); + my $numJobs = $#{$job_ids}+1; + + CJ::message("$numJobs job(s) submitted ($job_ids->[0]-$job_ids->[-1])"); + + foreach my $error (@{$errors}) { + CJ::warning($error); + } + + + #delete the local qsub.info after use + #my $cmd = "rm $local_qsub_info_file"; + #&CJ::my_system($cmd,$self->{verbose}); + +}else{ + $job_ids = ""; + $job_id = ""; +} + + + + +my $runinfo={ + pid => ${pid}, + user => ${CJID}, # will be changed to CJusername later + agent => ${AgentID}, + local_ip => ${localIP}, + local_un => ${localUserName}, + date => ${date}, + machine => $self->{machine}, + account => $ssh->{account}, + local_prefix => ${localPrefix}, + local_path => "${localDir}/${pid}", + remote_prefix => $ssh->{remote_repo}, + remote_path => "${remoteDir}/${pid}", + job_id => $job_id, + bqs => $ssh->{bqs}, + save_prefix => ${savePrefix}, + save_path => "${saveDir}/${pid}", + runflag => $self->{runflag}, + program => $self->{program}, + message => $self->{message}, + submit_defaults => $self->{'submit_defaults'}, + alloc => $self->{'qsub_extra'}, +}; + + +&CJ::add_record($runinfo); +&CJ::write2firebase($pid,$runinfo, $date->{epoch},0); # send to CJ server +} + + + + +sub _checkSubmitSuccess{ + + my 
($self,$job_ids,$ssh,$local_sep_Dir,$remote_sep_Dir,$errors) = @_; + + # in case we dont get job ID + if( !defined($job_ids->[0]) || $job_ids->[0] =~ m/^\s*$/ ){ + #print "\_$job_ids->[0]\_\n"; + + #delete remote directories + my $local_clean = "$local_sep_Dir\*"; + my $remote_clean = "$remote_sep_Dir\*"; + my $cmd = "rm -rf $local_clean; ssh $ssh->{account} 'rm -rf $remote_clean' " ; + &CJ::my_system($cmd,$self->{verbose}); + foreach my $error (@{$errors}) { + CJ::warning($error); + } + CJ::err('Job submission failed. try running with --v option for more info'); + } + +} + + + + + + + + + + + + + + + +#======================================================== +# clusterjob rrun myscript.m -dep DEP -m "message" +# this implements parrallel for using SLUMR array +# so for each grid point, we will have +# one separate job +# This is very fast in submission as there will be only +# one submission for multiple jobs +# This only works in SLURM. +# +# THIS IS INCOMPLETE. NEED TO ADD +# compatible ARRAY_BASHMAIN and MASTER +#======================================================== +sub SLURM_ARRAY_DEPLOY_RUN{ +my $self = shift; + +# create directories etc. +my ($date,$ssh,$pid,$short_pid,$program_type,$localDir,$local_sep_Dir,$remoteDir,$remote_sep_Dir,$saveDir,$outText) = run_common($self); + + + +&CJ::err("RRUN works for SLURM batch queueing system only. Use parrun instead.") unless ($ssh->{bqs} eq "SLURM"); + + + + # for python only; check conda exists on the cluster and setup env + $self->setup_conda_venv($pid,$ssh) if($program_type eq 'python'); + + + + + + + + + + # read the script, parse it out and + # find the for loops + my $codeobj = &CJ::CodeObj($self->{path},$self->{program},$self->{dep_folder}); + my $parser = $codeobj->parse(); + my ($idx_tags,$ranges) = $codeobj->findIdxTagRange($parser,$self->{verbose}); + + + # Check that number of jobs doesnt exceed Maximum jobs for user on chosen cluster + # later check all resources like mem, etc. 
+ my @keys = keys %$ranges; + my $totalJobs = 1; + foreach my $i (0..$parser->{nloop}-1){ + my @range = split(',', $ranges->{$keys[$i]}); + $totalJobs = (0+@range) * ($totalJobs); + } + + +# find max array size allowed +my $max_arraySize = &CJ::max_slurm_arraySize($ssh); + +#my $max_array_jobs = &CJ::max_jobs_allowed($ssh,$self->{qsub_extra}); + +&CJ::err("Maximum jobs allowed in array mode on $self->{machine} ($max_arraySize) exceeded by your request ($totalJobs). Rewrite FOR loops to submit in smaller chunks.") unless ($max_arraySize >= $totalJobs); + +# Check that user has initialized for loop vars +$codeobj->check_initialization($parser,$idx_tags,$self->{verbose}); + + + +#============================================== +# MASTER SCRIPT +#============================================== + +my $nloops = $parser->{nloop}; +my $counter = 0; # counter gives the total number of jobs submited: (1..$counter) +my $extra={}; +$extra->{TOP}= $parser->{TOP}; +$extra->{FOR}= $parser->{FOR}; +$extra->{BOT}= $parser->{BOT}; +$extra->{local_sep_Dir}= $local_sep_Dir; +$extra->{remote_sep_Dir}= $remote_sep_Dir; +$extra->{runflag}= $self->{runflag}; +$extra->{program}= $self->{program}; +$extra->{date}= $date; +$extra->{pid}= $pid; +$extra->{bqs}= $ssh->{bqs}; +$extra->{submit_defaults}=$self->{submit_defaults}; +$extra->{qsub_extra}=$self->{qsub_extra}; +$extra->{runtime}=$self->{submit_defaults}->{runtime}; +$extra->{ssh}=$ssh; +$extra->{qSubmitDefault}=$self->{qSubmitDefault}; +$extra->{totalJobs}=$totalJobs; +# Recursive loop for arbitrary number of loops. +my $master_script = &CJ::Scripts::build_rrun_master_script($nloops, $idx_tags,$ranges,$extra); +#print $master_script . 
"\n"; + +#=================================== +# write out master_script +#=================================== +my $local_master_path="$local_sep_Dir/master.sh"; +&CJ::writeFile($local_master_path, $master_script); + +#=================================== +# write out array_bashMain +#=================================== +my $array_bashMain_script = &CJ::Scripts::build_rrun_bashMain_script($extra); +&CJ::writeFile("$local_sep_Dir/array_bashMain.sh", $array_bashMain_script); +#================================== +# PROPAGATE THE FILES +# AND RUN ON CLUSTER +#================================== +&CJ::message("Compressing files to propagate..."); + +my $tarfile="$pid".".tar.gz"; +my $cmd="cd $localDir; tar --exclude '.git' --exclude '*~' --exclude '*.pdf' -czf $tarfile $pid/ ; rm -rf $local_sep_Dir ; cd $self->{path}"; +&CJ::my_system($cmd,$self->{verbose}); + +# create remote directory using outText +$cmd = "ssh $ssh->{account} 'echo `$outText` ' "; +&CJ::my_system($cmd,$self->{verbose}); + +&CJ::message("Sending package \033[32m$short_pid\033[0m"); +# copy tar.gz file to remoteDir +$cmd = "rsync -arvz ${localDir}/${tarfile} $ssh->{account}:$remoteDir/"; +&CJ::my_system($cmd,$self->{verbose}); + +&CJ::message("Extracting package..."); +$cmd = "ssh $ssh->{account} 'source ~/.bashrc; cd $remoteDir; tar -xzf ${tarfile} --exclude=\"._*\";exit 0'"; +&CJ::my_system($cmd,$self->{verbose}); + + +$self->{runflag} eq "rdeploy" ? &CJ::message("Deployed.") : &CJ::message("Submitting jobs..."); +my $wait = int($totalJobs/300) + 2 ; # add more wait time for large jobs so the other server finish writing. 
+$cmd = "ssh $ssh->{account} 'source ~/.bashrc && cd $remoteDir/${pid} && bash -l master.sh > $remote_sep_Dir/qsub.info && sleep $wait'"; +&CJ::my_system($cmd,$self->{verbose}) unless ($self->{runflag} eq "rdeploy"); + + + +# bring the log file +my $qsubfilepath="$remote_sep_Dir/qsub.info"; +$cmd = "rsync -avz $ssh->{account}:$qsubfilepath $info_dir/"; +&CJ::my_system($cmd,$self->{verbose}) unless ($self->{runflag} eq "pardeploy"); + + + + + + + my $array_job_id; + if($self->{runflag} eq "rrun"){ + # read run info + my $local_qsub_info_file = "$info_dir/"."qsub.info"; + my ($job_ids,$errors) = &CJ::read_qsub($local_qsub_info_file); + + + $self->_checkSubmitSuccess($job_ids,$ssh,$local_sep_Dir,$remote_sep_Dir,$errors); + + + $array_job_id = $job_ids->[0]; # there is only one in this case + #my $numJobs = $#{$job_ids}+1; + CJ::message("$totalJobs job(s) submitted ($array_job_id\_[1-$totalJobs])"); + foreach my $error (@{$errors}) { + CJ::warning($error); + } + #delete the local qsub.info after use + #my $cmd = "rm $local_qsub_info_file"; + #&CJ::my_system($cmd,$self->{verbose}); + }else{ + $array_job_id =""; + } + + + +my $runinfo={ +pid => ${pid}, +user => ${CJID}, +agent => ${AgentID}, +local_ip => ${localIP}, +local_un => ${localUserName}, +date => ${date}, +machine => $self->{machine}, +account => $ssh->{account}, +local_prefix => ${localPrefix}, +local_path => "${localDir}/${pid}", +remote_prefix => $ssh->{remote_repo}, +remote_path => "${remoteDir}/${pid}", +job_id => $array_job_id, +num_tasks => $totalJobs, # This is only for array_jobs +bqs => $ssh->{bqs}, +save_prefix => ${savePrefix}, +save_path => "${saveDir}/${pid}", +runflag => $self->{runflag}, +program => $self->{program}, +message => $self->{message}, +submit_defaults => $self->{'submit_defaults'}, +alloc => $self->{'qsub_extra'}, +}; + + +&CJ::add_record($runinfo); +&CJ::write2firebase($pid,$runinfo, $date->{epoch},0); # send to CJ server +} + + + + + + +######################### +sub 
setup_conda_venv{ +######################### + my ($self,$pid,$ssh) = @_; + # check to see conda is installed for python jobs + my $response =`ssh $ssh->{account} 'source ~/.bashrc ; source ~/.bash_profile; which conda' 2>$CJlog_error`; + if ( $response !~ m/^.*\/bin\/conda$/ ) { + + my $app = 'miniconda'; + CJ::message("No conda found on this machine. Do you want me to install '$app' on '$self->{'machine'}'?"); + my $yesno = ; chomp($yesno); + + if(lc($yesno) eq "y" or lc($yesno) eq "yes"){ + my $force_tag = 1; + my $q_yesno = 0;# anythin other than 1 will avoid asking the same yesno again + &CJ::install_software($app,$self->{'machine'}, $force_tag, $q_yesno) + }elsif(lc($yesno) eq "n" or lc($yesno) eq "no"){ + &CJ::err("CJ cannot find conda required for Python jobs. use 'cj install miniconda $self->{machine}'"); + }else{ + &CJ::message("Unknown response. Please answer by typing Yes/No"); + exit 0; + } + + } + + + # create conda env for python + + &CJ::message("Creating/checking conda venv. 
This may take a while the first time..."); + + + # Build conda-venv-script + my $conda_venv = "${pid}_conda_venv.sh"; + my $conda_venv_script = &CJ::Scripts::build_conda_venv_bash($ssh); + &CJ::writeFile("/tmp/$conda_venv", $conda_venv_script); + + + my $cmd = "scp /tmp/$conda_venv $ssh->{account}:."; + &CJ::my_system($cmd,$self->{verbose}); + $cmd = "ssh $ssh->{account} 'source ~/.bashrc; bash -l $conda_venv > /tmp/${pid}_conda_env.txt 2>&1; rm $conda_venv'"; + &CJ::my_system($cmd,$self->{verbose}) unless ($self->{runflag} eq "deploy"); + + # check that installation has been successful + my $venv = 'CJ_python_venv'; + $response =`ssh $ssh->{account} 'source ~/.bashrc ; source ~/.bash_profile;conda info --envs | grep $venv' 2>$CJlog_error`;chomp($response); + if ($response !~ m/$venv/ ){ + &CJ::message("CJ failed to create $venv on $self->{machine}"); + &CJ::message("*********************************************"); + + $cmd = "ssh $ssh->{account} 'cat /tmp/${pid}_conda_env.txt' "; + system($cmd); + exit 1; + } + +} + + + + + + + + + + + + +1; diff --git a/src/CJ/Scripts.pm b/src/CJ/Scripts.pm index 7a3e671..5280be3 100644 --- a/src/CJ/Scripts.pm +++ b/src/CJ/Scripts.pm @@ -9,6 +9,7 @@ use Data::Dumper; use CJ::CJVars; use CJ::Matlab; use CJ::R; +use CJ::Python; use feature 'state'; use feature 'say'; #==================================== @@ -35,15 +36,71 @@ sub build_reproducible_script{ } +sub build_rrun_bashMain_script{ +my ($extra) = @_; + +my $date = $extra->{date}; +my $remote_sep_Dir = $extra->{remote_sep_Dir}; +my $bqs = $extra->{bqs}; +my $docstring=<{datestr} +DOCSTRING + +my $HEADER = &CJ::bash_header($bqs); +my $array_bashMain_script=$HEADER; +$array_bashMain_script.="$docstring"; + + +if($bqs eq "SLURM"){ + + #$array_bashMain_script.="mkdir ${remote_sep_Dir}/\$SLURM_ARRAY_TASK_ID/logs\n"; + #$array_bashMain_script.="mkdir ${remote_sep_Dir}/\$SLURM_ARRAY_TASK_ID/scripts\n"; +$array_bashMain_script.="bash 
${remote_sep_Dir}/\$SLURM_ARRAY_TASK_ID/bashMain.sh\n"; + +}else{ + &CJ::err("Unknown BQS for RRUN/RDEPLOY"); +} + +return $array_bashMain_script; + +} - - - - - +sub build_rrun_master_script +{ + my ($nloop, $idx_tags,$ranges,$extra) = @_; + + + # Run this to create the directories, etc. + my $loop_script = build_nloop_master_script($nloop, $idx_tags,$ranges,$extra); + + + + + my $TOP = $extra->{TOP}; + my $FOR = $extra->{FOR}; + my $BOT = $extra->{BOT}; + my $local_sep_Dir = $extra->{local_sep_Dir}; + my $remote_sep_Dir=$extra->{remote_sep_Dir}; + my $runflag = $extra->{runflag}; + my $program = $extra->{program}; + my $date = $extra->{date} ; + my $pid =$extra->{pid} ; + my $bqs = $extra->{bqs}; + my $submit_defaults=$extra->{submit_defaults}; + my $qSubmitDefault = $extra->{qSubmitDefault}; + my $qsub_extra = $extra->{qsub_extra}; + my $ssh = $extra->{ssh}; + my $total_jobs = $extra->{totalJobs}; + my $master_script; + $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$ssh,$submit_defaults,$qSubmitDefault,$remote_sep_Dir,$qsub_extra,$total_jobs); + return $master_script; +} @@ -53,7 +110,7 @@ sub build_nloop_master_script my $master_script; my $itr = 0; #$ranges->[$itr]); 0<=itr<=nloops-1 - $master_script = nForLoop(\&build_nloop_matlab_code,$extra,$nloop,$itr,$idx_tags,$ranges); + $master_script = nForLoop(\&build_nloop_code,$extra,$nloop,$itr,$idx_tags,$ranges); return $master_script; } @@ -79,46 +136,38 @@ sub nForLoop return $master_script; } -sub build_nloop_matlab_code + + + +sub build_nloop_code { my ($master_script,$counter,$extra,@rest) = @_; $counter++; # print "$counter\n"; - my $TOP = $extra->{TOP}; - my $FOR = $extra->{FOR}; - my $BOT = $extra->{BOT}; - my $local_sep_Dir = $extra->{local_sep_Dir}; - my $remote_sep_Dir=$extra->{remote_sep_Dir}; - my $runflag = $extra->{runflag}; - my $program = $extra->{program}; - my $date = $extra->{date} ; - my $pid =$extra->{pid} ; - my $bqs = $extra->{bqs}; - my 
$mem=$extra->{mem}; - my $qsub_extra = $extra->{qsub_extra}; - my $runtime = $extra->{runtime}; + my $TOP = $extra->{TOP}; + my $FOR = $extra->{FOR}; + my $BOT = $extra->{BOT}; + my $local_sep_Dir = $extra->{local_sep_Dir}; + my $remote_sep_Dir = $extra->{remote_sep_Dir}; + my $runflag = $extra->{runflag}; + my $path = $extra->{path}; + my $program = $extra->{program}; + my $date = $extra->{date} ; + my $pid = $extra->{pid} ; + my $bqs = $extra->{bqs}; + my $submit_defaults=$extra->{submit_defaults}; + my $qSubmitDefault = $extra->{qSubmitDefault}; + my $qsub_extra = $extra->{qsub_extra}; my $ssh = $extra->{ssh}; - + + my $codeobj = &CJ::CodeObj($path,$program); + #============================================ - # BUILD EXP FOR this (v0,v1) + # BUILD EXP FOR this (v0,v1,...) #============================================ - my @str; - while(@rest){ - my $tag = shift @rest; - my $idx = shift @rest; - push @str , "$tag~=$idx"; - } - - my $str = join('||',@str); - - - my $INPUT; - $INPUT .= "if ($str); continue;end"; - my $new_script = "$TOP \n $FOR \n $INPUT \n $BOT"; - undef $INPUT; #undef INPUT for the next run - + my $new_script = $codeobj->buildParallelizedScript($TOP,$FOR,$BOT,@rest); #============================================ # COPY ALL NECESSARY FILES INTO THE # EXPERIMENTS FOLDER @@ -129,17 +178,22 @@ sub build_nloop_matlab_code &CJ::writeFile($this_path,$new_script); # build reproducible script for each run CJ::message("Creating reproducible script(s) reproduce_$program") if ($counter==1); - CJ::Scripts::build_reproducible_script("matlab", $program, "$local_sep_Dir/$counter", $runflag); + &CJ::CodeObj("$local_sep_Dir/$counter",$program)->build_reproducible_script($runflag); # build bashMain.sh for each parallel package my $remote_par_sep_dir = "$remote_sep_Dir/$counter"; - my $sh_script = &CJ::Scripts::make_par_shell_script($ssh,$program,$pid,$bqs,$counter, $remote_par_sep_dir); + my $sh_script = 
&CJ::Scripts::make_par_shell_script($ssh,$program,$pid,$bqs,$counter,$remote_par_sep_dir); my $local_sh_path = "$local_sep_Dir/$counter/bashMain.sh"; &CJ::writeFile($local_sh_path, $sh_script); - - $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$bqs,$mem,$runtime,$remote_sep_Dir,$qsub_extra,$counter); + + # build logs and scripts directories + # this is essentail for rrun! + my $cmd = "mkdir $local_sep_Dir/$counter/logs; mkdir $local_sep_Dir/$counter/scripts"; + &CJ::my_system($cmd,0); + + $master_script = &CJ::Scripts::make_master_script($master_script,$runflag,$program,$date,$pid,$ssh,$submit_defaults,$qSubmitDefault,$remote_sep_Dir,$qsub_extra,$counter); return ($counter,$master_script); } @@ -150,46 +204,160 @@ sub build_nloop_matlab_code -# ====== + + +########################## +sub build_conda_venv_bash{ +########################## + my ($ssh) = @_; + +# Determine easy_install version +my $python_version_tag = ""; +&CJ::err("python module not defined in ssh_config file.") if not defined $ssh->{'py'}; + +if( $ssh->{'py'} =~ /python\D?(((\d).\d).\d)/i ) { +$python_version_tag = $3; +}elsif( $ssh->{'py'} =~ /python\D?((\d).\d)/i ){ +$python_version_tag = $2; +}else{ +CJ::err("Cannot decipher pythonX.Y.Z version"); +} + +my $user_required_pyLib = join (" ", split(":",$ssh->{'pylib'}) ); + + +# we check to see if the file has been changed. +my $ssh_config_check; +if( -f $ssh_config_md5 ){ + $ssh_config_check = &CJ::ssh_config_md5('check') +}else{ + $ssh_config_check = 1; +} + +&CJ::ssh_config_md5('update') if ($ssh_config_check); + +# Conda should be aviable. +# from commit +# 8ced93afebb9aaee12689d3aff473c9f02bb9d78 +# we are moving to anaconda virtual env for python +my $venv = "CJ_python_venv"; + +my $env =<<'BASH'; + +# if venv does not exists and ssh_config has changed since last time +# create a new venv +if [ -z "$(conda info --envs | grep )" ] ;then + echo " Creating ..." 
+ echo " conda create --yes -n python= numpy " + conda create --yes -n python= numpy + +elif [ -eq 1 ]; then + + echo " Updating ..." + echo "conda env remove --yes -n " + conda env remove --yes -n + echo " conda create --yes -n python= numpy " + conda create --yes -n python= numpy + +else + # For python, if conda venv already exists, just use it! + echo "Using available " +fi + +BASH + + +$env =~ s||$python_version_tag|g; +$env =~ s||$user_required_pyLib|g; +$env =~ s||$venv|g; +$env =~ s||$ssh_config_check|g; + +return $env; + +} + + + + + + + + + +####################### # Build master script sub make_master_script{ - my($master_script,$runflag,$program,$date,$pid,$bqs,$mem, $runtime, $remote_sep_Dir,$qsub_extra,$counter) = @_; +####################### +my($master_script,$runflag,$program,$date,$pid,$ssh,$submit_defaults,$qSubmitDefault,$remote_sep_Dir,$qsub_extra,$counter) = @_; + my $mem = $submit_defaults->{mem}; + my $runtime = $submit_defaults->{runtime}; + my $bqs = $ssh->{'bqs'}; + #my $numberTasks = $submit_defaults->{numberTasks}; + +# one time only if( (!defined($master_script)) || ($master_script eq "")){ my $docstring=<{datestr} +# COPYRIGHT 2014 CLUSTERJOB (CJ) +# CONTACT: Hatef Monajemi (monajemi AT stanford DOT edu) +# DATE : $date->{datestr} DOCSTRING my $HEADER = &CJ::bash_header($bqs); $master_script=$HEADER; $master_script.="$docstring"; -} +} +#my $pid_head = substr($pid,0,8); #short_pid my ($programName,$ext) = &CJ::remove_extension($program); - if(!($runflag =~ /^par.*/) ){ + if ($runflag =~ /\brrun\b|\brdeploy\b/){ + + my $tagstr="CJ_$pid\_\%a\_$programName"; + if($bqs eq "SLURM"){ + + my $totalArrayJobs = $counter; # in RRUN CASE, $counter is the last job's counter. 
+ + if($qSubmitDefault){ + $master_script.="sbatch --array=1-$totalArrayJobs --mem=$mem --time=$runtime $qsub_extra -J $tagstr -o ${remote_sep_Dir}/\%a/logs/${tagstr}.stdout -e ${remote_sep_Dir}/\%a/logs/${tagstr}.stderr ${remote_sep_Dir}/array_bashMain.sh \n" + }else{ + $master_script.="sbatch --array=1-$totalArrayJobs $qsub_extra -J $tagstr -o ${remote_sep_Dir}/\%a/logs/${tagstr}.stdout -e ${remote_sep_Dir}/\%a/logs/${tagstr}.stderr ${remote_sep_Dir}/array_bashMain.sh \n" + } + }else{ + &CJ::err("Unknown BQS for RRUN/RDEPLOY"); + } - $master_script .= "mkdir ${remote_sep_Dir}"."/logs" . "\n" ; - $master_script .= "mkdir ${remote_sep_Dir}"."/scripts" . "\n" ; + }elsif(!($runflag =~ /^par.*/) ){ + + + $master_script .= "mkdir ${remote_sep_Dir}"."/logs" . "\n" ; + $master_script .= "mkdir ${remote_sep_Dir}"."/scripts" . "\n" ; my $tagstr="CJ_$pid\_$programName"; if($bqs eq "SGE"){ + if($qSubmitDefault){ + $master_script.= "qsub -S /bin/bash -w e -l h_vmem=$mem -l h_rt=$runtime $qsub_extra -N $tagstr -o ${remote_sep_Dir}/logs/${tagstr}.stdout -e ${remote_sep_Dir}/logs/${tagstr}.stderr ${remote_sep_Dir}/bashMain.sh \n"; + }else{ + $master_script.= "qsub -S /bin/bash -w e -l $qsub_extra -N $tagstr -o ${remote_sep_Dir}/logs/${tagstr}.stdout -e ${remote_sep_Dir}/logs/${tagstr}.stderr ${remote_sep_Dir}/bashMain.sh \n"; + + } - $master_script.= "qsub -S /bin/bash -w e -l h_vmem=$mem -l h_rt=$runtime $qsub_extra -N $tagstr -o ${remote_sep_Dir}/logs/${tagstr}.stdout -e ${remote_sep_Dir}/logs/${tagstr}.stderr ${remote_sep_Dir}/bashMain.sh \n"; - }elsif($bqs eq "SLURM"){ - $master_script.="sbatch --mem=$mem --time=$runtime $qsub_extra -J $tagstr -o ${remote_sep_Dir}/logs/${tagstr}.stdout -e ${remote_sep_Dir}/logs/${tagstr}.stderr ${remote_sep_Dir}/bashMain.sh \n" + }elsif($bqs eq "SLURM"){ + if($qSubmitDefault){ + $master_script.="sbatch --mem=$mem --time=$runtime $qsub_extra -J $tagstr -o ${remote_sep_Dir}/logs/${tagstr}.stdout -e ${remote_sep_Dir}/logs/${tagstr}.stderr 
${remote_sep_Dir}/bashMain.sh \n"; + }else{ + $master_script.="sbatch $qsub_extra -J $tagstr -o ${remote_sep_Dir}/logs/${tagstr}.stdout -e ${remote_sep_Dir}/logs/${tagstr}.stderr ${remote_sep_Dir}/bashMain.sh \n"; + } }else{ &CJ::err("unknown BQS") } @@ -199,19 +367,30 @@ $master_script.="$docstring"; }elsif(defined($counter)){ - # Add QSUB to MASTER SCRIPT - $master_script .= "mkdir ${remote_sep_Dir}/$counter". "/logs" . "\n" ; - $master_script .= "mkdir ${remote_sep_Dir}/$counter". "/scripts" . "\n" ; + #$master_script .= "mkdir ${remote_sep_Dir}/$counter". "/logs" . "\n" ; + #$master_script .= "mkdir ${remote_sep_Dir}/$counter". "/scripts" . "\n" ; my $tagstr="CJ_$pid\_$counter\_$programName"; if($bqs eq "SGE"){ + + if($qSubmitDefault){ $master_script.= "qsub -S /bin/bash -w e -l h_vmem=$mem -l h_rt=$runtime $qsub_extra -N $tagstr -o ${remote_sep_Dir}/$counter/logs/${tagstr}.stdout -e ${remote_sep_Dir}/$counter/logs/${tagstr}.stderr ${remote_sep_Dir}/$counter/bashMain.sh \n"; + }else{ + $master_script.= "qsub -S /bin/bash -w e -l $qsub_extra -N $tagstr -o ${remote_sep_Dir}/$counter/logs/${tagstr}.stdout -e ${remote_sep_Dir}/$counter/logs/${tagstr}.stderr ${remote_sep_Dir}/$counter/bashMain.sh \n"; + + } + + + }elsif($bqs eq "SLURM"){ - - $master_script.="sbatch --mem=$mem --time=$runtime $qsub_extra -J $tagstr -o ${remote_sep_Dir}/$counter/logs/${tagstr}.stdout -e ${remote_sep_Dir}/$counter/logs/${tagstr}.stderr ${remote_sep_Dir}/$counter/bashMain.sh \n" - + + if($qSubmitDefault){ + $master_script.="sbatch --mem=$mem --time=$runtime $qsub_extra -J $tagstr -o ${remote_sep_Dir}/$counter/logs/${tagstr}.stdout -e ${remote_sep_Dir}/$counter/logs/${tagstr}.stderr ${remote_sep_Dir}/$counter/bashMain.sh \n" + }else{ + $master_script.="sbatch $qsub_extra -J $tagstr -o ${remote_sep_Dir}/$counter/logs/${tagstr}.stdout -e ${remote_sep_Dir}/$counter/logs/${tagstr}.stderr ${remote_sep_Dir}/$counter/bashMain.sh \n" + } }else{ &CJ::err("unknown BQS"); } @@ -227,325 +406,82 @@ 
$master_script.="$docstring"; +###################### +sub make_shell_script{ +###################### + + my ($ssh,$program,$pid,$bqs,$remote_path) = @_; -sub make_shell_script - { - my ($ssh,$program,$pid,$bqs) = @_; - - my $programType = CJ::getProgramType($program); - - my $code = CJ::matlab->new() if ($programType eq "matlab"){ - - }elsif($programType eq "R"){ - - } - + my $sh_script = &CJ::shell_head($bqs); + $sh_script .= &CJ::shell_neck($program,$pid, $remote_path); # setting PID, and SHELLSCRIPT, LOGFILE PATH + $sh_script .= &CJ::Scripts::make_CJrun_bash_script($ssh,$program,$bqs); # Program specific Mat, Py, R, + $sh_script .= &CJ::shell_toe($bqs); +return $sh_script; +} -my $sh_script; -if($bqs eq "SGE"){ -$sh_script=<<'HEAD' -#!/bin/bash -#\$ -cwd -#\$ -S /bin/bash - +############################ +sub make_CJrun_bash_script{ +############################ +my ($ssh,$program,$bqs) = @_; -echo JOB_ID $JOB_ID -echo WORKDIR $SGE_O_WORKDIR -DIR=`pwd` -HEAD +my $codeobj = &CJ::CodeObj(undef,$program); # This doesnt need a path at this stage; -}elsif($bqs eq "SLURM"){ -$sh_script=<<'HEAD' -#!/bin/bash -l -echo JOB_ID $SLURM_JOBID -echo WORKDIR $SLURM_SUBMIT_DIR -DIR=`pwd` -HEAD -}else{ -&CJ::err("unknown BQS"); -} - -$sh_script.= <<'MID'; -PROGRAM=""; -PID=; -cd $DIR; -mkdir scripts -mkdir logs -SHELLSCRIPT=${DIR}/scripts/CJrun.${PID}.sh; -LOGFILE=${DIR}/logs/CJrun.${PID}.log; -MID - -if($bqs eq "SGE"){ -$sh_script.= <<'BASH'; -cat < $SHELLSCRIPT -#! /bin/bash -#$ -cwd -#$ -R y -#$ -S /bin/bash - -echo starting job $SHELLSCRIPT -echo JOB_ID \$JOB_ID -echo WORKDIR \$SGE_O_WORKDIR -date -cd $DIR - - - -echo ending job \$SHELLSCRIPT -echo JOB_ID \$JOB_ID -date -echo "done" -THERE - -chmod a+x $SHELLSCRIPT -bash $SHELLSCRIPT > $LOGFILE +my $CJrun_bash_script = 'cat < $SHELLSCRIPT' . "\n"; + $CJrun_bash_script .= &CJ::bash_header($bqs); + $CJrun_bash_script .= $codeobj->CJrun_body_script($ssh); + $CJrun_bash_script .= 'THERE' . 
"\n"; + $CJrun_bash_script .= 'chmod a+x $SHELLSCRIPT' . "\n"; + $CJrun_bash_script .= 'bash $SHELLSCRIPT > $LOGFILE' . "\n"; -BASH -}elsif($bqs eq "SLURM"){ -$sh_script.= <<'BASH'; -cat < $SHELLSCRIPT -#! /bin/bash -l - -echo starting job \$SHELLSCRIPT -echo JOB_ID \$SLURM_JOBID -echo WORKDIR \$SLURM_SUBMIT_DIR -date -cd $DIR - -module load matlab\/R2014b -unset _JAVA_OPTIONS -matlab -nosplash -nodisplay < -% make sure each run has different random number stream -myversion = version; -mydate = date; -RandStream.setGlobalStream(RandStream('mt19937ar','seed', sum(100*clock))); -globalStream = RandStream.getGlobalStream; -CJsavedState = globalStream.State; -fname = sprintf('CJrandState.mat'); -save(fname, 'myversion' ,'mydate', 'CJsavedState'); -cd $DIR -run('${PROGRAM}'); -quit; -HERE - -echo ending job \$SHELLSCRIPT -echo JOB_ID \$SLURM_JOBID -date -echo "done" -THERE - -chmod a+x $SHELLSCRIPT -bash $SHELLSCRIPT > $LOGFILE +return $CJrun_bash_script; -BASH } - - - - -$sh_script =~ s||$program|; -$sh_script =~ s||$pid|; -$sh_script =~ s||$pathText|; - -return $sh_script; -} - - - -# parallel shell script -#==================================== -# BUILD A PARALLEL BASH WRAPPER -#==================================== -sub make_par_shell_script -{ -my ($ssh,$program,$pid,$bqs,$counter,$remote_path) = @_; -my $sh_script; -if($bqs eq "SGE"){ - -$sh_script=<<'HEAD' -#!/bin/bash -l -#\$ -cwd -#\$ -S /bin/bash - -echo JOB_ID $JOB_ID -echo WORKDIR $SGE_O_WORKDIR -DIR= -HEAD - -}elsif($bqs eq "SLURM"){ -$sh_script=<<'HEAD' -#!/bin/bash -l -echo JOB_ID $SLURM_JOBID -echo WORKDIR $SLURM_SUBMIT_DIR -DIR= -HEAD -}else{ -&CJ::err("unknown BQS"); -} +############################### +sub make_CJrun_par_bash_script{ +############################### - -$sh_script.= <<'MID'; -PROGRAM=""; -PID=; -COUNTER=; -cd $DIR; -mkdir scripts -mkdir logs -SHELLSCRIPT=${DIR}/scripts/CJrun.${PID}.${COUNTER}.sh; -LOGFILE=${DIR}/logs/CJrun.${PID}.${COUNTER}.log; -MID - -if($bqs eq "SGE"){ -$sh_script.= 
<<'BASH'; -cat < $SHELLSCRIPT -#! /bin/bash -l -#$ -cwd -#$ -R y -#$ -S /bin/bash - -echo starting job $SHELLSCRIPT -echo JOB_ID \$JOB_ID -echo WORKDIR \$SGE_O_WORKDIR -date -cd $DIR - -module load matlab\/r2014b #MATLAB-R2014b -unset _JAVA_OPTIONS -matlab -nosplash -nodisplay < - - -% add path for parrun -oldpath = textscan('$DIR', '%s', 'Delimiter', '/'); -newpath = horzcat(oldpath{:}); -bin_path = sprintf('%s/', newpath{1:end-1}); -addpath(genpath(bin_path)); % recursive path - - -% make sure each run has different random number stream -myversion = version; -mydate = date; - -% To get different Randstate for different jobs -rng(${COUNTER}) -seed = sum(100*clock) + randi(10^6); -RandStream.setGlobalStream(RandStream('mt19937ar','seed', seed)); -globalStream = RandStream.getGlobalStream; -CJsavedState = globalStream.State; -fname = sprintf('CJrandState.mat'); -save(fname, 'myversion','mydate', 'CJsavedState'); -cd $DIR -run('${PROGRAM}'); -quit; -HERE - -echo ending job \$SHELLSCRIPT -echo JOB_ID \$JOB_ID -date -echo "done" -THERE - -chmod a+x $SHELLSCRIPT -bash $SHELLSCRIPT > $LOGFILE - -BASH -}elsif($bqs eq "SLURM"){ -$sh_script.= <<'BASH'; -cat < $SHELLSCRIPT -#! 
/bin/bash -l - -echo starting job \$SHELLSCRIPT -echo JOB_ID \$SLURM_JOBID -echo WORKDIR \$SLURM_SUBMIT_DIR -date -cd $DIR - -module load matlab\/R2014b -unset _JAVA_OPTIONS -matlab -nosplash -nodisplay < - - -% add path for parrun -oldpath = textscan('$DIR', '%s', 'Delimiter', '/'); -newpath = horzcat(oldpath{:}); -bin_path = sprintf('%s/', newpath{1:end-1}); -addpath(genpath(bin_path)); - - -% make sure each run has different random number stream -myversion = version; -mydate = date; -% To get different Randstate for different jobs -rng(${COUNTER}) -seed = sum(100*clock) + randi(10^6); -RandStream.setGlobalStream(RandStream('mt19937ar','seed', seed)); -globalStream = RandStream.getGlobalStream; -CJsavedState = globalStream.State; -fname = sprintf('CJrandState.mat'); -save(fname,'myversion', 'mydate', 'CJsavedState'); -cd $DIR -run('${PROGRAM}'); -quit; -HERE - -echo ending job \$SHELLSCRIPT -echo JOB_ID \$SLURM_JOBID -date -echo "done" -THERE - -chmod a+x $SHELLSCRIPT -bash $SHELLSCRIPT > $LOGFILE - - -BASH -} - -my $pathText.=<{matlib} -begin + my $codeobj = CJ::CodeObj(undef,$program); # This doesnt need a path at this stage; + + my $CJrun_bash_script = 'cat < $SHELLSCRIPT' . "\n"; + $CJrun_bash_script .= &CJ::bash_header($bqs); + $CJrun_bash_script .= $codeobj->CJrun_par_body_script($ssh); + $CJrun_bash_script .= 'THERE' . "\n"; + $CJrun_bash_script .= 'chmod a+x $SHELLSCRIPT' . "\n"; + $CJrun_bash_script .= 'bash $SHELLSCRIPT > $LOGFILE' . "\n"; + + return $CJrun_bash_script; +} -% generate recursive path -addpath(genpath('.')); -try -cvx_setup; -cvx_quiet(true) -% Find and add Sedumi Path for machines that have CVX installed - cvx_path = which('cvx_setup.m'); -oldpath = textscan( cvx_path, '%s', 'Delimiter', '/'); -newpath = horzcat(oldpath{:}); -sedumi_path = [sprintf('%s/', newpath{1:end-1}) 'sedumi']; -addpath(sedumi_path) -catch -warning('CVX not enabled. 
Please set CVX path in .ssh_config if you need CVX for your jobs'); -end -MATLAB +############################### +# parallel shell script +# BUILD A PARALLEL BASH WRAPPER +sub make_par_shell_script{ +############################### +my ($ssh,$program,$pid,$bqs,$counter,$remote_path) = @_; + my $codeobj = &CJ::CodeObj(undef,$program); # This doesnt need a path at this stage; + + my $sh_script = &CJ::shell_head($bqs); + $sh_script .= &CJ::par_shell_neck($program,$pid,$counter,$remote_path); # setting PID, and SHELLSCRIPT, LOGFILE PATH + $sh_script .= &CJ::Scripts::make_CJrun_par_bash_script($ssh,$program,$bqs); # Program specific Mat, Py, R, + $sh_script .= &CJ::shell_toe($bqs); -$sh_script =~ s||$program|; -$sh_script =~ s||$pid|; -$sh_script =~ s||$counter|; -$sh_script =~ s||$pathText|; -$sh_script =~ s||$remote_path|; - return $sh_script; } - -1; \ No newline at end of file +1; diff --git a/src/CJ/Sync.pm b/src/CJ/Sync.pm index e0af4e4..4e6cad3 100644 --- a/src/CJ/Sync.pm +++ b/src/CJ/Sync.pm @@ -64,7 +64,7 @@ sub request{ # $fb_get->{$agent}->{SyncReq}; and if the value is not # null, it should update the corresponding PIDs (keys of the hashref); # once all updates are done the agent changes the value of todo to null -# to indicateall updates are done. +# to indicate all updates are done. my $firebase = Firebase->new(firebase => $firebase_name, auth_token => $CJKEY); # Get todo list @@ -196,6 +196,7 @@ sub push_timestamp{ # than the remote counterpart, it sends to the server the local info that hasnt been pushed. my $firebase = Firebase->new(firebase => $firebase_name, auth_token => $CJKEY); my $fb_get = $firebase->get("users/${CJID}/agents/$agent"); + return unless defined($fb_get); my $remote_push_timestamp = $fb_get->{push_timestamp}; return unless defined($remote_push_timestamp); @@ -219,7 +220,8 @@ CJ::warning("CJ is in awe! 
Push TimeStamp:: remote is bigger than local") if ($r my @filtered_pids = grep { $pid_timestamp->{$_} > $remote_push_timestamp } keys %$pid_timestamp; my $info_hash = &CJ::retrieve_package_info(\@filtered_pids); - my $size = keys $info_hash; + return if not defined($info_hash); + my $size = 0+keys( %{ $info_hash } ); my $counter = 0; while ( my ($pid,$info) = each (%$info_hash)){ my $timestamp = $info->{date}{epoch}; @@ -257,4 +259,4 @@ sub GetLocalPushTimeStamp -1; \ No newline at end of file +1; diff --git a/src/external/GDrive/lib/Simple.pm b/src/external/GDrive/lib/Simple.pm new file mode 100644 index 0000000..408dc89 --- /dev/null +++ b/src/external/GDrive/lib/Simple.pm @@ -0,0 +1,851 @@ +########################################### +package Net::Google::Drive::Simple; +########################################### +use strict; +use warnings; + +use LWP::UserAgent; +use HTTP::Request; +use HTTP::Headers; +use HTTP::Request::Common; +use Sysadm::Install qw( slurp ); +use File::Basename; +use YAML qw( LoadFile DumpFile ); +use JSON qw( from_json to_json ); +use Test::MockObject; +use Log::Log4perl qw(:easy); +use Data::Dumper; +use File::MMagic; +use OAuth::Cmdline::GoogleDrive; + +our $VERSION = "0.12"; + +########################################### +sub new { +########################################### + my($class, %options) = @_; + + my $self = { + init_done => undef, + api_file_url => "https://www.googleapis.com/drive/v2/files", + api_upload_url => "https://www.googleapis.com/upload/drive/v2/files", + oauth => OAuth::Cmdline::GoogleDrive->new( ), + error => undef, + %options, + }; + + bless $self, $class; +} + +########################################### +sub error { +########################################### + my( $self, $set ) = @_; + + if( defined $set ) { + $self->{ error } = $set; + } + + return $self->{ error }; +} + +########################################### +sub init { +########################################### + my( $self, $path ) = @_; + + if( 
$self->{ init_done } ) { + return 1; + } + + DEBUG "Testing API"; + if( !$self->api_test() ) { + LOGDIE "api_test failed"; + } + + $self->{ init_done } = 1; + + return 1; +} + +########################################### +sub api_test { +########################################### + my( $self ) = @_; + + my $url = $self->file_url( { maxResults => 1 } ); + + my $ua = LWP::UserAgent->new(); + + my $req = HTTP::Request->new( + GET => $url->as_string, + ); + $req->header( $self->{ oauth }->authorization_headers() ); + DEBUG "Fetching $url"; + + my $resp = $ua->request( $req ); + + if( $resp->is_success() ) { + DEBUG "API tested OK"; + return 1; + } + + $self->error( $resp->message() ); + + ERROR "API error: ", $resp->message(); + return 0; +} + +########################################### +sub file_url { +########################################### + my( $self, $opts ) = @_; + + $opts = {} if !defined $opts; + + my $default_opts = { + maxResults => 3000, + }; + + $opts = { + %$default_opts, + %$opts, + }; + + my $url = URI->new( $self->{ api_file_url } ); + $url->query_form( $opts ); + + return $url; +} + +########################################### +sub files { +########################################### + my( $self, $opts, $search_opts ) = @_; + + if( !defined $search_opts ) { + $search_opts = {}; + } + $search_opts = { + page => 1, + %$search_opts, + }; + + if( !defined $opts ) { + $opts = {}; + } + + $self->init(); + + my @docs = (); + + while( 1 ) { + my $url = $self->file_url( $opts ); + my $data = $self->http_json( $url ); + + if( !defined $data ) { + return undef; + } + + my $next_item = $self->item_iterator( $data ); + + while( my $item = $next_item->() ) { + if( $item->{ kind } eq "drive#file" ) { + my $file = $item->{ originalFilename }; + if( !defined $file ) { + DEBUG "Skipping $item->{ title } (no originalFilename)"; + next; + } + + push @docs, $self->data_factory( $item ); + } else { + DEBUG "Skipping $item->{ title } ($item->{ kind })"; + } + } + + if( 
$search_opts->{ page } and $data->{ nextPageToken } ) { + $opts->{ pageToken } = $data->{ nextPageToken }; + } else { + last; + } + } + + return \@docs; +} + +########################################### +sub folder_create { +########################################### + my( $self, $title, $parent ) = @_; + + my $url = URI->new( $self->{ api_file_url } ); + + my $data = $self->http_json( $url, { + title => $title, + parents => [ { id => $parent } ], + mimeType => "application/vnd.google-apps.folder", + } ); + + if( ! defined $data ) { + return undef; + } + + return $data->{ id }; +} + +########################################### +sub file_upload { +########################################### + my( $self, $file, $parent_id, $file_id ) = @_; + + # Since a file upload can take a long time, refresh the token + # just in case. + $self->{ oauth }->token_expire(); + + my $title = basename $file; + + # First, insert the file placeholder, according to + # http://stackoverflow.com/questions/10317638 + my $file_data = slurp $file; + my $mime_type = $self->file_mime_type( $file ); + + my $url; + + if( ! defined $file_id ) { + $url = URI->new( $self->{ api_file_url } ); + + my $data = $self->http_json( $url, + { mimeType => $mime_type, + parents => [ { id => $parent_id } ], + title => $title, + } + ); + + if( ! defined $data ) { + return undef; + } + + $file_id = $data->{ id }; + } + + $url = URI->new( $self->{ api_upload_url } . 
"/$file_id" ); + $url->query_form( uploadType => "media" ); + + my $req = &HTTP::Request::Common::PUT( + $url->as_string, + $self->{ oauth }->authorization_headers(), + "Content-Type" => $mime_type, + Content => $file_data, + ); + + my $resp = $self->http_loop( $req ); + + if( $resp->is_error() ) { + $self->error( $self->message() ); + return undef; + } + + DEBUG $resp->as_string; + + return $file_id; +} + +########################################### +sub file_delete { +########################################### + my( $self, $file_id ) = @_; + + my $url; + + LOGDIE 'Deletion requires file_id' if( ! defined $file_id ); + + $url = URI->new( $self->{ api_file_url } . "/$file_id" ); + + my $req = &HTTP::Request::Common::DELETE( + $url->as_string, + $self->{ oauth }->authorization_headers(), + ); + + my $resp = $self->http_loop( $req ); + + DEBUG $resp->as_string; + + if( $resp->is_error ) { + $self->error( $resp->message() ); + return undef; + } + + return $file_id; +} + +########################################### +sub children_by_folder_id { +########################################### + my( $self, $folder_id, $opts, $search_opts ) = @_; + + $self->init(); + + if( !defined $search_opts ) { + $search_opts = {}; + } + + $search_opts = { + page => 1, + %$search_opts, + }; + + if( !defined $opts ) { + $opts = { + maxResults => 100, + }; + } + + my $url = URI->new( $self->{ api_file_url } ); + $opts->{ q } = "'$folder_id' in parents"; + + if( $search_opts->{ title } ) { + $opts->{ q } .= " AND title = '$search_opts->{ title }'"; + } + + my @children = (); + + while( 1 ) { + $url->query_form( $opts ); + + my $data = $self->http_json( $url ); + + if( ! 
defined $data ) { + return undef; + } + + my $next_item = $self->item_iterator( $data ); + + while( my $item = $next_item->() ) { + push @children, $self->data_factory( $item ); + } + + if( $search_opts->{ page } and $data->{ nextPageToken } ) { + $opts->{ pageToken } = $data->{ nextPageToken }; + } else { + last; + } + } + + return \@children; +} + +########################################### +sub children { +########################################### + my( $self, $path, $opts, $search_opts ) = @_; + + DEBUG "Determine children of $path"; + + if( !defined $path ) { + LOGDIE "No $path given"; + } + + if( !defined $search_opts ) { + $search_opts = {}; + } + + my @parts = split '/', $path; + my $parent = $parts[0] = "root"; + DEBUG "Parent: $parent"; + + my $folder_id = shift @parts; + + PART: for my $part ( @parts ) { + + DEBUG "Looking up part $part (folder_id=$folder_id)"; + + my $children = $self->children_by_folder_id( $folder_id, + { maxResults => 100, # path resolution maxResults is different + }, + { %$search_opts, title => $part }, + ); + + if( ! defined $children ) { + return undef; + } + + for my $child ( @$children ) { + DEBUG "Found child ", $child->title(); + if( $child->title() eq $part ) { + $folder_id = $child->id(); + $parent = $folder_id; + DEBUG "Parent: $parent"; + next PART; + } + } + + my $msg = "Child $part not found"; + $self->error( $msg ); + ERROR $msg; + return undef; + } + + DEBUG "Getting content of folder $folder_id"; + + my $children = $self->children_by_folder_id( $folder_id, $opts, + $search_opts ); + + if( ! 
defined $children ) { + return undef; + } + + if( wantarray ) { + return( $children, $parent ); + } else { + return $children; + } +} + +########################################### +sub search { +########################################### + my( $self, $opts, $search_opts, $query ) = @_; + $search_opts||= { page => 1 }; + + $self->init(); + + if( !defined $opts ) { + $opts = { + maxResults => 100, + }; + } + + my $url = URI->new( $self->{ api_file_url } ); + + $opts->{ q }= $query; + + my @children = (); + + while( 1 ) { + $url->query_form( $opts ); + + my $data = $self->http_json( $url ); + if( ! defined $data ) { + return undef; + } + + my $next_item = $self->item_iterator( $data ); + + while( my $item = $next_item->() ) { + push @children, $self->data_factory( $item ); + } + + if( $search_opts->{ page } and $data->{ nextPageToken } ) { + $opts->{ pageToken } = $data->{ nextPageToken }; + } else { + last; + } + } + + return \@children; +} + +########################################### +sub data_factory { +########################################### + my( $self, $data ) = @_; + + my $mock = Test::MockObject->new(); + + for my $key ( keys %$data ) { + # DEBUG "Adding method $key"; + $mock->mock( $key , sub { $data->{ $key } } ); + } + + return $mock; +} + +########################################### +sub download { +########################################### + my( $self, $url, $local_file ) = @_; + + if( ref $url ) { + $url = $url->downloadUrl(); + } + + my $req = HTTP::Request->new( + GET => $url, + ); + $req->header( $self->{ oauth }->authorization_headers() ); + + my $ua = LWP::UserAgent->new(); + my $resp = $ua->request( $req, $local_file ); + + if( $resp->is_error() ) { + my $msg = "Can't download $url (" . $resp->message() . 
")"; + ERROR $msg; + $self->error( $msg ); + return undef; + } + + if( $local_file ) { + return 1; + } + + return $resp->content(); +} + + +########################################### +sub http_loop { +########################################### + my( $self, $req, $noinit ) = @_; + + my $ua = LWP::UserAgent->new(); + my $resp; + + my $RETRIES = 3; + my $SLEEP_INTERVAL = 10; + + { + # refresh token if necessary + if( ! $noinit ) { + $self->init(); + } + + DEBUG "Fetching ", $req->url->as_string(); + + $resp = $ua->request( $req ); + + if( ! $resp->is_success() ) { + $self->error( $resp->message() ); + warn "Failed with ", $resp->code(), ": ", $resp->message(); + if( --$RETRIES >= 0 ) { + ERROR "Retrying in $SLEEP_INTERVAL seconds"; + sleep $SLEEP_INTERVAL; + redo; + } else { + ERROR "Out of retries."; + return $resp; + } + } + + DEBUG "Successfully fetched ", length( $resp->content() ), " bytes."; + } + + return $resp; +} + +########################################### +sub http_json { +########################################### + my( $self, $url, $post_data ) = @_; + + my $req; + + if( $post_data ) { + $req = &HTTP::Request::Common::POST( + $url->as_string, + $self->{ oauth }->authorization_headers(), + "Content-Type"=> "application/json", + Content => to_json( $post_data ), + ); + } else { + $req = HTTP::Request->new( + GET => $url->as_string, + ); + $req->header( $self->{ oauth }->authorization_headers() ); + } + + my $resp = $self->http_loop( $req ); + + if( $resp->is_error() ) { + $self->error( $resp->message() ); + return undef; + } + + my $data = from_json( $resp->content() ); + + return $data; +} + +########################################### +sub file_mime_type { +########################################### + my( $self, $file ) = @_; + + # There don't seem to be great implementations of mimetype + # detection on CPAN, so just use this one for now. 
+ + if( !$self->{ magic } ) { + $self->{ magic } = File::MMagic->new(); + } + + return $self->{ magic }->checktype_filename( $file ); +} + +########################################### +sub item_iterator { +########################################### + my( $self, $data ) = @_; + + my $idx = 0; + + if( !defined $data ) { + die "no data in item_iterator"; + } + + return sub { + { + my $next_item = $data->{ items }->[ $idx++ ]; + + return if !defined $next_item; + + if( $next_item->{ labels }->{ trashed } ) { + DEBUG "Skipping $next_item->{ title } (trashed)"; + redo; + } + + return $next_item; + } + }; +} + +1; + +__END__ + +=head1 NAME + +Net::Google::Drive::Simple - Simple modification of Google Drive data + +=head1 SYNOPSIS + + use Net::Google::Drive::Simple; + + # requires a ~/.google-drive.yml file with an access token, + # see description below. + my $gd = Net::Google::Drive::Simple->new(); + + my $children = $gd->children( "/folder/path" ); + + for my $child ( @$children ) { + + next if $child->kind() ne 'drive#file'; + + next if !$child->can( "downloadUrl" ); + + print $child->originalFilename(), + " can be downloaded at ", + $child->downloadUrl(), + "\n"; + } + +=head1 DESCRIPTION + +Net::Google::Drive::Simple authenticates with a user's Google Drive and +offers several convenience methods to list, retrieve, and modify the data +stored in the 'cloud'. See C as an example on how +to keep a local directory in sync with a remote directory on Google Drive. + +=head2 GETTING STARTED + +To get the access token required to access your Google Drive data via +this module, you need to run the script C in this +distribution. + +Before you run it, you need to register your 'app' with Google Drive +and obtain a client_id and a client_secret from Google: + + https://developers.google.com/drive/web/enable-sdk + +Click on "Enable the Drive API and SDK", and find "Create an API project in +the Google APIs Console". 
On the API console, create a new project, click +"Services", and enable "Drive API" (leave "drive SDK" off). Then, under +"API Access" in the navigation bar, create a client ID, and make sure to +register an "installed application" (not a "web application"). "Redirect +URIs" should contain "http://localhost". This will get you a "Client ID" +and a "Client Secret". + +Then, replace the following lines in C<eg/google-drive-init> with the +values received: + + # You need to obtain a client_id and a client_secret from + # https://developers.google.com/drive to use this. + my $client_id = "XXX"; + my $client_secret = "YYY"; + +Then run the script. It'll start a web server on port 8082 on your local +machine. When you point your browser at http://localhost:8082, you'll see a +link that will lead you to Google Drive's login page, where you authenticate +and then allow the app (specified by client_id and client_secret above) access +to your Google Drive data. The script will then receive an access token from +Google Drive and store it in ~/.google-drive.yml from where other scripts can +pick it up and work on the data stored on the user's Google Drive account. Make +sure to limit access to ~/.google-drive.yml, because it contains the access +token that allows everyone to manipulate your Google Drive data. It also +contains a refresh token that this library uses to get a new access token +transparently when the old one is about to expire. + +=head1 METHODS + +=over 4 + +=item C<new()> + +Constructor, creates a helper object to retrieve Google Drive data +later. + +=item C<$gd-E<gt>children( "/path/to" )> + +Return the entries under a given path on the Google Drive as a reference +to an array. Each entry +is an object composed of the JSON data returned by the Google Drive API. +Each object offers methods named like the fields in the JSON data, e.g. +C<originalFilename()>, C<downloadUrl()>, etc. 
+ +Will return all entries found unless C<maxResults> is set: + + my $children = $gd->children( "/path/to", { maxResults => 3 } ) + +Due to the somewhat capricious ways Google Drive handles its directory +structures, the method needs to traverse the path component by component +and determine the ID of each directory to get to the next level. To speed +up subsequent lookups, it also returns the ID of the last component to the +caller: + + my( $children, $parent ) = $gd->children( "/path/to" ); + +If the caller now wants to e.g. insert a file into the directory, its +ID is available in $parent. + +Each child comes back as a files#resource type and gets mapped into +an object that offers access to the various fields via methods: + + for my $child ( @$children ) { + print $child->kind(), " ", $child->title(), "\n"; + } + +Please refer to + + https://developers.google.com/drive/v2/reference/files#resource + +for details on which fields are available. + +=item C<$gd-E<gt>files( )> + +Return all files on the drive as a reference to an array. +Will return all entries found unless C<maxResults> is set: + + my $files = $gd->files( { maxResults => 3 } ) + +Note that Google limits the number of entries returned by default to +100, and seems to restrict the maximum number of files returned +by a single query to 3,500, even if you specify higher values for +C<maxResults>. + +Each file comes back as an object that offers access to the Google +Drive item's fields, according to the API (see C<children()>). + +=item C<$gd-E<gt>folder_create( "folder-name", $parent_id )> + +Create a new folder as a child of the folder with the id C<$parent_id>. +Returns the ID of the new folder or undef in case of an error. + +=item C<$gd-E<gt>file_upload( $file, $dir_id )> + +Uploads the content of the file C<$file> into the directory with the ID +$dir_id on Google Drive. Uses C<$file> as the file name. 
+ +To overwrite an existing file on Google Drive, specify the file's ID as +an optional parameter: + + $gd->file_upload( $file, $dir_id, $file_id ); + +=item C<$gd-E<gt>download( $item, [$local_filename] )> + +Downloads an item found via C<files()> or C<children()>. Also accepts +the downloadUrl of an item. If C<$local_filename> is not specified, +C<download()> will return the data downloaded (this might be undesirable +for large files). If C<$local_filename> is specified, C<download()> will +store the downloaded data under the given file name. + + my $gd = Net::Google::Drive::Simple->new(); + my $files = $gd->files( { maxResults => 20 }, { page => 0 } ); + for my $file ( @$files ) { + my $name = $file->originalFilename(); + print "Downloading $name\n"; + $gd->download( $file, $name ) or die "failed: $!"; + } + +Be aware that only documents like PDF or png can be downloaded directly. Google Drive Documents like spreadsheets or (text) documents need to be exported into one of the available formats. +Check for "exportLinks" on a given file. In case of a document that can be exported you will receive a hash in the form: + + { + 'format_1' => 'download_link_1', + 'format_2' => 'download_link_2', + ... + } + +Choose your download link and use it as an argument to the download() function which can also take urls directly. + + my $gd = Net::Google::Drive::Simple->new(); + my $children = $gd->children( '/path/to/folder/on/google/drive' ); + for my $child ( @$children ) { + if ($child->can( 'exportLinks' )){ + my $type_chosen; + foreach my $type (keys %{$child->exportLinks()}){ + # Take any type you can get.. + $type_chosen = $type; + # ..but choose your preferred format, opendocument here: + last if $type =~/oasis\.opendocument/; + } + my $url = $child->exportLinks()->{$type_chosen}; + + $gd->download($url, 'my/local/file'); + + } + } + +=item C<$gd-E<gt>search( )> + + my $children= $gd->search({ maxResults => 20 },{ page => 0 }, + "title contains 'Futurama'"); + +Search files for attributes. 
See +L +for a definition of the attributes. + +To list all available files, those on the drive, those directly shared +with the user, and those generally available to the user, use an +empty search: + + my $children= $gd->search({},{ page => 0 },""); + +=item C<$gd-Efile_delete( file_id )> + +Delete the file with the specified ID from Google Drive. + +=back + +=head1 Error handling + +In case of an error while retrieving information from the Google Drive +API, the methods above will return C and a more detailed error +message can be obtained by calling the C method: + + print "An error occurred: ", $gd->error(); + +=head1 LOGGING/DEBUGGING + +Net::Google::Drive::Simple is Log4perl-enabled. +To find out what's going on under the hood, turn on Log4perl: + + use Log::Log4perl qw(:easy); + Log::Log4perl->easy_init($DEBUG); + +=head1 LEGALESE + +Copyright 2012 by Mike Schilli, all rights reserved. +This program is free software, you can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 AUTHOR + +2012, Mike Schilli diff --git a/src/external/GDrive/lib/gDrive.pm b/src/external/GDrive/lib/gDrive.pm new file mode 100644 index 0000000..e69de29 diff --git a/src/external/GDrive/lib/gDrive/OAuth.pm b/src/external/GDrive/lib/gDrive/OAuth.pm new file mode 100644 index 0000000..1fd4a1f --- /dev/null +++ b/src/external/GDrive/lib/gDrive/OAuth.pm @@ -0,0 +1,88 @@ +################################ +package gDrive::OAuth; +################################ + +use Moo; +use JSON::XS; +use POSIX; +use MIME::Base64; + + + + + + + + + + + + + + + + + + + + +=head1 NAME + +gDrive::OAuth - Auth token generation for Google Drive service account (Server to Server); + +=head1 SYNOPSIS + +use gDrive::OAuth; + +my $token = gDrive::OAuth->new(token => 'xxxxxxxxx', admin => 'true', data => { uid => '1' } )->create_token(); + + + + + + + + + +=head1 LICENSE AND COPYRIGHT + + Copyright 2017 Hatef Monajemi. 
+ + This program is free software; you can redistribute it and/or modify it + under the terms of the the Artistic License (2.0). You may obtain a + copy of the full license at: + + L + + Any use, modification, and distribution of the Standard or Modified + Versions is governed by this Artistic License. By using, modifying or + distributing the Package, you accept this license. Do not use, modify, + or distribute the Package, if you do not accept this license. + + If your Modified Version has been derived from a Modified Version made + by someone other than you, you are nevertheless required to ensure that + your Modified Version complies with the requirements of this license. + + This license does not grant you the right to use any trademark, service + mark, tradename, or logo of the Copyright Holder. + + This license includes the non-exclusive, worldwide, free-of-charge + patent license to make, have made, use, offer to sell, sell, import and + otherwise transfer the Package with respect to any patent claims + licensable by the Copyright Holder that are necessarily infringed by the + Package. If you institute patent litigation (including a cross-claim or + counterclaim) against any party alleging that the Package constitutes + direct or contributory patent infringement, then this Artistic License + to you shall terminate on the date that such litigation is filed. + + Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER + AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. + THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY + YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR + CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR + CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, + EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=cut +1; diff --git a/src/external/GDrive/t/test.pl b/src/external/GDrive/t/test.pl new file mode 100644 index 0000000..7280be0 --- /dev/null +++ b/src/external/GDrive/t/test.pl @@ -0,0 +1,6 @@ +#!/usr/bin/perl -w + +use FindBin qw($Bin); +use Data::Dumper; +use lib "$Bin/.."; + diff --git a/src/external/Net-Google-Drive-Simple-0.12/Changes b/src/external/Net-Google-Drive-Simple-0.12/Changes new file mode 100644 index 0000000..18f6905 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/Changes @@ -0,0 +1,71 @@ +###################################################################### +Revision history for Perl extension Net::Google::Drive::Simple + +0.12 (2015/01/26) + (ms) [rt.cpan.org #100789] http_loop() now returns a HTTP::Response + object even in the error case, to allow for the is_error() + method to work in the calling code. + (ms) [rt.cpan.org #101655] fixed docs, removed unused config_file + parameter in constructor. + +0.11 (2014/10/08) + (ms) Ilmari Ikonen implemented file_delete(). Added docs and tests. + (ms) Failed calls no longer die, but return undef, where $gd->error() + holds an error message. + (ms) Added access_type=offline ([rt.cpan.org #99372]) since Google + apparently won't send a refresh token in some cases if it's not + set. + +0.10 (2014/09/27) + (ms) Fixed file_upload(), which uses the token_expire() method + in OAuth::Cmdline 0.03. + +0.09 (2014/09/25) + (ms) Fixed link to set up client ID and secret, as suggested by + Tom Shield ([rt.cpan.org #94316]). + (ms) All list functions (files(), children(), etc.) now filter out + trashed items.
+ (ms) Delegated oauth logic to OAuth::Cmdline + +0.08 (2013/07/27) + (ms) Max Maischein added + * Allow passing of the Google JSON file on the command line, + and using the data found in the JSON file as credentials + * Output an error message if the user did not patch the file, and + also did not pass the JSON file on the command line + * Try to find the JSON file in the users home directory + * a new search() method to perform non-folder-based general + searches for files + (ms) Maettu documented exporting Google Docs + (https://github.com/mschilli/net-google-drive-simple/pull/7) + +0.07 (2013/07/27) + (ms) fixed broken test suite + +0.06 (2013/07/25) + (ms) Added download() method to download files from the google drive + (ms) Fixed files() to return full objects, not just file names + (ms) Applied patch by Yxes, pulling in a https lib and replacing + deprecated render_text Mojo method by render(text => x): + https://github.com/mschilli/net-google-drive-simple/pull/4 + +0.05 (2013/03/18) + (ms) Requiring Sysadm::Install 0.43 for Win32 compatibility. + +0.04 (2013/02/03) + (ms) Removed dependency on pdf files, now supports all types of files + via File::MMagic. + (ms) Better debug logs + (ms) Added better instructions on how to obtain Google Drive client + IDs and secrets. + +0.03 (2013/01/03) + (ms) Now refreshing token right before a file_upload, regardless of expire + time. Working around unexplicable problems when the token expires + while an upload is in progress. + +0.02 (2012/12/30) + (ms) children() now works correctly in both scalar and array context + +0.01 (2012/12/28) + (ms) Where it all began. 
diff --git a/src/external/Net-Google-Drive-Simple-0.12/MANIFEST b/src/external/Net-Google-Drive-Simple-0.12/MANIFEST new file mode 100644 index 0000000..7c439f8 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/MANIFEST @@ -0,0 +1,16 @@ +Changes +eg/file-download +eg/file-upload +eg/google-drive-init +eg/google-drive-upsync +lib/Net/Google/Drive/Simple.pm +Makefile.PL +MANIFEST This list of files +MANIFEST.SKIP +MYMETA.json +MYMETA.yml +README +t/001Basic.t +t/data/testfile +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff --git a/src/external/Net-Google-Drive-Simple-0.12/MANIFEST.SKIP b/src/external/Net-Google-Drive-Simple-0.12/MANIFEST.SKIP new file mode 100644 index 0000000..b492858 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/MANIFEST.SKIP @@ -0,0 +1,10 @@ +blib +^Simple.pm +^Makefile$ +^Makefile.old$ +CVS +.gitignore +.git +MANIFEST.bak +adm/release +.gz$ diff --git a/src/external/Net-Google-Drive-Simple-0.12/META.json b/src/external/Net-Google-Drive-Simple-0.12/META.json new file mode 100644 index 0000000..2e9effb --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/META.json @@ -0,0 +1,56 @@ +{ + "abstract" : "Simple modification of Google Drive data", + "author" : [ + "Mike Schilli " + ], + "dynamic_config" : 1, + "generated_by" : "ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.142690", + "license" : [ + "unknown" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "Net-Google-Drive-Simple", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "File::MMagic" : "1.29", + "JSON" : "2.53", + "LWP::Protocol::https" : "6.04", + "LWP::UserAgent" : "6.02", + "Log::Log4perl" : "1", + 
"Mojolicious" : "4.13", + "OAuth::Cmdline" : "0.04", + "Pod::Usage" : "1.36", + "Sysadm::Install" : "0.43", + "Test::MockObject" : "1.09", + "YAML" : "0.71" + } + } + }, + "release_status" : "stable", + "resources" : { + "repository" : { + "url" : "http://github.com/mschilli/net-google-drive-simple.git" + } + }, + "version" : "0.12" +} diff --git a/src/external/Net-Google-Drive-Simple-0.12/META.yml b/src/external/Net-Google-Drive-Simple-0.12/META.yml new file mode 100644 index 0000000..c881a10 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/META.yml @@ -0,0 +1,34 @@ +--- +abstract: 'Simple modification of Google Drive data' +author: + - 'Mike Schilli ' +build_requires: + ExtUtils::MakeMaker: '0' +configure_requires: + ExtUtils::MakeMaker: '0' +dynamic_config: 1 +generated_by: 'ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.142690' +license: unknown +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: Net-Google-Drive-Simple +no_index: + directory: + - t + - inc +requires: + File::MMagic: '1.29' + JSON: '2.53' + LWP::Protocol::https: '6.04' + LWP::UserAgent: '6.02' + Log::Log4perl: '1' + Mojolicious: '4.13' + OAuth::Cmdline: '0.04' + Pod::Usage: '1.36' + Sysadm::Install: '0.43' + Test::MockObject: '1.09' + YAML: '0.71' +resources: + repository: http://github.com/mschilli/net-google-drive-simple.git +version: '0.12' diff --git a/src/external/Net-Google-Drive-Simple-0.12/MYMETA.json b/src/external/Net-Google-Drive-Simple-0.12/MYMETA.json new file mode 100644 index 0000000..291699d --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/MYMETA.json @@ -0,0 +1,56 @@ +{ + "abstract" : "Simple modification of Google Drive data", + "author" : [ + "Mike Schilli " + ], + "dynamic_config" : 0, + "generated_by" : "ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.142690", + "license" : [ + "unknown" + ], + "meta-spec" : { + "url" : 
"http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "Net-Google-Drive-Simple", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "File::MMagic" : "1.29", + "JSON" : "2.53", + "LWP::Protocol::https" : "6.04", + "LWP::UserAgent" : "6.02", + "Log::Log4perl" : "1", + "Mojolicious" : "4.13", + "OAuth::Cmdline" : "0.04", + "Pod::Usage" : "1.36", + "Sysadm::Install" : "0.43", + "Test::MockObject" : "1.09", + "YAML" : "0.71" + } + } + }, + "release_status" : "stable", + "resources" : { + "repository" : { + "url" : "http://github.com/mschilli/net-google-drive-simple.git" + } + }, + "version" : "0.12" +} diff --git a/src/external/Net-Google-Drive-Simple-0.12/MYMETA.yml b/src/external/Net-Google-Drive-Simple-0.12/MYMETA.yml new file mode 100644 index 0000000..7c9a061 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/MYMETA.yml @@ -0,0 +1,34 @@ +--- +abstract: 'Simple modification of Google Drive data' +author: + - 'Mike Schilli ' +build_requires: + ExtUtils::MakeMaker: '0' +configure_requires: + ExtUtils::MakeMaker: '0' +dynamic_config: 0 +generated_by: 'ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.142690' +license: unknown +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: Net-Google-Drive-Simple +no_index: + directory: + - t + - inc +requires: + File::MMagic: '1.29' + JSON: '2.53' + LWP::Protocol::https: '6.04' + LWP::UserAgent: '6.02' + Log::Log4perl: '1' + Mojolicious: '4.13' + OAuth::Cmdline: '0.04' + Pod::Usage: '1.36' + Sysadm::Install: '0.43' + Test::MockObject: '1.09' + YAML: '0.71' +resources: + repository: http://github.com/mschilli/net-google-drive-simple.git +version: '0.12' diff --git a/src/external/Net-Google-Drive-Simple-0.12/Makefile.PL 
b/src/external/Net-Google-Drive-Simple-0.12/Makefile.PL new file mode 100644 index 0000000..da5c50c --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/Makefile.PL @@ -0,0 +1,37 @@ +###################################################################### +# Makefile.PL for Net::Google::Drive::Simple +# 2012, Mike Schilli +###################################################################### +use ExtUtils::MakeMaker; + +my $meta_merge = { + META_MERGE => { + resources => { + repository => + 'http://github.com/mschilli/net-google-drive-simple.git', + }, + } +}; + +WriteMakefile( + 'NAME' => 'Net::Google::Drive::Simple', + 'VERSION_FROM' => 'lib/Net/Google/Drive/Simple.pm', # finds $VERSION + 'PREREQ_PM' => { + 'LWP::UserAgent' => 6.02, + 'LWP::Protocol::https' => 6.04, + 'Sysadm::Install' => 0.43, + 'YAML' => 0.71, + 'JSON' => 2.53, + 'Test::MockObject' => 1.09, + 'Log::Log4perl' => 1, + 'Mojolicious' => 4.13, + 'Pod::Usage' => 1.36, + 'File::MMagic' => 1.29, + 'OAuth::Cmdline' => 0.04, + }, # e.g., Module::Name => 1.1 + EXE_FILES => ["eg/google-drive-init"], + $ExtUtils::MakeMaker::VERSION >= 6.50 ? (%$meta_merge) : (), + ($] >= 5.005 ? ## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'lib/Net/Google/Drive/Simple.pm', + AUTHOR => 'Mike Schilli ') : ()), +); diff --git a/src/external/Net-Google-Drive-Simple-0.12/README b/src/external/Net-Google-Drive-Simple-0.12/README new file mode 100644 index 0000000..fae306f --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/README @@ -0,0 +1,229 @@ +###################################################################### + Net::Google::Drive::Simple 0.12 +###################################################################### + +NAME + Net::Google::Drive::Simple - Simple modification of Google Drive data + +SYNOPSIS + use Net::Google::Drive::Simple; + + # requires a ~/.google-drive.yml file with an access token, + # see description below. 
+ my $gd = Net::Google::Drive::Simple->new(); + + my $children = $gd->children( "/folder/path" ); + + for my $child ( @$children ) { + + next if $child->kind() ne 'drive#file'; + + next if !$child->can( "downloadUrl" ); + + print $child->originalFilename(), + " can be downloaded at ", + $child->downloadUrl(), + "\n"; + } + +DESCRIPTION + Net::Google::Drive::Simple authenticates with a user's Google Drive and + offers several convenience methods to list, retrieve, and modify the + data stored in the 'cloud'. See "eg/google-drive-upsync" as an example + on how to keep a local directory in sync with a remote directory on + Google Drive. + + GETTING STARTED + To get the access token required to access your Google Drive data via + this module, you need to run the script "eg/google-drive-init" in this + distribution. + + Before you run it, you need to register your 'app' with Google Drive and + obtain a client_id and a client_secret from Google: + + https://developers.google.com/drive/web/enable-sdk + + Click on "Enable the Drive API and SDK", and find "Create an API project + in the Google APIs Console". On the API console, create a new project, + click "Services", and enable "Drive API" (leave "drive SDK" off). Then, + under "API Access" in the navigation bar, create a client ID, and make + sure to register a an "installed application" (not a "web application"). + "Redirect URIs" should contain "http://localhost". This will get you a + "Client ID" and a "Client Secret". + + Then, replace the following lines in "eg/google-drive-init" with the + values received: + + # You need to obtain a client_id and a client_secret from + # https://developers.google.com/drive to use this. + my $client_id = "XXX"; + my $client_secret = "YYY"; + + Then run the script. It'll start a web server on port 8082 on your local + machine. 
When you point your browser at http://localhost:8082, you'll + see a link that will lead you to Google Drive's login page, where you + authenticate and then allow the app (specified by client_id and + client_secret above) access to your Google Drive data. The script will + then receive an access token from Google Drive and store it in + ~/.google-drive.yml from where other scripts can pick it up and work on + the data stored on the user's Google Drive account. Make sure to limit + access to ~/.google-drive.yml, because it contains the access token that + allows everyone to manipulate your Google Drive data. It also contains a + refresh token that this library uses to get a new access token + transparently when the old one is about to expire. + +METHODS + "new()" + Constructor, creates a helper object to retrieve Google Drive data + later. + + "my $children = $gd->children( "/path/to" )" + Return the entries under a given path on the Google Drive as a + reference to an array. Each entry is an object composed of the JSON + data returned by the Google Drive API. Each object offers methods + named like the fields in the JSON data, e.g. "originalFilename()", + "downloadUrl", etc. + + Will return all entries found unless "maxResults" is set: + + my $children = $gd->children( "/path/to", { maxResults => 3 } ) + + Due to the somewhat capricious ways Google Drive handles its + directory structures, the method needs to traverse the path + component by component and determine the ID of each directory to get + to the next level. To speed up subsequent lookups, it also returns + the ID of the last component to the caller: + + my( $children, $parent ) = $gd->children( "/path/to" ); + + If the caller now wants to e.g. insert a file into the directory, + its ID is available in $parent. 
+ + Each child comes back as a files#resource type and gets mapped into + an object that offers access to the various fields via methods: + + for my $child ( @$children ) { + print $child->kind(), " ", $child->title(), "\n"; + } + + Please refer to + + https://developers.google.com/drive/v2/reference/files#resource + + for details on which fields are available. + + "my $files = $gd->files( )" + Return all files on the drive as a reference to an array. Will + return all entries found unless "maxResults" is set: + + my $files = $gd->files( { maxResults => 3 } ) + + Note that Google limits the number of entries returned by default to + 100, and seems to restrict the maximum number of files returned by a + single query to 3,500, even if you specify higher values for + "maxResults". + + Each file comes back as an object that offers access to the Google + Drive item's fields, according to the API (see "children()"). + + "my $id = $gd->folder_create( "folder-name", $parent_id )" + Create a new folder as a child of the folder with the id $parent_id. + Returns the ID of the new folder or undef in case of an error. + + "$gd->file_upload( $file, $dir_id )" + Uploads the content of the file $file into the directory with the ID + $dir_id on Google Drive. Uses $file as the file name. + + To overwrite an existing file on Google Drive, specify the file's ID + as an optional parameter: + + $gd->file_upload( $file, $dir_id, $file_id ); + + "$gd->download( $item, [$local_filename] )" + Downloads an item found via "files()" or "children()". Also accepts + the downloadUrl of an item. If $local_filename is not specified, + "download()" will return the data downloaded (this might be + undesirable for large files). If $local_filename is specified, + "download()" will store the downloaded data under the given file + name. 
+ + my $gd = Net::Google::Drive::Simple->new(); + my $files = $gd->files( { maxResults => 20 }, { page => 0 } ); + for my $file ( @$files ) { + my $name = $file->originalFilename(); + print "Downloading $name\n"; + $gd->download( $file, $name ) or die "failed: $!"; + } + + Be aware that only documents like PDF or png can be downloaded + directly. Google Drive Documents like spreadsheets or (text) + documents need to be exported into one of the available formats. + Check for "exportLinks" on a file given. In case of a document that + can be exported you will receive a hash in the form: + + { + 'format_1' => 'download_link_1', + 'format_2' => 'download_link_2', + ... + } + + Choose your download link and use it as an argument to the + download() function which can also take urls directly. + + my $gd = Net::Google::Drive::Simple->new(); + my $children = $gd->children( '/path/to/folder/on/google/drive' ); + for my $child ( @$children ) { + if ($child->can( 'exportLinks' )){ + my $type_chosen; + foreach my $type (keys %{$child->exportLinks()}){ + # Take any type you can get.. + $type_chosen = $type; + # ..but choose your preferred format, opendocument here: + last if $type =~/oasis\.opendocument/; + } + my $url = $child->exportLinks()->{$type_chosen}; + + $gd->download($url, 'my/local/file'); + + } + } + + "my $files = $gd->search( )" + my $children= $gd->search({ maxResults => 20 },{ page => 0 }, + "title contains 'Futurama'"); + + Search files for attributes. See + for a + definition of the attributes. + + To list all available files, those on the drive, those directly + shared with the user, and those generally available to the user, use + an empty search: + + my $children= $gd->search({},{ page => 0 },""); + + "$gd->file_delete( file_id )" + Delete the file with the specified ID from Google Drive. 
+ +Error handling + In case of an error while retrieving information from the Google Drive + API, the methods above will return "undef" and a more detailed error + message can be obtained by calling the "error()" method: + + print "An error occurred: ", $gd->error(); + +LOGGING/DEBUGGING + Net::Google::Drive::Simple is Log4perl-enabled. To find out what's going + on under the hood, turn on Log4perl: + + use Log::Log4perl qw(:easy); + Log::Log4perl->easy_init($DEBUG); + +LEGALESE + Copyright 2012 by Mike Schilli, all rights reserved. This program is + free software, you can redistribute it and/or modify it under the same + terms as Perl itself. + +AUTHOR + 2012, Mike Schilli + diff --git a/src/external/Net-Google-Drive-Simple-0.12/eg/file-download b/src/external/Net-Google-Drive-Simple-0.12/eg/file-download new file mode 100755 index 0000000..a1aefe9 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/eg/file-download @@ -0,0 +1,25 @@ +#!/usr/local/bin/perl -w +use strict; +use Net::Google::Drive::Simple; +use Log::Log4perl qw(:easy); +use Sysadm::Install qw( ask ); +use File::Basename; +use Getopt::Std; +use Pod::Usage; +use Data::Dumper; +use Log::Log4perl qw(:easy); +Log::Log4perl->easy_init($DEBUG); + +my $gd = Net::Google::Drive::Simple->new(); + +my $files = $gd->files( { maxResults => 20 }, { page => 0 } ); + +for my $file ( @$files ) { + my $name = $file->originalFilename(); + my $q = "$name ([y]/n)"; + my $yes = ask $q, "y"; + next if $yes ne "y"; + print "Downloading $name\n"; + my $c = $gd->download( $file, $name ); + print "Stored as $name\n"; +} diff --git a/src/external/Net-Google-Drive-Simple-0.12/eg/file-upload b/src/external/Net-Google-Drive-Simple-0.12/eg/file-upload new file mode 100755 index 0000000..9b02ade --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/eg/file-upload @@ -0,0 +1,65 @@ +#!/usr/local/bin/perl -w +use strict; +use FindBin qw( $Bin ); +use lib "$Bin/../lib"; + +use Net::Google::Drive::Simple; +use 
Log::Log4perl qw(:easy); +use Pod::Usage; + +my( $file, $dir ) = @ARGV; + +# Test definedness/emptiness rather than truth so a file named "0" is accepted. +if( ! defined $file or $file eq '' ) { + pod2usage( "No file given for upload." ); +} + +if( ! -f $file ) { + pod2usage( "No such file: $file" ); +} + +if( ! defined $dir ) { + pod2usage( "No Google Drive destination directory given." ); +} + +Log::Log4perl->easy_init( $DEBUG ); + +my $gd = Net::Google::Drive::Simple->new(); + +my( $children, $parent ) = + $gd->children( $dir, { maxResults => 1 } ); + +if( !$parent ) { + LOGDIE "Can't find remote directory $dir. Does it exist?"; +} + +# file_upload() returns undef on failure; the reason is in $gd->error(), not in $!. +$gd->file_upload( $file, $parent ) or + LOGDIE "Upload failed: " . $gd->error(); + +__END__ + +=head1 NAME + + file-upload - Upload a file into a Google Drive directory + +=head1 SYNOPSIS + + file-upload local-file google-drive-dir + +=head1 DESCRIPTION + +C<file-upload> uploads a given file to Google Drive, and puts +it into the destination directory specified. + +=head1 EXAMPLES + + $ file-upload my.txt /stuff/txtfiles + +=head1 LEGALESE + +Copyright 2014 by Mike Schilli, all rights reserved. +This program is free software, you can redistribute it and/or +modify it under the same terms as Perl itself.
+ +=head1 AUTHOR + +2014, Mike Schilli diff --git a/src/external/Net-Google-Drive-Simple-0.12/eg/google-drive-init b/src/external/Net-Google-Drive-Simple-0.12/eg/google-drive-init new file mode 100755 index 0000000..0b96512 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/eg/google-drive-init @@ -0,0 +1,25 @@ +#!/usr/local/bin/perl -w +########################################### +# google-drive-init +# Mike Schilli, 2014 (m@perlmeister.com) +########################################### +use strict; +use lib 'lib'; + +use OAuth::Cmdline::GoogleDrive; +use OAuth::Cmdline::Mojo; + +my $oauth = OAuth::Cmdline::GoogleDrive->new( + client_id => "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", + client_secret => "YYYYYYYYYYYYYYYYYYYYYYYY", + login_uri => "https://accounts.google.com/o/oauth2/auth", + token_uri => "https://accounts.google.com/o/oauth2/token", + scope => "https://www.googleapis.com/auth/drive", + access_type => "offline", +); + +my $app = OAuth::Cmdline::Mojo->new( + oauth => $oauth, +); + +$app->start( 'daemon', '-l', $oauth->local_uri ); diff --git a/src/external/Net-Google-Drive-Simple-0.12/eg/google-drive-upsync b/src/external/Net-Google-Drive-Simple-0.12/eg/google-drive-upsync new file mode 100755 index 0000000..ed67c2a --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/eg/google-drive-upsync @@ -0,0 +1,158 @@ +#!/usr/local/bin/perl -w +use strict; +use lib 'lib'; + +use Sysadm::Install qw(:all); +use Net::Google::Drive::Simple; +use Log::Log4perl qw(:easy); +use File::Basename; +use Getopt::Std; +use Pod::Usage; + +getopts("hvn", \my %opts); +pod2usage() if $opts{h}; + +my( $local_dir, $gd_dir ) = @ARGV; + +if( ! defined $gd_dir ) { + pod2usage( "No Google Drive dir given" ); +} + +if( ! 
-d $local_dir ) { + pod2usage( "$local_dir not a directory" ); +} + +my $log_level = $INFO; +$log_level = $DEBUG if $opts{ v }; + +Log::Log4perl->easy_init( { + level => $log_level, + layout => "%d %F{1}:%L> %m%n" +} ); + +my %files_local = (); + +# A directory that passes -d can still be unreadable; fail loudly instead of syncing nothing. +opendir DH, $local_dir or LOGDIE "Can't open directory $local_dir: $!"; + +for my $file ( readdir DH ) { + + my $path = "$local_dir/$file"; + + if( ! -f $path) { + next; + } + + $files_local{ $file } = -s $path; +} + +closedir DH; + +INFO "Found ", scalar keys %files_local, " local files"; + +my $gd = Net::Google::Drive::Simple->new(); + +INFO "Listing $gd_dir on Google Drive"; + +my( $entries_gd, $parent ) = $gd->children( $gd_dir ); + +if( !defined $entries_gd ) { + pod2usage( "$gd_dir can't be listed - does it exist?" ); +} + +INFO "Found ", scalar @$entries_gd, " files on Google Drive"; + +my %needs_update = (); + +for my $entry ( @$entries_gd ) { + if( $entry->kind() ne 'drive#file' ) { + DEBUG "Ignoring ", $entry->title(); + next; + } + + my $title = $entry->title(); + + my $labels = $entry->labels(); + + if( $labels->{ trashed } ) { + # Statement terminator, not a comma: with a comma "next" ran as an argument of INFO and the message was never logged. + INFO "Ignoring trashed file $title"; + next; + } + + if( exists $files_local{ $title } ) { + if( $entry->fileSize() == $files_local{ $title } ) { + DEBUG "$title synched OK"; + delete $files_local{ $title }; + next; + } + ERROR "$title: different file size (local:$files_local{ $title } ", + "gdrive: ", $entry->fileSize(), ")"; + $needs_update{ $title } = $entry; + } else { + INFO "Remote only: $title"; + } +} + +for my $file ( sort keys %files_local ) { + + my $file_id; + + if( exists $needs_update{ $file } ) { + INFO "Needs update: $file"; + $file_id = $needs_update{ $file }->id(); + } else { + INFO "Local only: $file"; + } + + next if $opts{ n }; + + INFO "Uploading file $file"; + # file_upload() returns undef on failure; report it and carry on with the remaining files. + $gd->file_upload( "$local_dir/$file", $parent, $file_id ) or ERROR "Upload of $file failed: ", $gd->error(); +} + +__END__ + +=head1 NAME + + google-drive-upsync - Sync a local dir with a Google Drive dir + +=head1 SYNOPSIS + + google-drive-upsync local-dir /gdrive-dir + +=head1 OPTIONS + +=over 8 + +=item
B<-v> + +Be verbose. + +=item B<-n> + +Dryrun, don't upload any files, just report what needs to be done. + +=back + +=head1 DESCRIPTION + +C<google-drive-upsync> uploads the files in a local directory to a +directory on Google Drive. If a file on Google Drive already exists +with a different byte count, it will overwrite the remote file with +the local counterpart. + +Files in the Google Drive dir that have no counterpart in the local dir +are left alone. + +=head1 EXAMPLES + + $ google-drive-upsync ~/books /books + +=head1 LEGALESE + +Copyright 2012 by Mike Schilli, all rights reserved. +This program is free software, you can redistribute it and/or +modify it under the same terms as Perl itself. + +=head1 AUTHOR + +2012, Mike Schilli diff --git a/src/external/Net-Google-Drive-Simple-0.12/lib/Net/Google/Drive/Simple.pm b/src/external/Net-Google-Drive-Simple-0.12/lib/Net/Google/Drive/Simple.pm new file mode 100644 index 0000000..408dc89 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/lib/Net/Google/Drive/Simple.pm @@ -0,0 +1,851 @@ +########################################### +package Net::Google::Drive::Simple; +########################################### +use strict; +use warnings; + +use LWP::UserAgent; +use HTTP::Request; +use HTTP::Headers; +use HTTP::Request::Common; +use Sysadm::Install qw( slurp ); +use File::Basename; +use YAML qw( LoadFile DumpFile ); +use JSON qw( from_json to_json ); +use Test::MockObject; +use Log::Log4perl qw(:easy); +use Data::Dumper; +use File::MMagic; +use OAuth::Cmdline::GoogleDrive; + +our $VERSION = "0.12"; + +########################################### +sub new { +########################################### + my($class, %options) = @_; + + my $self = { + init_done => undef, + api_file_url => "https://www.googleapis.com/drive/v2/files", + api_upload_url => "https://www.googleapis.com/upload/drive/v2/files", + oauth => OAuth::Cmdline::GoogleDrive->new( ), + error => undef, + %options, + }; + + bless $self, $class; +} +
+########################################### +sub error { +########################################### + # Accessor for the last error message: stores $set when it is defined + # and always returns the current value. + my( $self, $set ) = @_; + + if( defined $set ) { + $self->{ error } = $set; + } + + return $self->{ error }; +} + +########################################### +sub init { +########################################### + # Runs api_test() on the first call and dies via LOGDIE if it fails; + # once init_done is set, later calls return 1 without testing again. + # $path is unpacked but not used in this sub. + my( $self, $path ) = @_; + + if( $self->{ init_done } ) { + return 1; + } + + DEBUG "Testing API"; + if( !$self->api_test() ) { + LOGDIE "api_test failed"; + } + + $self->{ init_done } = 1; + + return 1; +} + +########################################### +sub api_test { +########################################### + # Sends one authorized GET for the file list (maxResults => 1). + # Returns 1 on an HTTP success response; otherwise records the + # response message via error(), logs it and returns 0. + my( $self ) = @_; + + my $url = $self->file_url( { maxResults => 1 } ); + + my $ua = LWP::UserAgent->new(); + + my $req = HTTP::Request->new( + GET => $url->as_string, + ); + $req->header( $self->{ oauth }->authorization_headers() ); + DEBUG "Fetching $url"; + + my $resp = $ua->request( $req ); + + if( $resp->is_success() ) { + DEBUG "API tested OK"; + return 1; + } + + $self->error( $resp->message() ); + + ERROR "API error: ", $resp->message(); + return 0; +} + +########################################### +sub file_url { +########################################### + # Returns a URI for api_file_url with $opts as the query string; + # maxResults defaults to 3000 unless the caller supplies its own. + my( $self, $opts ) = @_; + + $opts = {} if !defined $opts; + + my $default_opts = { + maxResults => 3000, + }; + + $opts = { + %$default_opts, + %$opts, + }; + + my $url = URI->new( $self->{ api_file_url } ); + $url->query_form( $opts ); + + return $url; +} + +########################################### +sub files { +########################################### + my( $self, $opts, $search_opts ) = @_; + + if( !defined $search_opts ) { + $search_opts = {}; + } + $search_opts = { + page => 1, # default; overridden by the caller's own page setting + %$search_opts, + }; + + if( !defined $opts ) { + $opts = {}; + } + + $self->init(); + + my @docs = (); + + while( 1 ) { + my $url = $self->file_url( $opts ); + my $data = $self->http_json( $url ); + + if( !defined $data ) { + return undef; + } + + my $next_item = $self->item_iterator( 
$data ); + + while( my $item = $next_item->() ) { + if( $item->{ kind } eq "drive#file" ) { + my $file = $item->{ originalFilename }; + if( !defined $file ) { + DEBUG "Skipping $item->{ title } (no originalFilename)"; + next; + } + + push @docs, $self->data_factory( $item ); + } else { + DEBUG "Skipping $item->{ title } ($item->{ kind })"; + } + } + + if( $search_opts->{ page } and $data->{ nextPageToken } ) { + $opts->{ pageToken } = $data->{ nextPageToken }; + } else { + last; + } + } + + return \@docs; +} + +########################################### +sub folder_create { +########################################### + # Creates a folder named $title under the folder with ID $parent. + # Returns the new folder's ID, or undef if the request failed. + my( $self, $title, $parent ) = @_; + + my $url = URI->new( $self->{ api_file_url } ); + + my $data = $self->http_json( $url, { + title => $title, + parents => [ { id => $parent } ], + mimeType => "application/vnd.google-apps.folder", + } ); + + if( ! defined $data ) { + return undef; + } + + return $data->{ id }; +} + +########################################### +sub file_upload { +########################################### + # Uploads the local file $file. Without $file_id a new entry is first + # created under $parent_id; with $file_id that existing file's content + # is replaced. Returns the file ID, or undef on failure (the reason is + # then available from error()). + my( $self, $file, $parent_id, $file_id ) = @_; + + # Since a file upload can take a long time, refresh the token + # just in case. + $self->{ oauth }->token_expire(); + + my $title = basename $file; + + # First, insert the file placeholder, according to + # http://stackoverflow.com/questions/10317638 + my $file_data = slurp $file; + my $mime_type = $self->file_mime_type( $file ); + + my $url; + + if( ! defined $file_id ) { + $url = URI->new( $self->{ api_file_url } ); + + my $data = $self->http_json( $url, + { mimeType => $mime_type, + parents => [ { id => $parent_id } ], + title => $title, + } + ); + + if( ! defined $data ) { + return undef; + } + + $file_id = $data->{ id }; + } + + $url = URI->new( $self->{ api_upload_url } . 
"/$file_id" ); + $url->query_form( uploadType => "media" ); + + my $req = &HTTP::Request::Common::PUT( + $url->as_string, + $self->{ oauth }->authorization_headers(), + "Content-Type" => $mime_type, + Content => $file_data, + ); + + my $resp = $self->http_loop( $req ); + + if( $resp->is_error() ) { + # The message lives on the HTTP response; this class has no message() method. + $self->error( $resp->message() ); + return undef; + } + + DEBUG $resp->as_string; + + return $file_id; +} + +########################################### +sub file_delete { +########################################### + # Deletes the file with ID $file_id (dies via LOGDIE when no ID is given). + # Returns $file_id on success, or undef with the reason in error(). + my( $self, $file_id ) = @_; + + my $url; + + LOGDIE 'Deletion requires file_id' if( ! defined $file_id ); + + $url = URI->new( $self->{ api_file_url } . "/$file_id" ); + + my $req = &HTTP::Request::Common::DELETE( + $url->as_string, + $self->{ oauth }->authorization_headers(), + ); + + my $resp = $self->http_loop( $req ); + + DEBUG $resp->as_string; + + if( $resp->is_error ) { + $self->error( $resp->message() ); + return undef; + } + + return $file_id; +} + +########################################### +sub children_by_folder_id { +########################################### + my( $self, $folder_id, $opts, $search_opts ) = @_; + + $self->init(); + + if( !defined $search_opts ) { + $search_opts = {}; + } + + $search_opts = { + page => 1, + %$search_opts, + }; + + if( !defined $opts ) { + $opts = { + maxResults => 100, + }; + } + + my $url = URI->new( $self->{ api_file_url } ); + $opts->{ q } = "'$folder_id' in parents"; + + if( $search_opts->{ title } ) { + # Escape backslashes and single quotes so a title such as "Bob's" + # cannot end the quoted string in the query early. + ( my $title = $search_opts->{ title } ) =~ s/([\\'])/\\$1/g; + $opts->{ q } .= " AND title = '$title'"; + } + + my @children = (); + + while( 1 ) { + $url->query_form( $opts ); + + my $data = $self->http_json( $url ); + + if( ! 
defined $data ) { + return undef; + } + + my $next_item = $self->item_iterator( $data ); + + while( my $item = $next_item->() ) { + push @children, $self->data_factory( $item ); + } + + if( $search_opts->{ page } and $data->{ nextPageToken } ) { + $opts->{ pageToken } = $data->{ nextPageToken }; + } else { + last; + } + } + + return \@children; +} + +########################################### +sub children { +########################################### + my( $self, $path, $opts, $search_opts ) = @_; + + DEBUG "Determine children of $path"; + + if( !defined $path ) { + LOGDIE "No $path given"; + } + + if( !defined $search_opts ) { + $search_opts = {}; + } + + my @parts = split '/', $path; + my $parent = $parts[0] = "root"; + DEBUG "Parent: $parent"; + + my $folder_id = shift @parts; + + PART: for my $part ( @parts ) { + + DEBUG "Looking up part $part (folder_id=$folder_id)"; + + my $children = $self->children_by_folder_id( $folder_id, + { maxResults => 100, # path resolution maxResults is different + }, + { %$search_opts, title => $part }, + ); + + if( ! defined $children ) { + return undef; + } + + for my $child ( @$children ) { + DEBUG "Found child ", $child->title(); + if( $child->title() eq $part ) { + $folder_id = $child->id(); + $parent = $folder_id; + DEBUG "Parent: $parent"; + next PART; + } + } + + my $msg = "Child $part not found"; + $self->error( $msg ); + ERROR $msg; + return undef; + } + + DEBUG "Getting content of folder $folder_id"; + + my $children = $self->children_by_folder_id( $folder_id, $opts, + $search_opts ); + + if( ! 
defined $children ) { + return undef; + } + + if( wantarray ) { + return( $children, $parent ); + } else { + return $children; + } +} + +########################################### +sub search { +########################################### + my( $self, $opts, $search_opts, $query ) = @_; + $search_opts||= { page => 1 }; + + $self->init(); + + if( !defined $opts ) { + $opts = { + maxResults => 100, + }; + } + + my $url = URI->new( $self->{ api_file_url } ); + + $opts->{ q }= $query; + + my @children = (); + + while( 1 ) { + $url->query_form( $opts ); + + my $data = $self->http_json( $url ); + if( ! defined $data ) { + return undef; + } + + my $next_item = $self->item_iterator( $data ); + + while( my $item = $next_item->() ) { + push @children, $self->data_factory( $item ); + } + + if( $search_opts->{ page } and $data->{ nextPageToken } ) { + $opts->{ pageToken } = $data->{ nextPageToken }; + } else { + last; + } + } + + return \@children; +} + +########################################### +sub data_factory { +########################################### + my( $self, $data ) = @_; + + my $mock = Test::MockObject->new(); + + for my $key ( keys %$data ) { + # DEBUG "Adding method $key"; + $mock->mock( $key , sub { $data->{ $key } } ); + } + + return $mock; +} + +########################################### +sub download { +########################################### + my( $self, $url, $local_file ) = @_; + + if( ref $url ) { + $url = $url->downloadUrl(); + } + + my $req = HTTP::Request->new( + GET => $url, + ); + $req->header( $self->{ oauth }->authorization_headers() ); + + my $ua = LWP::UserAgent->new(); + my $resp = $ua->request( $req, $local_file ); + + if( $resp->is_error() ) { + my $msg = "Can't download $url (" . $resp->message() . 
")"; + ERROR $msg; + $self->error( $msg ); + return undef; + } + + if( $local_file ) { + return 1; + } + + return $resp->content(); +} + + +########################################### +sub http_loop { +########################################### + my( $self, $req, $noinit ) = @_; + + my $ua = LWP::UserAgent->new(); + my $resp; + + my $RETRIES = 3; + my $SLEEP_INTERVAL = 10; + + { + # refresh token if necessary + if( ! $noinit ) { + $self->init(); + } + + DEBUG "Fetching ", $req->url->as_string(); + + $resp = $ua->request( $req ); + + if( ! $resp->is_success() ) { + $self->error( $resp->message() ); + warn "Failed with ", $resp->code(), ": ", $resp->message(); + if( --$RETRIES >= 0 ) { + ERROR "Retrying in $SLEEP_INTERVAL seconds"; + sleep $SLEEP_INTERVAL; + redo; + } else { + ERROR "Out of retries."; + return $resp; + } + } + + DEBUG "Successfully fetched ", length( $resp->content() ), " bytes."; + } + + return $resp; +} + +########################################### +sub http_json { +########################################### + my( $self, $url, $post_data ) = @_; + + my $req; + + if( $post_data ) { + $req = &HTTP::Request::Common::POST( + $url->as_string, + $self->{ oauth }->authorization_headers(), + "Content-Type"=> "application/json", + Content => to_json( $post_data ), + ); + } else { + $req = HTTP::Request->new( + GET => $url->as_string, + ); + $req->header( $self->{ oauth }->authorization_headers() ); + } + + my $resp = $self->http_loop( $req ); + + if( $resp->is_error() ) { + $self->error( $resp->message() ); + return undef; + } + + my $data = from_json( $resp->content() ); + + return $data; +} + +########################################### +sub file_mime_type { +########################################### + my( $self, $file ) = @_; + + # There don't seem to be great implementations of mimetype + # detection on CPAN, so just use this one for now. 
+ + if( !$self->{ magic } ) { + $self->{ magic } = File::MMagic->new(); + } + + return $self->{ magic }->checktype_filename( $file ); +} + +########################################### +sub item_iterator { +########################################### + my( $self, $data ) = @_; + + my $idx = 0; + + if( !defined $data ) { + die "no data in item_iterator"; + } + + return sub { + { + my $next_item = $data->{ items }->[ $idx++ ]; + + return if !defined $next_item; + + if( $next_item->{ labels }->{ trashed } ) { + DEBUG "Skipping $next_item->{ title } (trashed)"; + redo; + } + + return $next_item; + } + }; +} + +1; + +__END__ + +=head1 NAME + +Net::Google::Drive::Simple - Simple modification of Google Drive data + +=head1 SYNOPSIS + + use Net::Google::Drive::Simple; + + # requires a ~/.google-drive.yml file with an access token, + # see description below. + my $gd = Net::Google::Drive::Simple->new(); + + my $children = $gd->children( "/folder/path" ); + + for my $child ( @$children ) { + + next if $child->kind() ne 'drive#file'; + + next if !$child->can( "downloadUrl" ); + + print $child->originalFilename(), + " can be downloaded at ", + $child->downloadUrl(), + "\n"; + } + +=head1 DESCRIPTION + +Net::Google::Drive::Simple authenticates with a user's Google Drive and +offers several convenience methods to list, retrieve, and modify the data +stored in the 'cloud'. See C as an example on how +to keep a local directory in sync with a remote directory on Google Drive. + +=head2 GETTING STARTED + +To get the access token required to access your Google Drive data via +this module, you need to run the script C in this +distribution. + +Before you run it, you need to register your 'app' with Google Drive +and obtain a client_id and a client_secret from Google: + + https://developers.google.com/drive/web/enable-sdk + +Click on "Enable the Drive API and SDK", and find "Create an API project in +the Google APIs Console". 
On the API console, create a new project, click +"Services", and enable "Drive API" (leave "drive SDK" off). Then, under +"API Access" in the navigation bar, create a client ID, and make sure to +register an "installed application" (not a "web application"). "Redirect +URIs" should contain "http://localhost". This will get you a "Client ID" +and a "Client Secret". + +Then, replace the following lines in C<eg/google-drive-init> with the +values received: + + # You need to obtain a client_id and a client_secret from + # https://developers.google.com/drive to use this. + my $client_id = "XXX"; + my $client_secret = "YYY"; + +Then run the script. It'll start a web server on port 8082 on your local +machine. When you point your browser at http://localhost:8082, you'll see a +link that will lead you to Google Drive's login page, where you authenticate +and then allow the app (specified by client_id and client_secret above) access +to your Google Drive data. The script will then receive an access token from +Google Drive and store it in ~/.google-drive.yml from where other scripts can +pick it up and work on the data stored on the user's Google Drive account. Make +sure to limit access to ~/.google-drive.yml, because it contains the access +token that allows everyone to manipulate your Google Drive data. It also +contains a refresh token that this library uses to get a new access token +transparently when the old one is about to expire. + +=head1 METHODS + +=over 4 + +=item C<new()> + +Constructor, creates a helper object to retrieve Google Drive data +later. + +=item C<my $children = $gd-E<gt>children( "/path/to" )> + +Return the entries under a given path on the Google Drive as a reference +to an array. Each entry +is an object composed of the JSON data returned by the Google Drive API. +Each object offers methods named like the fields in the JSON data, e.g. +C<originalFilename()>, C<downloadUrl()>, etc.
+ +Will return all entries found unless C<maxResults> is set: + + my $children = $gd->children( "/path/to", { maxResults => 3 } ) + +Due to the somewhat capricious ways Google Drive handles its directory +structures, the method needs to traverse the path component by component +and determine the ID of each directory to get to the next level. To speed +up subsequent lookups, it also returns the ID of the last component to the +caller: + + my( $children, $parent ) = $gd->children( "/path/to" ); + +If the caller now wants to e.g. insert a file into the directory, its +ID is available in $parent. + +Each child comes back as a files#resource type and gets mapped into +an object that offers access to the various fields via methods: + + for my $child ( @$children ) { + print $child->kind(), " ", $child->title(), "\n"; + } + +Please refer to + + https://developers.google.com/drive/v2/reference/files#resource + +for details on which fields are available. + +=item C<my $files = $gd-E<gt>files( )> + +Return all files on the drive as a reference to an array. +Will return all entries found unless C<maxResults> is set: + + my $files = $gd->files( { maxResults => 3 } ) + +Note that Google limits the number of entries returned by default to +100, and seems to restrict the maximum number of files returned +by a single query to 3,500, even if you specify higher values for +C<maxResults>. + +Each file comes back as an object that offers access to the Google +Drive item's fields, according to the API (see C<children()>). + +=item C<my $id = $gd-E<gt>folder_create( "folder-name", $parent_id )> + +Create a new folder as a child of the folder with the id C<$parent_id>. +Returns the ID of the new folder or undef in case of an error. + +=item C<$gd-E<gt>file_upload( $file, $dir_id )> + +Uploads the content of the file C<$file> into the directory with the ID +$dir_id on Google Drive. Uses C<$file> as the file name.
+ +To overwrite an existing file on Google Drive, specify the file's ID as +an optional parameter: + + $gd->file_upload( $file, $dir_id, $file_id ); + +=item C<$gd-E<gt>download( $item, [$local_filename] )> + +Downloads an item found via C<files()> or C<children()>. Also accepts +the downloadUrl of an item. If C<$local_filename> is not specified, +C<download()> will return the data downloaded (this might be undesirable +for large files). If C<$local_filename> is specified, C<download()> will +store the downloaded data under the given file name. + + my $gd = Net::Google::Drive::Simple->new(); + my $files = $gd->files( { maxResults => 20 }, { page => 0 } ); + for my $file ( @$files ) { + my $name = $file->originalFilename(); + print "Downloading $name\n"; + $gd->download( $file, $name ) or die "failed: ", $gd->error(); + } + +Be aware that only documents like PDF or png can be downloaded directly. Google Drive Documents like spreadsheets or (text) documents need to be exported into one of the available formats. +Check for "exportLinks" on a file given. In case of a document that can be exported you will receive a hash in the form: + + { + 'format_1' => 'download_link_1', + 'format_2' => 'download_link_2', + ... + } + +Choose your download link and use it as an argument to the download() function which can also take urls directly. + + my $gd = Net::Google::Drive::Simple->new(); + my $children = $gd->children( '/path/to/folder/on/google/drive' ); + for my $child ( @$children ) { + if ($child->can( 'exportLinks' )){ + my $type_chosen; + foreach my $type (keys %{$child->exportLinks()}){ + # Take any type you can get.. + $type_chosen = $type; + # ..but choose your preferred format, opendocument here: + last if $type =~/oasis\.opendocument/; + } + my $url = $child->exportLinks()->{$type_chosen}; + + $gd->download($url, 'my/local/file'); + + } + } + +=item C<my $children = $gd-E<gt>search( )> + + my $children= $gd->search({ maxResults => 20 },{ page => 0 }, + "title contains 'Futurama'"); + +Search files for attributes.
See +L<https://developers.google.com/drive/web/search-parameters> +for a definition of the attributes. + +To list all available files, those on the drive, those directly shared +with the user, and those generally available to the user, use an +empty search: + + my $children= $gd->search({},{ page => 0 },""); + +=item C<$gd-E<gt>file_delete( file_id )> + +Delete the file with the specified ID from Google Drive. + +=back + +=head1 Error handling + +In case of an error while retrieving information from the Google Drive +API, the methods above will return C<undef> and a more detailed error +message can be obtained by calling the C<error()> method: + + print "An error occurred: ", $gd->error(); + +=head1 LOGGING/DEBUGGING + +Net::Google::Drive::Simple is Log4perl-enabled. +To find out what's going on under the hood, turn on Log4perl: + + use Log::Log4perl qw(:easy); + Log::Log4perl->easy_init($DEBUG); + +=head1 LEGALESE + +Copyright 2012 by Mike Schilli, all rights reserved. +This program is free software, you can redistribute it and/or +modify it under the same terms as Perl itself.
+ +=head1 AUTHOR + +2012, Mike Schilli diff --git a/src/external/Net-Google-Drive-Simple-0.12/t/001Basic.t b/src/external/Net-Google-Drive-Simple-0.12/t/001Basic.t new file mode 100644 index 0000000..86ab220 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/t/001Basic.t @@ -0,0 +1,63 @@ +###################################################################### +# Test suite for Net::Google::Drive::Simple +# by Mike Schilli +###################################################################### +use warnings; +use strict; + +use FindBin qw( $Bin ); +use Test::More; + +my $nof_tests = 10; +my $nof_live_tests = 9; +plan tests => $nof_tests; + +use Net::Google::Drive::Simple; +use Log::Log4perl qw(:easy); + +# Log::Log4perl->easy_init( { level => $DEBUG, layout => "%F{1}:%L> %m%n" } ); + +my $gd = Net::Google::Drive::Simple->new(); + +ok 1, "loaded ok"; + +SKIP: { + if( !$ENV{ LIVE_TEST } ) { + skip "LIVE_TEST not set, skipping live tests", $nof_live_tests; + } + + my( $files, $parent ) = $gd->children( "/this-path-does-not-exist", + { maxResults => 3 }, { page => 0 }, + ); + + ok !defined $files, "non-existent path"; + is $gd->error(), + "Child this-path-does-not-exist not found", + "error message"; + + ( $files, $parent ) = $gd->children( "/", + { maxResults => 3 }, { page => 0 }, + ); + + # upload a test file + my $testfile = "$Bin/data/testfile"; + my $file_id = $gd->file_upload( $testfile, $parent ); + ok defined $file_id, "upload ok"; + ok $gd->file_delete( $file_id ), "delete ok"; + + is ref($files), "ARRAY", "children returned ok"; + + $files = $gd->children( "/", + { maxResults => 3 }, { page => 0 }, + ); + + is ref($files), "ARRAY", "scalar context children"; + + $files = $gd->files( { maxResults => 3 }, { page => 0 } ); + is ref($files), "ARRAY", "files found"; + + ( $files ) = $gd->files( { maxResults => 10 }, { page => 0 }, + ); + is ref($files), "ARRAY", "files found"; + ok length $files->[0]->originalFilename(), "org filename"; +} diff --git 
a/src/external/Net-Google-Drive-Simple-0.12/t/data/testfile b/src/external/Net-Google-Drive-Simple-0.12/t/data/testfile new file mode 100644 index 0000000..b6f4a92 --- /dev/null +++ b/src/external/Net-Google-Drive-Simple-0.12/t/data/testfile @@ -0,0 +1 @@ +I am a testfile. diff --git a/src/external/firebase/lib/Firebase.pm b/src/external/firebase/lib/Firebase.pm index d2ecbe7..393c600 100755 --- a/src/external/firebase/lib/Firebase.pm +++ b/src/external/firebase/lib/Firebase.pm @@ -7,6 +7,7 @@ use HTTP::Request::Common qw(DELETE PUT GET POST); use Ouch; use JSON; use URI; +use Data::Dumper; @@ -84,6 +85,7 @@ sub put { sub patch { my ($self, $path, $params) = @_; + my $uri = $self->create_uri($path); my $request = POST($uri->as_string, Content_Type => 'form-data', Content => to_json($params)); $request->method('PATCH'); # because HTTP::Request::Common treats PUT as GET rather than POST @@ -135,6 +137,7 @@ sub process_response { } } else { + print Dumper($response); ouch 500, $response->status_line, $response->decoded_content; } } diff --git a/src/external/firebase/lib/FirebaseGS.pm b/src/external/firebase/lib/FirebaseGS.pm new file mode 100755 index 0000000..2134be6 --- /dev/null +++ b/src/external/firebase/lib/FirebaseGS.pm @@ -0,0 +1,455 @@ +package FirebaseGS; + +use Moo; +use Firebase::Auth; +use HTTP::Thin; +use HTTP::Request::Common qw(DELETE PUT GET POST); +use Ouch; +use JSON; +use URI; +use Data::Dumper; + + + +has firebase => ( + is => 'ro', + required => 1, +); + +has auth => ( + is => 'ro', + predicate => 'has_auth', +); + +has auth_token => ( + is => 'ro', + predicate => 'has_token', +); + +has authobj => ( + is => 'rw', + lazy => 1, + predicate => 'has_authobj', + default => sub { + Firebase::Auth->new(%{$_[0]->auth}); + }, +); + +has debug => ( + is => 'rw', + default => sub { '' }, +); + +has agent => ( + is => 'ro', + required => 0, + lazy => 1, + default => sub { HTTP::Thin->new() }, +); + + + + + +sub create_uri { + my ($self, $path, $metadata) = 
@_; + + my $token; + $token = $self->authobj->create_token if $self->has_authobj || $self->has_auth; + $token = $self->auth_token if $self->has_token; + # $path is appended to the object URL verbatim below (no separator is inserted here). + + + my $url = defined($path) ? 'https://firebasestorage.googleapis.com/v0/b/'.$self->firebase.'.appspot.com/o'. $path : + 'https://firebasestorage.googleapis.com/v0/b/'.$self->firebase.'.appspot.com/o'; + $url .= '?uploadType=resumable'; + $url .= '&auth='. $token if defined $token; # no auth parameter when no auth/auth_token was configured + my $uri = URI->new($url); + + return $uri; +} + + + + + + +sub upload { + my ($self, $from_path, $to_path, $metadata) = @_; + + #print $path . "\n"; + + my $uri = $self->create_uri($to_path); + print $uri . "\n"; + + my $length = -s $from_path; + my $request = POST($uri->as_string, Content_Type => 'application/json; charset=UTF-8', + Content_Length => $length, Content => [ $from_path , $metadata ] ); + #$request->method('POST'); # because HTTP::Request::Common treats PUT as GET rather than POST + + + print Dumper($request) . "\n"; + + return $self->process_request( $request ); +} + + + + + + +sub process_request { + my $self = shift; + $self->process_response($self->agent->request( @_ )); +} + +sub process_response { + my ($self, $response) = @_; + + $self->debug($response->header('X-Firebase-Auth-Debug')); + + if ($response->is_success) { + if ($response->decoded_content eq 'null') { + return undef; + } + else { + my $result = eval { from_json($response->decoded_content) }; + if ($@) { + warn $response->decoded_content; + ouch 500, 'Server returned unparsable content.';#, { error => $@, content => $response->decoded_content }; + } + return $result; + } + } + else { + print Dumper($response); + ouch 500, $response->status_line, $response->decoded_content; + } +} + + + + + + + + + + + + + + + + + + + + + +sub get { + my ($self, $path, $param_hash) = @_; + my $uri = $self->create_uri($path); + + my $req = $uri->as_string; + + if(defined($param_hash)){ + while(my ($key, $value) = each (%$param_hash)){ + $req .=
"\&".$key."=$value"; + } + } + #print "$req\n"; + my $request = GET($req); + return $self->process_request($request); +} + +sub delete { + my ($self, $path) = @_; + my $uri = $self->create_uri($path); + return $self->process_request( DELETE $uri ); +} + +sub put { + my ($self, $path, $params) = @_; + + #print $path . "\n"; + + my $uri = $self->create_uri($path); + #print $uri . "\n"; + my $request = POST($uri->as_string, Content_Type => 'form-data', Content => to_json($params)); + + + $request->method('PUT'); # because HTTP::Request::Common treats PUT as GET rather than POST + return $self->process_request( $request ); +} + +sub patch { + my ($self, $path, $params) = @_; + my $uri = $self->create_uri($path); + my $request = POST($uri->as_string, Content_Type => 'form-data', Content => to_json($params)); + $request->method('PATCH'); # because HTTP::Request::Common treats PUT as GET rather than POST + return $self->process_request( $request ); +} + +sub post { + my ($self, $path, $params) = @_; + my $uri = $self->create_uri($path); + my $request = POST($uri->as_string, Content_Type => 'form-data', Content => to_json($params)); + return $self->process_request( $request ); +} + + + +=head1 NAME + +Firebase - An interface to firebase.com. + +=head1 SYNOPSIS + + use Firebase; + + my $fb = Firebase->new(firebase => 'myfirebase', auth => { secret => 'xxxxxxx', data => { uid => 'xxx', username => 'fred' }, admin => \1 } ); + + my $result = $fb->put('foo', { this => 'that' }); + my $result = $fb->get('foo'); # or $fb->get('foo/this'); + my $result = $fb->delete('foo'); + +=head1 DESCRIPTION + +This is a light-weight wrapper around the Firebase REST API. Firebase is a real-time web service that acts as both a queue and a datastore. It's used for building real-time web apps and web services. + +More info at L. + +=head1 METHODS + + +=head2 new + +Constructor + +=over + +=item firebase + +Required. The name of your firebase. 
+ +=item auth + +The parameters you'd pass to create a C object. This is a shortcut for constructing the object yourself and passing it into C. + +=item authobj + +A L object. Will be generated for you automatically if you don't supply one, but do supply C. + +=item agent + +A user agent. An L object will be generated for you automatically if you don't supply one. + +=back + + +=head2 get + +Fetch some data from firebase. + +=over + +=item path + +The path to the info you want to fetch. + +=back + + +=head2 put + +Put some data into a firebase. + +=over + +=item path + +The path where the info should be stored. + +=item params + +A hash reference of parameters to be stored at this location. + +B Firebase doesn't work with arrays, so you can nest scalars and hashes here, but not arrays. + +=back + +=head2 patch + +Partial update of data in a location + +=over + +=item path + +The path where the info should be stored. + +=item params + +A hash reference of parameters to be updated at this location. + +=back + + +=head2 post + +Adds data to an existing location, creating a hash of objects below the path. + +=over + +=item path + +The path where the info should be stored. + +=item params + +A hash reference of parameters to be stored at this location. + +B Firebase doesn't work with arrays, so you can nest scalars and hashes here, but not arrays. + +=back + + +=head2 delete + +Delete some data from a firebase. + +=over + +=item path + +The path where the info is that you want deleted. + +=back + + + +=head2 debug + +If C has been set to a true value in C, this will return the debug message returned with the previous response. + + + + +=head2 create_uri + +Creates a URI to a firebase data segment. You almost certainly want to use C, C or C instead. + +=over + +=item path + +The path to the data. + +=item params + +Any parameters you need to pass for any reason. + +=back + + +=head2 process_request + +Requests data and runs it through C. 
You almost certainly want to use C, C or C instead. + +=over + +=item request + +An L object. + +=back + +=head2 process_response + +Checks for errors, decodes json, and returns a result. You almost certainly want to use C, C or C instead. + +=over + +=item response + +An L object. + +=back + + +=head1 AUTHOR + +=over + +=item * + +Kiran Kumar, C<< >> + +=item * + +JT Smith, C<< >> + +=back + + + +=head1 SUPPORT + +=over + +=item Source Code Repository + +L + +=item Issue Tracker + +L + +=back + + + +=head1 LICENSE AND COPYRIGHT + +Copyright 2013 Plain Black Corporation + +This program is free software; you can redistribute it and/or modify it +under the terms of the the Artistic License (2.0). You may obtain a +copy of the full license at: + +L + +Any use, modification, and distribution of the Standard or Modified +Versions is governed by this Artistic License. By using, modifying or +distributing the Package, you accept this license. Do not use, modify, +or distribute the Package, if you do not accept this license. + +If your Modified Version has been derived from a Modified Version made +by someone other than you, you are nevertheless required to ensure that +your Modified Version complies with the requirements of this license. + +This license does not grant you the right to use any trademark, service +mark, tradename, or logo of the Copyright Holder. + +This license includes the non-exclusive, worldwide, free-of-charge +patent license to make, have made, use, offer to sell, sell, import and +otherwise transfer the Package with respect to any patent claims +licensable by the Copyright Holder that are necessarily infringed by the +Package. If you institute patent litigation (including a cross-claim or +counterclaim) against any party alleging that the Package constitutes +direct or contributory patent infringement, then this Artistic License +to you shall terminate on the date that such litigation is filed. 
+Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER +AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. +THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY +YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR +CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR +CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=cut + +1; diff --git a/src/sanity_checks/test_firebase.pl b/src/sanity_checks/test_firebase.pl new file mode 100755 index 0000000..bf3eb34 --- /dev/null +++ b/src/sanity_checks/test_firebase.pl @@ -0,0 +1,22 @@ +#!/usr/bin/perl +# + +use lib '../external/firebase/lib'; +use lib '../external/ouch/lib'; +use Firebase; +use Test::More; +use Ouch; + +my $tk = $ENV{FIREBASE_SECRET} or die "Set FIREBASE_SECRET to the Firebase secret before running this check\n"; # secret is read from the environment, never committed +my $fb = Firebase->new(firebase => 'clusterjob-78552', auth => {secret=>$tk, data => {uid => 'hatef'}, admin => \1} ); + +#print $fb->firebase . 
"\n"; + +my $result = $fb->put('hatef/ea923d43', { username => 'hatef', pid => 'ea923d43' }); +$result = $fb->put('hatef/ba92fg33', { username => 'hatef', pid => 'ba92fg33' }); + +$result = $fb->get('foo'); +$result = $fb->delete('foo'); + + + diff --git a/src/sanity_checks/test_regex.pl b/src/sanity_checks/test_regex.pl new file mode 100644 index 0000000..b87b286 --- /dev/null +++ b/src/sanity_checks/test_regex.pl @@ -0,0 +1,12 @@ +#!/usr/bin/perl +# +print "Enter what needs to be matched:\n"; +my $name = readline(STDIN);chomp $name; +print "Enter the regexp:\n"; +my $regexp = readline(STDIN); chomp $regexp; + +if($name =~ /$regexp/){ + print "$name matches $regexp\n"; +}else{ + print "$name does not match $regexp\n"; +} diff --git a/src/sanity_checks/test_uncomment.pl b/src/sanity_checks/test_uncomment.pl new file mode 100644 index 0000000..497bff5 --- /dev/null +++ b/src/sanity_checks/test_uncomment.pl @@ -0,0 +1,21 @@ +#!/usr/bin/perl +# + +print "Enter python line to be uncommented:\n"; +my $line = readline(STDIN); chomp $line; +#my $line = " x = \'%3.2f%i\' %%Hatef "; + +$line = uncomment_python_line($line); +print "AFTER : $line\n"; + +sub uncomment_python_line{ + my ($line) = @_; + + print "BEFORE: $line\n"; + + $line =~ s/^(?:(?![\"|\']).)*\K\#(.*)//; + + + + return $line; +} diff --git a/src/tmp/log.txt b/src/tmp/log.txt new file mode 100644 index 0000000..a8f6b69 --- /dev/null +++ b/src/tmp/log.txt @@ -0,0 +1,59 @@ + +pid 3237a94673f4d0e480f83be1747f05301ad6e015 +date: 2015-Nov-02 03:11:37 (GMT -08:00:00) +remote: monajemi@sherlock.stanford.edu +script: myMatlabJob.m +initial_flag: run +reruned: 2 times + + + +pid f6175c6fe78091ebbd574c06562a19c34d48d71d +date: 2015-Nov-02 03:23:34 (GMT -08:00:00) +remote: monajemi@sherlock.stanford.edu +script: example_reduce.m +initial_flag: parrun +reruned: 1 times + + + +pid d4f4e3f9cda03298a7757ff13942a56e7ad7c6d4 +date: 2015-Nov-02 03:34:42 (GMT -08:00:00) +remote: monajemi@sherlock.stanford.edu +script: example_reduce.m +initial_flag: parrun + + +
+pid f8566928c72be452e8936e77322ec6c53ae5795e +date: 2015-Nov-02 19:27:18 (GMT -08:00:00) +remote: monajemi@solomon.stanford.edu +script: myMatlabJob.m +initial_flag: run + + + +pid 849ea31c2b315feedbcd4f995a67fd86d4c9bf59 +date: 2015-Nov-03 15:11:12 (GMT -08:00:00) +remote: monajemi@sherlock.stanford.edu +script: myMatlabJob.m +initial_flag: run +cleaned: 2015-Nov-03 15:11:47 (GMT -08:00:00) + + + +pid fb576000df9373f66b9fc229bbfedf3a22b5256e +date: 2015-Nov-03 15:44:50 (GMT -08:00:00) +remote: monajemi@sherlock.stanford.edu +script: example_reduce.m +initial_flag: parrun + + + +pid 43cafaae4d7e4b7e76bada9584167adca3e78eb8 +date: 2015-Nov-03 15:45:28 (GMT -08:00:00) +remote: monajemi@sherlock.stanford.edu +script: example_reduce.m +initial_flag: parrun + + diff --git a/ssh_config b/ssh_config index 764037d..119b760 100644 --- a/ssh_config +++ b/ssh_config @@ -1,31 +1,140 @@ +[sherlock2] +Host login.sherlock.stanford.edu +User monajemi +Bqs SLURM +Repo /scratch/users/monajemi/CJRepo_Remote +MAT matlab/R2017a +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.6 +Pythonlib pytorch:pandas:cuda80:scipy:matplotlib:torchvision:-c soumith +[sherlock2] + + +[corn] +Host corn.stanford.edu +User monajemi +Bqs SGE +Repo /farmshare/user_data/monajemi/CJRepo_Remote +MAT matlab/r2016b +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.4.3 +Pythonlib scipy:pytorch +[corn] + + +[rice] +Host rice.stanford.edu +User monajemi +Bqs SLURM +Repo /farmshare/user_data/monajemi/CJRepo_Remote +MAT matlab/r2016b +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.4.3 +Pythonlib scipy:matplotlib +[rice] + + +[osg] +Host login.osgconnect.net +User hatef +Bqs HTCondor +Repo ./ +MAT matlab/2016b +MATlib ~/cvx +Python python/3.4.3 +Pythonlib scipy +[osg] + + +[bridges] +Host bridges.psc.edu +User monajemi +Bqs SLURM +Repo /pylon2/cc3l6jp/monajemi/ +MAT matlab/R2016a +MATlib ~/cvx +Python python/3.4.3 +Pythonlib scipy +[bridges] + + +[stampede] +Host 
stampede.tacc.utexas.edu +User tg840998 +Bqs SLURM +Repo /work/04800/tg840998/CJRepo_Remote +MAT matlab/2016b +MATlib ~/BPDN/CVX/cvx +Python python/3.4.3 +Pythonlib scipy +[stampede] + + [sherlock] -Host sherlock.stanford.edu -User monajemi -Bqs SLURM -Repo /scratch/users/monajemi/CJRepo_Remote -MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Host sherlock.stanford.edu +User monajemi +Bqs SLURM +Repo /scratch/users/monajemi/CJRepo_Remote +MAT matlab/R2016b +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.4.3 +Pythonlib pytorch:cuda80:scipy:matplotlib:torchvision -c soumith [sherlock] -[solomon] -Host solomon.stanford.edu -User monajemi -Bqs SGE -Repo ~/CJRepo_Remote -MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a -[solomon] - -[proclus] -Host proclus.stanford.edu -User monajemi -Bqs SGE -Repo /hsgs/nobackup/monajemi/CJRepo_Remote -MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a -[proclus] - -[farmshare] -Host corn.stanford.edu -User monajemi -Bqs SGE -Repo /farmshare/user_data/monajemi/CJRepo_Remote -MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a -[farmshare] \ No newline at end of file + +[sherlock2] +Host login.sherlock.stanford.edu +User monajemi +Bqs SLURM +Repo /scratch/users/monajemi/CJRepo_Remote +MAT matlab/R2017a +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.6 +Pythonlib pytorch:cuda80:scipy:matplotlib:torchvision:-c soumith +[sherlock2] + + +[gce] +Host 5.203.181.25 +user hatefmonajemi +Bqs SLURM +Repo /home/hatefmonajemi/CJRepo_Remote +MAT "" +matlib CJinstlled/cvx +Python python3.4 +Pythonlib pytorch:torchvision:cuda80:pandas:matplotlib:-c soumith +[gce] + +[az] +Host 104.42.101.85 +User ubuntu +Bqs SLURM +Repo /home/ubuntu/CJRepo_Remote +MAT "" +MATlib "" +Python python3.4 +Pythonlib pytorch:torchvision:cuda80:pandas:matplotlib:-c soumith +[az] + +[ec2] +Host 54.201.71.158 +User ubuntu +Bqs SLURM +Repo /home/ubuntu/CJRepo_Remote +MAT "" +MATlib "" +Python python3.4 +Pythonlib pytorch:torchvision:cuda80:pandas:-c 
soumith +[ec2] + + +[gce-hatef] +host 35.18.20.12 +User moosh +Bqs SLURM +Repo ~/CJRepo +MAT matlab/r2016b +MATlib CJinstalled/cvx:CJinstalled/mosek/7/toolbox/r2013a +Python python3.4 +Pythonlib pytorch:torchvision:cuda80:pandas:matplotlib:-c soumith +[gce-hatef] diff --git a/ssh_config.bak b/ssh_config.bak new file mode 100644 index 0000000..5901972 --- /dev/null +++ b/ssh_config.bak @@ -0,0 +1,140 @@ +[sherlock2] +Host login.sherlock.stanford.edu +User monajemi +Bqs SLURM +Repo /scratch/users/monajemi/CJRepo_Remote +MAT matlab/R2017a +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.6 +Pythonlib pytorch:pandas:cuda80:scipy:matplotlib:torchvision:-c soumith +[sherlock2] + + +[corn] +Host corn.stanford.edu +User monajemi +Bqs SGE +Repo /farmshare/user_data/monajemi/CJRepo_Remote +MAT matlab/r2016b +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.4.3 +Pythonlib scipy:pytorch +[corn] + + +[rice] +Host rice.stanford.edu +User monajemi +Bqs SLURM +Repo /farmshare/user_data/monajemi/CJRepo_Remote +MAT matlab/r2016b +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.4.3 +Pythonlib scipy:matplotlib +[rice] + + +[osg] +Host login.osgconnect.net +User hatef +Bqs HTCondor +Repo ./ +MAT matlab/2016b +MATlib ~/cvx +Python python/3.4.3 +Pythonlib scipy +[osg] + + +[bridges] +Host bridges.psc.edu +User monajemi +Bqs SLURM +Repo /pylon2/cc3l6jp/monajemi/ +MAT matlab/R2016a +MATlib ~/cvx +Python python/3.4.3 +Pythonlib scipy +[bridges] + + +[stampede] +Host stampede.tacc.utexas.edu +User tg840998 +Bqs SLURM +Repo /work/04800/tg840998/CJRepo_Remote +MAT matlab/2016b +MATlib ~/BPDN/CVX/cvx +Python python/3.4.3 +Pythonlib scipy +[stampede] + + +[sherlock] +Host sherlock.stanford.edu +User monajemi +Bqs SLURM +Repo /scratch/users/monajemi/CJRepo_Remote +MAT matlab/R2016b +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.4.3 +Pythonlib pytorch:cuda80:scipy:matplotlib:torchvision -c soumith +[sherlock] + + +[sherlock2] +Host 
login.sherlock.stanford.edu +User monajemi +Bqs SLURM +Repo /scratch/users/monajemi/CJRepo_Remote +MAT matlab/R2017a +MATlib ~/BPDN/CVX/cvx:~/mosek/7/toolbox/r2013a +Python python/3.6 +Pythonlib pytorch:cuda80:scipy:matplotlib:torchvision:-c soumith +[sherlock2] + + +[gce] +Host 5.203.181.25 +user hatefmonajemi +Bqs SLURM +Repo /home/hatefmonajemi/CJRepo_Remote +MAT "" +matlib CJinstlled/cvx +Python python3.4 +Pythonlib pytorch:torchvision:cuda80:pandas:matplotlib:-c soumith +[gce] + +[az] +Host 104.42.101.85 +User ubuntu +Bqs SLURM +Repo /home/ubuntu/CJRepo_Remote +MAT "" +MATlib "" +Python python3.4 +Pythonlib pytorch:torchvision:cuda80:pandas:matplotlib:-c soumith +[az] + +[ec2] +Host 54.201.71.158 +User ubuntu +Bqs SLURM +Repo /home/ubuntu/CJRepo_Remote +MAT "" +MATlib "" +Python python3.4 +Pythonlib pytorch:torchvision:cuda80:pandas:-c soumith +[ec2] + + +[gce-hatef] +Host 35.185.238.124 +User moosh +Bqs SLURM +Repo ~/CJRepo +MAT matlab/r2016b +MATlib CJinstalled/cvx:CJinstalled/mosek/7/toolbox/r2013a +Python python3.4 +Pythonlib pytorch:torchvision:cuda80:pandas:matplotlib:-c soumith +[gce-hatef] diff --git a/todo b/todo new file mode 100644 index 0000000..bbbf8a7 --- /dev/null +++ b/todo @@ -0,0 +1,17 @@ +Issues: + +1) Add_change_to_run_history currently reads only from local; + it shouldn't be like this. Read_record should be adaptive! + + like try + + ./test run + cj clean + + CJ cannot clean. Also, if it cleans, then go to 2. + +2) When a change happens and remote changes, the local machine + which generated that package should update the record upon + next use of CJ! + This means just a comparison of Epochs between local machine + and remote.