diff --git a/text/char-rnn/char_rnn_gpu_minibatch.jl b/text/char-rnn/char_rnn_gpu_minibatch.jl
new file mode 100644
index 000000000..d0a91a4a3
--- /dev/null
+++ b/text/char-rnn/char_rnn_gpu_minibatch.jl
@@ -0,0 +1,93 @@
+using Flux
+using Flux: onehot, chunk, batchseq, throttle, crossentropy
+using StatsBase: wsample
+using Base.Iterators: partition
+using CuArrays
+using CUDAnative: device!
+using Random
+
+# Tiny constant added to the softmax output so that `crossentropy` never
+# takes the log of an exact zero, which would make the loss NaN/Inf.
+const ϵ = 1.0f-32
+
+epochs = 2
+batch_size = 50
+sequence = 50
+gpu_device = 0
+
+# Select the GPU and disallow scalar indexing of GPU arrays.
+device!(gpu_device)
+CuArrays.allowscalar(false)
+
+input_file = joinpath(dirname(@__FILE__),"input.txt")
+
+# Download the corpus the first time the script runs.
+isfile(input_file) ||
+    download("https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt",
+             input_file)
+
+text = collect(String(read(input_file)))
+# '_' is appended to the alphabet and used as the padding symbol (`stop`).
+alphabet = [unique(text)...,'_']
+text = map(ch -> Float32.(onehot(ch,alphabet)),text)
+stop = Float32.(onehot('_',alphabet))
+
+N = length(alphabet)
+seqlen = sequence
+nbatch = batch_size
+
+# Inputs: `nbatch` chunks of the text, batched and cut into windows of `seqlen`.
+Xs = collect(partition(batchseq(chunk(text, nbatch), stop), seqlen))
+# Targets: the same text shifted left by one character.
+txt = circshift(text,-1)
+txt[end] = stop
+Ys = collect(partition(batchseq(chunk(txt, nbatch), stop), seqlen))
+
+model = Chain(
+    LSTM(N, 128),
+    LSTM(128, 256),
+    LSTM(256, 128),
+    Dense(128, N),
+    softmax)
+m = model |>gpu
+
+opt = ADAM(0.01)
+# Fixed batch that is only used to report the loss during training.
+tx, ty = (Xs[5]|>gpu, Ys[5]|>gpu)
+
+# Sum of the cross-entropy over every step of one window. The recurrent state
+# is reset after the window has been processed.
+function loss(xx, yy)
+    out = 0.0f0
+    for (idx, x) in enumerate(xx)
+        out += crossentropy(m(x) .+ ϵ, yy[idx])
+    end
+    Flux.reset!(m)
+    return out
+end
+
+idxs = length(Xs)
+for epoch_idx in 1:epochs
+    for (idx,(xs,ys)) in enumerate(zip(Xs, Ys))
+        Flux.train!(loss, params(m), [(xs|>gpu,ys|>gpu)], opt)
+        if idx % 10 == 0
+            # Evaluate the monitoring loss only when it is going to be logged.
+            lss = loss(tx,ty)
+            @info "epoch# $(epoch_idx)/$(epochs)-$(idx)/$(idxs) loss = $(lss)"
+        end
+    end
+end
+
+# Sampling: generate `len` characters from the model, running on the CPU.
+function sample(m, alphabet, len)
+    m = cpu(m)
+    Flux.reset!(m)
+    buf = IOBuffer()
+    c = rand(alphabet)
+    for _ in 1:len
+        write(buf, c)
+        c = wsample(alphabet, m(onehot(c, alphabet)))
+    end
+    return String(take!(buf))
+end
+@info sample(m, alphabet, 1000)
diff --git a/vision/cifar10/cifar10_gpu_minibatch.jl b/vision/cifar10/cifar10_gpu_minibatch.jl
new file mode 100644
index 000000000..a6b9a1b57
--- /dev/null
+++ b/vision/cifar10/cifar10_gpu_minibatch.jl
@@ -0,0 +1,197 @@
+# Julia version : 1.3.1
+# Flux version : v0.10.1
+
+using Random
+using Dates
+using CuArrays
+using CUDAdrv
+using CUDAnative: device!
+using Flux, Metalhead, Statistics
+using Flux: onehotbatch, onecold, crossentropy, throttle
+using Metalhead: trainimgs
+using Images: channelview
+using Statistics: mean
+using Base.Iterators: partition
+
+model_file = joinpath(dirname(@__FILE__),"cifar10_vgg16_model.bson")
+
+# Settings
+epochs = 100
+batch_size = 128
+gpu_device = 0
+
+# Very important: this keeps the loss from becoming NaN
+const ϵ = 1.0f-32
+
+# Select the GPU to use (device 0 is the first GPU)
+device!(gpu_device)
+CuArrays.allowscalar(false)
+
+# VGG16 and VGG19 models
+vgg16() = Chain(
+    Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(64),
+    Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(64),
+    MaxPool((2,2)),
+    Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(128),
+    Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(128),
+    MaxPool((2,2)),
+    Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(256),
+    Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(256),
+    Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(256),
+    MaxPool((2,2)),
+    Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    MaxPool((2,2)),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    MaxPool((2,2)),
+    x -> reshape(x, :, size(x, 4)),
+    Dense(512, 4096, relu),
+    Dropout(0.5),
+    Dense(4096, 4096, relu),
+    Dropout(0.5),
+    Dense(4096, 10),
+    softmax)
+
+# NOTE(review): the last Conv of blocks 3-5 below is not followed by a BatchNorm,
+# unlike every other Conv in this file -- confirm that this is intentional.
+vgg19() = Chain(
+    Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(64),
+    Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(64),
+    MaxPool((2,2)),
+    Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(128),
+    Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(128),
+    MaxPool((2,2)),
+    Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(256),
+    Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(256),
+    Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(256),
+    Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
+    MaxPool((2,2)),
+    Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    MaxPool((2,2)),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    BatchNorm(512),
+    Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
+    MaxPool((2,2)),
+    x -> reshape(x, :, size(x, 4)),
+    Dense(512, 4096, relu),
+    Dropout(0.5),
+    Dense(4096, 4096, relu),
+    Dropout(0.5),
+    Dense(4096, 10),
+    softmax)
+
+m = vgg16() |> gpu
+
+# Function to convert the RGB image to Float32 Arrays
+getarray(X) = Float32.(permutedims(channelview(X), (2, 3, 1)))
+
+# Group the images (concatenated along the 4th dimension) and the matching
+# label columns into minibatches of at most `batch_size` samples.
+function make_minibatch(imgs,labels,batch_size)
+    data_set = [(cat(imgs[i]..., dims = 4),
+                 labels[:,i])
+                for i in partition(1:length(imgs), batch_size)]
+    return data_set
+end
+
+X = trainimgs(CIFAR10)
+train_idxs = 1:49000
+train_imgs = [getarray(X[i].img) for i in train_idxs]
+train_labels = float.(onehotbatch([X[i].ground_truth.class for i in train_idxs],1:10))
+train_set = make_minibatch(train_imgs,train_labels,batch_size)
+
+# Training images 49001:50000 are kept aside as one single verification batch
+verify_idxs = 49001:50000
+verify_imgs = cat([getarray(X[i].img) for i in verify_idxs]..., dims = 4)
+verify_labels = float.(onehotbatch([X[i].ground_truth.class for i in verify_idxs],1:10))
+verify_set = [(verify_imgs,verify_labels)]
+
+# Fetch the test data from Metalhead and get it into proper shape.
+# CIFAR-10 does not specify a verify set, so `valimgs` (instead of `testimgs`)
+# is what fetches the test data.
+tX = valimgs(CIFAR10)
+test_idxs = 1:10000
+test_imgs = [getarray(tX[i].img) for i in test_idxs]
+test_labels = float.(onehotbatch([tX[i].ground_truth.class for i in test_idxs], 1:10))
+test_set = make_minibatch(test_imgs,test_labels,batch_size)
+
+# Defining the loss and accuracy functions
+loss(x, y) = crossentropy(m(x) .+ ϵ, y)
+
+# Fraction of correctly classified samples over all minibatches of `data_set`.
+# Samples are counted batch by batch because the last minibatch produced by
+# `make_minibatch` can be smaller than the others.
+function accuracy(data_set)
+    correct = 0
+    total = 0
+    for (x,y) in data_set
+        correct += sum(onecold(m(x|>gpu) |> cpu) .== onecold(y|>cpu))
+        total += size(x)[end]
+    end
+    return Float32(correct/total)
+end
+
+# Make sure our model is nicely precompiled before starting our training loop
+m(train_set[1][1] |> gpu)
+
+# Defining the optimizer
+opt = ADAM(0.001)
+
+@info "Training model..."
+
+for epoch_idx in 1:epochs
+    accs = Float32[]
+
+    train_set_len = length(train_set)
+    shuffle_idxs = shuffle(1:train_set_len)
+
+    for (idx,data_idx) in enumerate(shuffle_idxs)
+        (x,y) = train_set[data_idx]
+        # We augment `x` a little bit here, adding in random noise.
+        # NOTE(review): the noise is scaled by ϵ = 1.0f-32, far below Float32
+        # resolution for non-zero pixels -- confirm the intended noise scale.
+        x = (x .+ ϵ*randn(eltype(x),size(x))) |> gpu
+        y = y|> gpu
+        Flux.train!(loss,params(m),[(x,y)],opt)
+        v_acc = accuracy(verify_set)
+        @info "Epoch# $(epoch_idx)/$(epochs) - #$(idx)/$(train_set_len) loss: $(loss(x,y)), accuracy: $(v_acc)"
+        push!(accs,v_acc)
+    end
+    m_acc = mean(accs)
+    @info " -> Verify accuracy(mean) : $(m_acc)"
+end
+test_acc = accuracy(test_set)
+@info "Test accuracy : $(test_acc)"
diff --git a/vision/mnist/conv_gpu_minibatch.jl b/vision/mnist/conv_gpu_minibatch.jl
new file mode 100644
index 000000000..b6bddabf4
--- /dev/null
+++ b/vision/mnist/conv_gpu_minibatch.jl
@@ -0,0 +1,162 @@
+#=
+Test Environment
+ - Julia : v1.3.1
+ - Flux : v0.10.1
+=#
+
+# Classifies MNIST digits with a convolution network.
+# Writes out saved model to the file "conv_gpu_minibatch.bson".
+# Demonstrates basic model construction, training, saving,
+# conditional early-exits, and learning rate scheduling.
+#
+# This model, while simple, should hit around 99% test
+# accuracy after training for approximately 20 epochs.
+
+using Flux, Flux.Data.MNIST, Statistics
+using Flux: onehotbatch, onecold, crossentropy, throttle
+using Base.Iterators: repeated, partition
+using Printf, BSON
+using Dates
+using CUDAnative: device!
+using CuArrays
+using Random
+using Dates
+
+model_file = joinpath(dirname(@__FILE__),"conv_gpu_minibatch.bson")
+
+epochs = 100
+batch_size = 128
+gpu_device = 0
+
+# Select the GPU device to use
+device!(gpu_device)
+CuArrays.allowscalar(false)
+
+# Bundle images together with labels and group them into minibatches
+function make_minibatch(imgs,labels,batch_size)
+    len = length(imgs)
+    sz = size(imgs[1])
+    data_set =
+        [(cat([reshape(Float32.(imgs[i]),sz...,1,1) for i in idx]...,dims=4),
+          float.(onehotbatch(labels[idx],0:9)) ) for idx in partition(1:len,batch_size) ]
+    return data_set
+end
+
+# Load the training data
+train_labels = MNIST.labels()
+train_imgs = MNIST.images()
+# Split the training data into minibatches
+train_set = make_minibatch(train_imgs,train_labels,batch_size)
+
+# Load the test data
+test_labels = MNIST.labels(:test)
+test_imgs = MNIST.images(:test)
+test_set = make_minibatch(test_imgs,test_labels,batch_size)
+
+#=
+ Define our model.  We will use a simple convolutional architecture with
+ three iterations of Conv -> ReLu -> MaxPool, followed by a final Dense
+ layer that feeds into a softmax probability output.
+=#
+@info "Construncting model..."
+model = Chain(
+    # First convolution, operating upon a 28x28 image
+    Conv((3,3), 1=>16, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+    # Second convolution, operating upon a 14x14 image
+    Conv((3,3), 16=>32, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+    # Third convolution, operating upon a 7x7 image
+    Conv((3,3), 32=>32, pad=(1,1), relu),
+    MaxPool((2,2)),
+
+    # Reshape 3d tensor into a 2d one, at this point it should be (3,3,32,N)
+    # which is where we get the 288 in the `Dense` layer below:
+    x -> reshape(x, :, size(x,4)),
+    Dense(288,10),
+
+    # Finally, softmax to get nice probabilities
+    softmax,
+)
+
+m = model |> gpu
+
+#=
+ `loss()` calculates the crossentropy loss between our prediction `ŷ`
+ (calculated from `m(x)`) and the ground truth `y`. The gaussian noise
+ augmentation is applied to `x` in the training loop, not in this function.
+=#
+function loss(x,y)
+    ŷ = m(x)
+    return crossentropy(ŷ,y)
+end
+
+# Fraction of correctly classified samples over all minibatches of `data_set`.
+# Samples are counted batch by batch because the last minibatch produced by
+# `make_minibatch` can be smaller than the others.
+function accuracy(data_set)
+    correct = 0
+    total = 0
+    for (x,y) in data_set
+        correct += sum(onecold(m(x|>gpu) |> cpu) .== onecold(y|>cpu))
+        total += size(x)[end]
+    end
+    return Float32(correct/total)
+end
+
+# Make sure our model is nicely precompiled before starting our training loop
+m(train_set[1][1] |> gpu)
+
+# Train our model with the given training set using the ADAM optimizer and
+# printing out performance against the test set as we go.
+opt = ADAM(0.001)
+
+@info "Beginning training loop..."
+best_acc = 0.0
+last_improvement = 0
+
+for epoch_idx in 1:epochs
+    global best_acc, last_improvement
+    shuffle_idxs = shuffle(1:length(train_set))
+    for idx in shuffle_idxs
+        (x,y) = train_set[idx]
+        # We augment `x` a little bit here, adding in random noise
+        x = (x .+ 0.1f0*randn(eltype(x),size(x))) |> gpu
+        y = y|> gpu
+        Flux.train!(loss, params(m), [(x, y)],opt)
+    end
+    acc = accuracy(test_set)
+    @info(@sprintf("[%d]: Test accuracy: %.4f",epoch_idx,acc))
+
+    # If our accuracy is good enough, quit out.
+    if acc >= 0.999
+        @info " -> Early-exiting: We reached our target accuracy of 99.9%"
+        break
+    end
+
+    # If this is the best accuracy we've seen so far, save the model out
+    if acc >= best_acc
+        @info " -> New best accuracy! saving model out to $(model_file)"
+        model = m |> cpu
+        acc = acc |> cpu
+        BSON.@save model_file model epoch_idx acc
+        best_acc = acc
+        last_improvement = epoch_idx
+    end
+
+    # If we haven't seen improvement in 5 epochs, drop our learning rate:
+    if epoch_idx - last_improvement >= 5 && opt.eta > 1e-6
+        opt.eta /= 10.0
+        @warn " -> Haven't improved in a while, dropping learning rate to $(opt.eta)!"
+
+        # After dropping the learning rate, give it a few epochs to improve
+        last_improvement = epoch_idx
+    end
+
+    if epoch_idx - last_improvement >= 10
+        @warn " -> We're calling this converged."
+        break
+    end
+end
diff --git a/vision/mnist/mlp_gpu_minibatch.jl b/vision/mnist/mlp_gpu_minibatch.jl
new file mode 100644
index 000000000..84fe82734
--- /dev/null
+++ b/vision/mnist/mlp_gpu_minibatch.jl
@@ -0,0 +1,81 @@
+module MNIST_BATCH
+using Flux
+using Flux.Data.MNIST, Statistics
+using Flux: onehotbatch, onecold, crossentropy,throttle
+using Base.Iterators: repeated,partition
+
+using CUDAnative
+using CuArrays
+CuArrays.allowscalar(false)
+
+#=
+Very important !!
+ϵ is used to prevent loss NaN
+=#
+const ϵ = 1.0f-32
+
+# Load training labels and images from Flux.Data.MNIST
+@info("Loading data...")
+
+train_imgs = MNIST.images()
+train_labels = MNIST.labels()
+
+# Use the first GPU (the default)
+CUDAnative.device!(0)
+# To use the second GPU instead:
+#CUDAnative.device!(1)
+
+# Bundle images together with labels and group into minibatches
+function make_minibatch(imgs,labels,batch_size)
+    X = hcat(float.(reshape.(imgs,:))...) |> gpu
+    Y = float.(onehotbatch(labels,0:9)) |> gpu
+
+    data_set = [(X[:,i],Y[:,i]) for i in partition(1:length(labels),batch_size)]
+    return data_set
+end
+
+@info("Making model...")
+# Model
+m = Chain(
+    Dense(28^2,32,relu),
+    Dense(32,10),
+    softmax
+) |> gpu
+loss(x,y) = crossentropy(m(x) .+ ϵ, y)
+accuracy(x,y) = mean(onecold(m(x)|>cpu) .== onecold(y|>cpu))
+
+batch_size = 500
+train_dataset = make_minibatch(train_imgs,train_labels,batch_size)
+
+opt = ADAM()
+
+@info("Training model...")
+
+epochs = 200
+# used for plots
+accs = Float32[]
+
+dataset_len = length(train_dataset)
+for i in 1:epochs
+    for (idx,dataset) in enumerate(train_dataset)
+        Flux.train!(loss,params(m),[dataset],opt)
+        # Only the last minibatch of an epoch is reported, so only evaluate there
+        if idx == dataset_len
+            acc = accuracy(dataset...)
+            @info("Epoch# $(i)/$(epochs) - loss: $(loss(dataset...)), accuracy: $(acc)")
+            push!(accs,acc)
+        end
+    end
+end
+
+# Test Accuracy
+tX = hcat(float.(reshape.(MNIST.images(:test),:))...) |> gpu
+tY = float.(onehotbatch(MNIST.labels(:test),0:9)) |> gpu
+
+println("Test loss:", loss(tX,tY))
+println("Test accuracy:", accuracy(tX,tY))
+
+end
+
+using Plots;gr()
+plot(MNIST_BATCH.accs)