Learning Word Representations
khanhnamle1994 committed Dec 7, 2017
1 parent f6e6d59 commit 644610a
Showing 3 changed files with 13 additions and 13 deletions.
10 changes: 5 additions & 5 deletions Assignment2/fprop.m
@@ -51,24 +51,24 @@

% Apply logistic activation function.
% FILL IN CODE. Replace the line below by one of the options.
hidden_layer_state = zeros(numhid2, batchsize);
% hidden_layer_state = zeros(numhid2, batchsize);
% Options
% (a) hidden_layer_state = 1 ./ (1 + exp(inputs_to_hidden_units));
% (b) hidden_layer_state = 1 ./ (1 - exp(-inputs_to_hidden_units));
% (c) hidden_layer_state = 1 ./ (1 + exp(-inputs_to_hidden_units));
hidden_layer_state = 1 ./ (1 + exp(-inputs_to_hidden_units));
% (d) hidden_layer_state = -1 ./ (1 + exp(-inputs_to_hidden_units));
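For reference, option (c) is the logistic function itself; a tiny illustrative check in Octave (not part of the commit):

% sigma(x) = 1 ./ (1 + exp(-x)) squashes any real input into (0, 1):
x = [-10 0 10];
sigma = 1 ./ (1 + exp(-x));   % approximately [4.5e-05, 0.5, 0.99995]
% (a) is sigma(-x), (b) can go negative or blow up near x = 0, and (d) is
% -sigma(x), so only (c) is the logistic activation.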

%% COMPUTE STATE OF OUTPUT LAYER.
% Compute inputs to softmax.
% FILL IN CODE. Replace the line below by one of the options.
inputs_to_softmax = zeros(vocab_size, batchsize);
% inputs_to_softmax = zeros(vocab_size, batchsize);
% Options
% (a) inputs_to_softmax = hid_to_output_weights' * hidden_layer_state + repmat(output_bias, 1, batchsize);
inputs_to_softmax = hid_to_output_weights' * hidden_layer_state + repmat(output_bias, 1, batchsize);
% (b) inputs_to_softmax = hid_to_output_weights' * hidden_layer_state + repmat(output_bias, batchsize, 1);
% (c) inputs_to_softmax = hidden_layer_state * hid_to_output_weights' + repmat(output_bias, 1, batchsize);
% (d) inputs_to_softmax = hid_to_output_weights * hidden_layer_state + repmat(output_bias, batchsize, 1);
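A quick shape check motivates option (a), assuming the skeleton's layout of hid_to_output_weights as numhid2 x vocab_size:

% hid_to_output_weights': vocab_size x numhid2
% hidden_layer_state:     numhid2 x batchsize
% product:                vocab_size x batchsize (one softmax input column
%                         per training case), plus output_bias (vocab_size
%                         x 1) replicated across the batch by repmat.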

% Subtract maximum.
% Remember that adding or subtracting the same constant from each input to a
% softmax unit does not affect the outputs. Here we are subtracting maximum to
% make all inputs <= 0. This prevents overflows when computing their
% exponentials.
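A minimal sketch of the stabilized softmax this comment describes (illustrative, not part of the commit; assumes inputs_to_softmax is vocab_size x batchsize):

inputs_to_softmax = inputs_to_softmax ...
    - repmat(max(inputs_to_softmax), vocab_size, 1);   % column max, inputs now <= 0
output_layer_state = exp(inputs_to_softmax);           % exp of non-positive values cannot overflow
output_layer_state = output_layer_state ...
    ./ repmat(sum(output_layer_state, 1), vocab_size, 1);  % each column sums to 1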
Binary file added Assignment2/octave-workspace
16 changes: 8 additions & 8 deletions Assignment2/train.m
@@ -30,7 +30,7 @@
% LOAD DATA.
[train_input, train_target, valid_input, valid_target, ...
test_input, test_target, vocab] = load_data(batchsize);
[numwords, batchsize, numbatches] = size(train_input);
vocab_size = size(vocab, 2);
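For reference, the shapes being unpacked here (as returned by the assignment's load_data; treat the exact layout as an assumption):

% train_input: numwords x batchsize x numbatches, holding context-word
% indices, so numwords is the context length; vocab is 1 x vocab_size,
% hence size(vocab, 2).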

% INITIALIZE WEIGHTS AND BIASES.
@@ -99,25 +99,25 @@

%% HIDDEN LAYER.
% FILL IN CODE. Replace the line below by one of the options.
embed_to_hid_weights_gradient = zeros(numhid1 * numwords, numhid2);
% embed_to_hid_weights_gradient = zeros(numhid1 * numwords, numhid2);
% Options:
% (a) embed_to_hid_weights_gradient = back_propagated_deriv_1' * embedding_layer_state;
% (b) embed_to_hid_weights_gradient = embedding_layer_state * back_propagated_deriv_1';
embed_to_hid_weights_gradient = embedding_layer_state * back_propagated_deriv_1';
% (c) embed_to_hid_weights_gradient = back_propagated_deriv_1;
% (d) embed_to_hid_weights_gradient = embedding_layer_state;
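The dimensions again single out the chosen option, given the layer shapes used in this assignment:

% embedding_layer_state:   (numhid1 * numwords) x batchsize
% back_propagated_deriv_1: numhid2 x batchsize
% product with the transpose: (numhid1 * numwords) x numhid2, matching
% embed_to_hid_weights and summing each case's outer product over the batch.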

% FILL IN CODE. Replace the line below by one of the options.
hid_bias_gradient = zeros(numhid2, 1);
% hid_bias_gradient = zeros(numhid2, 1);
% Options
% (a) hid_bias_gradient = sum(back_propagated_deriv_1, 2);
hid_bias_gradient = sum(back_propagated_deriv_1, 2);
% (b) hid_bias_gradient = sum(back_propagated_deriv_1, 1);
% (c) hid_bias_gradient = back_propagated_deriv_1;
% (d) hid_bias_gradient = back_propagated_deriv_1';
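Option (a) sums along dimension 2, i.e. across the batch:

% sum(back_propagated_deriv_1, 2) collapses the batchsize columns into a
% numhid2 x 1 vector, accumulating per-case derivatives; summing along
% dimension 1, as in (b), would wrongly sum over hidden units instead.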

% FILL IN CODE. Replace the line below by one of the options.
back_propagated_deriv_2 = zeros(numhid2, batchsize);
% back_propagated_deriv_2 = zeros(numhid2, batchsize);
% Options
% (a) back_propagated_deriv_2 = embed_to_hid_weights * back_propagated_deriv_1;
back_propagated_deriv_2 = embed_to_hid_weights * back_propagated_deriv_1;
% (b) back_propagated_deriv_2 = back_propagated_deriv_1 * embed_to_hid_weights;
% (c) back_propagated_deriv_2 = back_propagated_deriv_1' * embed_to_hid_weights;
% (d) back_propagated_deriv_2 = back_propagated_deriv_1 * embed_to_hid_weights';
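One more shape check supports option (a):

% embed_to_hid_weights:    (numhid1 * numwords) x numhid2
% back_propagated_deriv_1: numhid2 x batchsize
% product: (numhid1 * numwords) x batchsize -- the hidden-layer error pushed
% back through the weights onto the embedding layer, one column per case.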
@@ -129,7 +129,7 @@
expansion_matrix(:, input_batch(w, :)) * ...
(back_propagated_deriv_2(1 + (w - 1) * numhid1 : w * numhid1, :)');
end
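For orientation, the visible tail of this loop scatters each context position's error back onto the embedding table:

% For context position w, rows 1+(w-1)*numhid1 : w*numhid1 of
% back_propagated_deriv_2 hold that position's embedding-layer error, and
% expansion_matrix(:, input_batch(w, :)) routes it to the rows of
% word_embedding_weights_gradient for the words actually seen, so repeated
% words accumulate their gradients.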

% UPDATE WEIGHTS AND BIASES.
word_embedding_weights_delta = ...
momentum .* word_embedding_weights_delta + ...
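The update is truncated by the diff; a hedged sketch of the usual momentum form it follows (learn_rate is an assumed name for the learning rate):

word_embedding_weights_delta = ...
    momentum .* word_embedding_weights_delta + ...
    word_embedding_weights_gradient ./ batchsize;   % running, momentum-smoothed gradient
word_embedding_weights = word_embedding_weights ...
    - learn_rate * word_embedding_weights_delta;    % gradient-descent step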
