Skip to content

Commit

Permalink
Status of submission
Browse files Browse the repository at this point in the history
  • Loading branch information
anlausch committed Apr 26, 2019
1 parent ca7d987 commit 3226aff
Show file tree
Hide file tree
Showing 15 changed files with 511 additions and 28 deletions.
21 changes: 21 additions & 0 deletions dict2vec_en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
for similarity_type in "cosine" "euclidean" ; do
for test_number in 6 7 8 9 10 1 2 3 4 5 ; do
for language in "en" ; do
echo $language
echo $similarity_type
echo $test_number
python weat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/dict2vec_${language}_${similarity_type}_${test_number}_cased.res \
--lower False \
--use_glove False \
--is_vec_format True \
--lang $language \
--embeddings \
/work/anlausch/dict2vec-vectors-dim300.vec \
--similarity_type $similarity_type |& tee ./results/dict2vec_${language}_${similarity_type}_${test_number}_cased.out
done
done
done
38 changes: 38 additions & 0 deletions fasttext_en_xling.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash

#for similarity_type in "cosine" ; do
for similarity_type in "cosine" "euclidean" ; do
targets_language="en"
attributes_language="en"
for xspace in "en-de" "en-hr" "en-it" "en-ru" "en-tr" "ru-en" "tr-en" "de-en"; do
for test_number in 1 2 3 4 5 6 7 8 9 10 ; do
echo $targets_language
echo $attributes_language
echo $similarity_type
echo $test_number
echo $xspace

dir="/work/gglavas/data/word_embs/yacle/mappings/new/smith/fasttext/${xspace}"

if [ -d "$dir" ]; then
python xweat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/ft_xling_space-${xspace}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.res \
--lower True \
--use_glove False \
--targets_lang $targets_language \
--attributes_lang $attributes_language \
--targets_embedding_vocab \
${dir}/vocab_${xspace}.${targets_language}.yacle.train.freq.5k.pkl \
--targets_embedding_vectors \
${dir}/vectors_${xspace}.${targets_language}.yacle.train.freq.5k.np \
--attributes_embedding_vocab \
${dir}/vocab_${xspace}.${attributes_language}.yacle.train.freq.5k.pkl \
--attributes_embedding_vectors \
${dir}/vectors_${xspace}.${attributes_language}.yacle.train.freq.5k.np \
--similarity_type $similarity_type |& tee ./results/ft_xling_space-${xspace}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.out
fi
done
done
done
13 changes: 2 additions & 11 deletions fasttext_multiling.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
#!/usr/bin/env bash
#parser.add_argument("--test_number", type=int, help="Number of the weat test to run", required=False)
#parser.add_argument("--permutation_number", type=int, default=None,
# help="Number of permutations (otherwise all will be run)", required=False)
# parser.add_argument("--output_file", type=str, default=None, help="File to store the results)", required=False)
# parser.add_argument("--lower", type=bool, default=False, help="Whether to lower the vocab", required=False)
# parser.add_argument("--similarity_type", type=str, default="cosine", help="Which similarity function to use",
# required=False)
# parser.add_argument("--embedding_file", type=str)

#for similarity_type in "cosine" "csls" ; do
for similarity_type in "cosine" "csls" ; do
for similarity_type in "euclidean" ; do
for test_number in 1 2 3 4 5 6 7 8 9 10 ; do
for language in "de" "es" "hr" "it" "ru" "tr" ; do
for language in "en" "de" "es" "hr" "it" "ru" "tr" ; do
echo $language
echo $similarity_type
echo $test_number
Expand Down
22 changes: 22 additions & 0 deletions fasttext_multiling_cc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
#for similarity_type in "cosine" "csls" ; do
for similarity_type in "cosine" "euclidean" ; do
for test_number in 3 4 5 ; do
for language in "en" ; do # "de" "es" "hr" "it" "ru" "tr" ; do
echo $language
echo $similarity_type
echo $test_number
python weat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/fasttext_cc_${language}_${similarity_type}_${test_number}.res \
--lower True \
--use_glove False \
--is_vec_format True \
--lang $language \
--embeddings \
/work/anlausch/fasttext_cc/cc.${language}.300.vec \
--similarity_type $similarity_type |& tee ./results/fasttext_cc_${language}_${similarity_type}_${test_number}.out
done
done
done
3 changes: 2 additions & 1 deletion fasttext_xling.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env bash

for similarity_type in "cosine" ; do
#for similarity_type in "cosine" ; do
for similarity_type in "euclidean" ; do
for test_number in 1 2 3 4 5 6 7 8 9 10 ; do
for targets_language in "en" "de" "hr" "it" "ru" "tr" ; do
for attributes_language in "en" "de" "hr" "it" "ru" "tr" ; do
Expand Down
63 changes: 63 additions & 0 deletions fasttext_xling2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env bash

#for similarity_type in "cosine" ; do
for similarity_type in "cosine" "euclidean" ; do
for targets_language in "en" "de" "hr" "it" "ru" "tr" ; do
for attributes_language in "en" "de" "hr" "it" "ru" "tr" ; do
for test_number in 6 7 8 9 10 1 2 ; do
echo $targets_language
echo $attributes_language
echo $similarity_type
echo $test_number

dir_1="/work/gglavas/data/word_embs/yacle/mappings/new/smith/fasttext/${targets_language}-${attributes_language}"
dir_2="/work/gglavas/data/word_embs/yacle/mappings/new/smith/fasttext/${attributes_language}-${targets_language}"

if [ -d "$dir_1" ]; then
embedding_dir=$dir_1

echo $embedding_dir

python xweat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/ft_xling2_space-${targets_language}-${attributes_language}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.res \
--lower True \
--use_glove False \
--targets_lang $targets_language \
--attributes_lang $attributes_language \
--targets_embedding_vocab \
${embedding_dir}/vocab_${targets_language}-${attributes_language}.${targets_language}.yacle.train.freq.5k.pkl \
--targets_embedding_vectors \
${embedding_dir}/vectors_${targets_language}-${attributes_language}.${targets_language}.yacle.train.freq.5k.np \
--attributes_embedding_vocab \
${embedding_dir}/vocab_${targets_language}-${attributes_language}.${attributes_language}.yacle.train.freq.5k.pkl \
--attributes_embedding_vectors \
${embedding_dir}/vectors_${targets_language}-${attributes_language}.${attributes_language}.yacle.train.freq.5k.np \
--similarity_type $similarity_type |& tee ./results/ft_xling2_space-${targets_language}-${attributes_language}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.out
fi
if [ -d "$dir_2" ]; then
embedding_dir=$dir_2
echo $embedding_dir
python xweat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/ft_xling2_space-${attributes_language}-${targets_language}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.res \
--lower True \
--use_glove False \
--targets_lang $targets_language \
--attributes_lang $attributes_language \
--targets_embedding_vocab \
${embedding_dir}/vocab_${attributes_language}-${targets_language}.${targets_language}.yacle.train.freq.5k.pkl \
--targets_embedding_vectors \
${embedding_dir}/vectors_${attributes_language}-${targets_language}.${targets_language}.yacle.train.freq.5k.np \
--attributes_embedding_vocab \
${embedding_dir}/vocab_${attributes_language}-${targets_language}.${attributes_language}.yacle.train.freq.5k.pkl \
--attributes_embedding_vectors \
${embedding_dir}/vectors_${attributes_language}-${targets_language}.${attributes_language}.yacle.train.freq.5k.np \
--similarity_type $similarity_type |& tee ./results/ft_xling2_space-${attributes_language}-${targets_language}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.out
fi
done
done
done
done
79 changes: 79 additions & 0 deletions fasttext_xling_es.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env bash
#!/usr/bin/env bash

#for similarity_type in "cosine" ; do
for similarity_type in "cosine" "euclidean" ; do
targets_language="es"
for attributes_language in "en" "de" "hr" "it" "ru" "tr" ; do
for test_number in 6 7 8 9 10 1 2 ; do
xspace=${targets_language}-${attributes_language}
echo $targets_language
echo $attributes_language
echo $similarity_type
echo $test_number

dir_1="/work/gglavas/data/word_embs/yacle/mappings/new/smith/fasttext/${xspace}"

if [ -d "$dir_1" ]; then
embedding_dir=$dir_1
echo $embedding_dir

python xweat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/ft_xling_space-${xspace}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.res \
--lower True \
--use_glove False \
--targets_lang $targets_language \
--attributes_lang $attributes_language \
--targets_embedding_vocab \
${embedding_dir}/${targets_language}.vocab \
--targets_embedding_vectors \
${embedding_dir}/${targets_language}.vectors \
--attributes_embedding_vocab \
${embedding_dir}/${attributes_language}.vocab \
--attributes_embedding_vectors \
${embedding_dir}/${attributes_language}.vectors \
--similarity_type $similarity_type |& tee ./results/ft_xling_space-${xspace}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.out
fi
done
done
done

for similarity_type in "cosine" "euclidean" ; do
attributes_language="es"
for targets_language in "en" "de" "hr" "it" "ru" "tr" ; do
for test_number in 6 7 8 9 10 1 2 ; do
xspace=${attributes_language}-${targets_language}
echo $targets_language
echo $attributes_language
echo $similarity_type
echo $test_number

dir_1="/work/gglavas/data/word_embs/yacle/mappings/new/smith/fasttext/${xspace}"

if [ -d "$dir_1" ]; then
embedding_dir=$dir_1
echo $embedding_dir

python xweat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/ft_xling_space-${xspace}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.res \
--lower True \
--use_glove False \
--targets_lang $targets_language \
--attributes_lang $attributes_language \
--targets_embedding_vocab \
${embedding_dir}/${targets_language}.vocab \
--targets_embedding_vectors \
${embedding_dir}/${targets_language}.vectors \
--attributes_embedding_vocab \
${embedding_dir}/${attributes_language}.vocab \
--attributes_embedding_vectors \
${embedding_dir}/${attributes_language}.vectors \
--similarity_type $similarity_type |& tee ./results/ft_xling_space-${xspace}_ta-${targets_language}-${attributes_language}_${similarity_type}_${test_number}.out
fi
done
done
done
25 changes: 25 additions & 0 deletions ft_postspecialized.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
#parser.add_argument("--test_number", type=int, help="Number of the weat test to run", required=False)
#parser.add_argument("--permutation_number", type=int, default=None,
# help="Number of permutations (otherwise all will be run)", required=False)
# parser.add_argument("--output_file", type=str, default=None, help="File to store the results)", required=False)
# parser.add_argument("--lower", type=bool, default=False, help="Whether to lower the vocab", required=False)
# parser.add_argument("--similarity_type", type=str, default="cosine", help="Which similarity function to use",
# required=False)
# parser.add_argument("--embedding_file", type=str)

#for similarity_type in "cosine" "csls" ; do
for similarity_type in "cosine" "euclidean" ; do
for test_number in 1 2 3 4 5 6 7 8 9 10 ; do
echo $similarity_type
echo $test_number
python weat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/ft_postspec_en_${similarity_type}_${test_number}.res \
--lower True \
--use_glove False \
--postspec True \
--similarity_type $similarity_type |& tee ./results/ft_postspec_en_${similarity_type}_${test_number}.out
done
done
22 changes: 22 additions & 0 deletions glove_en_cc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
#for similarity_type in "cosine" "csls" ; do
for similarity_type in "cosine" "euclidean" ; do
for test_number in 6 7 8 9 10 1 2 3 4 5 ; do
for language in "en" ; do
echo $language
echo $similarity_type
echo $test_number
python weat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/glove_cc_${language}_${similarity_type}_${test_number}_cased.res \
--lower False \
--use_glove False \
--is_vec_format True \
--lang $language \
--embeddings \
~/glove.840B.300d.txt \
--similarity_type $similarity_type |& tee ./results/glove_cc_${language}_${similarity_type}_${test_number}_cased.out
done
done
done
22 changes: 22 additions & 0 deletions glove_en_tweets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
#for similarity_type in "cosine" "csls" ; do
for similarity_type in "cosine" "euclidean" ; do
for test_number in 6 7 8 9 10 1 2 3 4 5 ; do
for language in "en" ; do
echo $language
echo $similarity_type
echo $test_number
python weat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/glove_twitter_${language}_${similarity_type}_${test_number}_cased.res \
--lower False \
--use_glove False \
--is_vec_format True \
--lang $language \
--embeddings \
/work/anlausch/glove_twitter/glove.twitter.27B.200d.txt \
--similarity_type $similarity_type |& tee ./results/glove_twitter_${language}_${similarity_type}_${test_number}_cased.out
done
done
done
7 changes: 4 additions & 3 deletions glove_reproduction.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@
# parser.add_argument("--embedding_file", type=str)

for similarity_type in "cosine" ; do
#for similarity_type in "euclidean" ; do
for test_number in 1 2 3 4 5 6 7 8 9 10 ; do
echo $similarity_type
echo $test_number
python weat.py \
--test_number $test_number \
--permutation_number 1000000 \
--output_file ./results/glove_${similarity_type}_${test_number}.res \
--lower True \
--output_file ./results/glove_wiki_${similarity_type}_${test_number}_cased.res \
--lower False \
--use_glove True \
--similarity_type $similarity_type |& tee ./results/glove_${similarity_type}_${test_number}.out
--similarity_type $similarity_type |& tee ./results/glove_wiki_${similarity_type}_${test_number}_cased.out
done
done
Loading

0 comments on commit 3226aff

Please sign in to comment.