Skip to content

Commit

Permalink
added unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
zbsimon committed Jan 15, 2015
1 parent 78a0c73 commit fd85fc5
Show file tree
Hide file tree
Showing 9 changed files with 466 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Travis CI configuration: the build is run once per Python version listed.
language: python
python:
- "2.7"
- "3.3"
# nosetests is invoked without arguments, so test discovery is left to nose.
script: nosetests
Empty file added tests/unit_tests/__init__.py
Empty file.
33 changes: 33 additions & 0 deletions tests/unit_tests/contents_mapper_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python

import tests.test_utils as tu
import contents_mapper
import os
import re


class TestContentsMapper(tu.MapTestCase):
    """Unit tests for ``contents_mapper.map_contents``.

    The mapper function and the fixture path are handed to the
    ``tu.MapTestCase`` initialiser; the assertion helpers used below
    (``has_single_delimiter``, ``has_n_keys``, ``are_all_matches``) come
    from that base class.
    """

    def __init__(self, *args, **kwargs):
        """Bind the mapper and its fixture, then set the input-file name."""
        mapper = contents_mapper.map_contents
        fixture = 'tests/fixtures/contents_mapper.txt'
        super(TestContentsMapper, self).__init__(
            mapper, fixture, *args, **kwargs
        )
        # Process-wide environment variable; 'filename' is the value that
        # test_prepends_filename expects at the start of every emitted line.
        # NOTE(review): presumably the mapper reads this variable to learn
        # the name of the file being processed -- confirm in contents_mapper.
        os.environ['mapreduce_map_input_file'] = 'filename'

    def test_has_correct_number_of_keys_and_values(self):
        """Run the single-delimiter check and the one-key check."""
        self.has_single_delimiter()
        self.has_n_keys(1)

    def test_only_lowercase_alphabetic(self):
        """Check the output against a lowercase/whitespace pattern."""
        # The character class admits a-z, space and tab -- and also ','.
        # NOTE(review): the commas look like accidental separators; kept
        # as-is so the test accepts exactly what it accepted before.
        allowed_chars = re.compile('^[a-z, ,\t]+$')
        self.are_all_matches(allowed_chars)

    def test_prepends_filename(self):
        """Check that the output starts with the configured file name."""
        starts_with_filename = re.compile('^filename')
        self.are_all_matches(starts_with_filename)
79 changes: 79 additions & 0 deletions tests/unit_tests/corpus_frequency_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env python

import tests.test_utils as tu
import corp_freq_map as map
import corp_freq_red as red
import re


class TestCorpusFrequencyMapper(tu.MapTestCase):
    """Unit tests for ``corp_freq_map.map_corpus_frequency``.

    The mapper is exercised through the helpers of ``tu.MapTestCase``
    using the fixture ``tests/fixtures/corpus_frequency_mapper.txt``.
    """

    def __init__(self, *args, **kwargs):
        """Hand the mapper function and fixture path to the base class."""
        fixture = 'tests/fixtures/corpus_frequency_mapper.txt'
        super(TestCorpusFrequencyMapper, self).__init__(
            map.map_corpus_frequency, fixture, *args, **kwargs
        )

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (1, 4)."""
        # NOTE(review): argument order is presumably (keys, values) --
        # confirm against tests/test_utils.
        self.has_correct_number_of_keys_and_values(1, 4)

    def test_appends_one(self):
        """Check that the mapper output ends with a ``1``."""
        trailing_one = re.compile('.*1$')
        self.are_all_matches(trailing_one)

    def test_lines_out_equals_lines_in(self):
        """Check that the mapper emits as many lines as it was given."""
        self.lines_out_equals_lines_in()


class TestCorpusFrequencyReducer(tu.ReduceTestCase):
    """Unit tests for ``corp_freq_red.reduce_corpus_frequency``.

    The reducer is exercised through the helpers of ``tu.ReduceTestCase``
    using the fixture ``tests/fixtures/corpus_frequency_reducer.txt``.
    """

    def __init__(self, *args, **kwargs):
        """Hand the reducer, its KEYS/VALUES constants and the fixture on."""
        fixture = 'tests/fixtures/corpus_frequency_reducer.txt'
        super(TestCorpusFrequencyReducer, self).__init__(
            red.KEYS, red.VALUES,
            red.reduce_corpus_frequency,
            fixture,
            *args, **kwargs
        )

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (2, 3)."""
        self.has_correct_number_of_keys_and_values(2, 3)

    def test_sums_correct_values(self):
        """Check the per-word totals emitted for the fixture.

        The total must be the number of documents a word occurs in, not
        the number of occurrences of the word.
        """
        # Expected total for each word that may appear in the output.
        documents_per_word = {'word1': 2, 'word2': 2, 'word3': 1}
        for record in self.run_reducer_tokenize():
            # record[0] / record[1] are indexed as the key and value parts.
            word = record[0][0]
            if word not in documents_per_word:
                self.fail('unknown word in output')
            self.assertEqual(int(record[1][2]), documents_per_word[word])

    def test_lines_out_equals_lines_in(self):
        """Check that the reducer emits as many lines as it was given."""
        self.lines_out_equals_lines_in()
57 changes: 57 additions & 0 deletions tests/unit_tests/cosine_similarity_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python

import tests.test_utils as tu
import cos_sim_map as map
import cos_sim_red as red


class TestCosineSimilarityMapper(tu.MapTestCase):
    """Unit tests for ``cos_sim_map.map_cosine_similarity``.

    The mapper is exercised through the helpers of ``tu.MapTestCase``
    using the fixture ``tests/fixtures/cosine_similarity_mapper.txt``.
    """

    def __init__(self, *args, **kwargs):
        """Hand the mapper function and fixture path to the base class."""
        fixture = 'tests/fixtures/cosine_similarity_mapper.txt'
        super(TestCosineSimilarityMapper, self).__init__(
            map.map_cosine_similarity, fixture, *args, **kwargs
        )

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (2, 1)."""
        self.has_correct_number_of_keys_and_values(2, 1)

    def test_emits_line_for_each_word_in_each_file(self):
        """Check that the mapper emits as many lines as it was given."""
        self.lines_out_equals_lines_in()


class TestCosineSimilarityReducer(tu.ReduceTestCase):
    """Unit tests for ``cos_sim_red.reduce_cosine_similarity``.

    Every reducer run in this class is given ``{'precision': 6}`` as its
    arguments.
    """

    def __init__(self, *args, **kwargs):
        """Hand the reducer, its KEYS/VALUES constants and the fixture on."""
        fixture = 'tests/fixtures/cosine_similarity_reducer.txt'
        super(TestCosineSimilarityReducer, self).__init__(
            red.KEYS, red.VALUES,
            red.reduce_cosine_similarity,
            fixture,
            *args, **kwargs
        )
        # Arguments passed to every reducer invocation below.
        self.default_args = {'precision': 6}

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (2, 1)."""
        reducer_args = self.default_args
        self.has_correct_number_of_keys_and_values(2, 1, args=reducer_args)

    def test_sums_up_correct_values(self):
        """Check the similarity emitted for each pair of files.

        The (file1, file2) pair must yield '1.000000'; every other pair
        must yield '1.500000'.
        """
        pairs = self.run_reducer_tokenize(args=self.default_args)
        for keys, values in pairs:
            is_file1_file2 = keys[0] == 'file1' and keys[1] == 'file2'
            expected = '1.000000' if is_file1_file2 else '1.500000'
            # Compared as strings, so the six-digit formatting is checked too.
            self.assertEqual(values[0], expected)
51 changes: 51 additions & 0 deletions tests/unit_tests/tf_idf_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python

from math import log
import tests.test_utils as tu
import map_reduce_utils as mru
import tf_idf_map as map


class TestTfIdfMapper(tu.MapTestCase):
    """Unit tests for ``tf_idf_map.map_tf_idf``.

    The mapper is run against ``tests/fixtures/tf_idf_mapper.txt``. Unless
    a test says otherwise it is given ``corpus_size=3`` and ``precision=5``.
    """

    def __init__(self, *args, **kwargs):
        """Hand the mapper function and fixture path to the base class."""
        super(TestTfIdfMapper, self).__init__(
            map.map_tf_idf,
            'tests/fixtures/tf_idf_mapper.txt',
            *args, **kwargs
        )
        # Arguments passed to the mapper by default.
        self.default_args = {'corpus_size': 3, 'precision': 5}

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (2, 1)."""
        self.has_correct_number_of_keys_and_values(2, 1, args=self.default_args)

    def test_computes_correct_tfidf_score(self):
        """Check the tf-idf score emitted for the first output line."""
        precision = self.default_args['precision']
        corpus_size = float(self.default_args['corpus_size'])

        results = self.run_mapper(args=self.default_args)
        first_record = mru.tokenize_key_value_pair(results[0])
        computed_tfidf = float(first_record[1][0])

        # NOTE(review): 7/12 and 2 presumably mirror the first fixture
        # record (word count / document length, and the number of
        # documents containing the word) -- confirm against the fixture.
        expected_tf = 7.0 / 12.0
        expected_idf = log(corpus_size / 2.0, 10)
        expected_tfidf = round(expected_tf * expected_idf, precision)

        # Compare with a tolerance instead of exact float equality, so a
        # last-bit difference between round() and parsing the formatted
        # string cannot fail the test. Both numbers sit on the same
        # `precision`-digit grid, so a genuinely different score still fails.
        self.assertAlmostEqual(computed_tfidf, expected_tfidf,
                               places=precision)

    def test_has_correcct_precision(self):
        """Check that the ``precision`` argument is respected.

        The emitted score must have exactly ``precision`` digits after the
        decimal point.
        """
        precision_to_test = 8
        results = self.run_mapper(args={'precision': precision_to_test,
                                        'corpus_size': 9})
        first_record = mru.tokenize_key_value_pair(results[0])
        computed_tfidf = first_record[1][0]
        # partition() yields an empty fractional part when there is no '.',
        # so a malformed score fails the assertion below instead of raising
        # IndexError.
        fraction = computed_tfidf.strip().partition('.')[2]
        self.assertEqual(precision_to_test, len(fraction))
67 changes: 67 additions & 0 deletions tests/unit_tests/word_count_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env python

import tests.test_utils as tu
import word_count_map as map
import word_count_red as red


class TestWordCountMapper(tu.MapTestCase):
    """Unit tests for ``word_count_map.map_word_count``.

    The mapper is exercised through the helpers of ``tu.MapTestCase``
    using the fixture ``tests/fixtures/word_count_mapper.txt``.
    """

    def __init__(self, *args, **kwargs):
        """Hand the mapper function and fixture path to the base class."""
        fixture = 'tests/fixtures/word_count_mapper.txt'
        super(TestWordCountMapper, self).__init__(
            map.map_word_count, fixture, *args, **kwargs
        )

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (1, 2)."""
        self.has_correct_number_of_keys_and_values(1, 2)

    def test_lines_out_equals_lines_in(self):
        """Check that one line is emitted for each line of input."""
        self.lines_out_equals_lines_in()


class TestWordCountReducer(tu.ReduceTestCase):
    """Unit tests for ``word_count_red.reduce_word_count``.

    The reducer is exercised through the helpers of ``tu.ReduceTestCase``
    using the fixture ``tests/fixtures/word_count_reducer.txt``.
    """

    def __init__(self, *args, **kwargs):
        """Hand the reducer, its KEYS/VALUES constants and the fixture on."""
        fixture = 'tests/fixtures/word_count_reducer.txt'
        super(TestWordCountReducer, self).__init__(
            red.KEYS, red.VALUES,
            red.reduce_word_count,
            fixture,
            *args, **kwargs
        )

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (2, 2)."""
        self.has_correct_number_of_keys_and_values(2, 2)

    def test_sums_up_correct_values(self):
        """Check the total emitted for each file in the fixture."""
        # Expected total for each file name that may appear in the output.
        total_per_file = {'file1': 11, 'file2': 16, 'file3': 26}
        for record in self.run_reducer_tokenize():
            # The file name is the second key field; the total is the
            # second value field.
            filename = record[0][1]
            if filename not in total_per_file:
                self.fail('unknown filename')
            self.assertEqual(int(record[1][1]), total_per_file[filename])

    def test_lines_out_equals_lines_in(self):
        """Check that the reducer emits as many lines as it was given."""
        self.lines_out_equals_lines_in()
74 changes: 74 additions & 0 deletions tests/unit_tests/word_frequency_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python

import re
import tests.test_utils as tu
import word_freq_map as map
import word_freq_red as red


class TestWordFrequencyMapper(tu.MapTestCase):
    """Unit tests for ``word_freq_map.map_word_frequency``.

    The mapper is exercised through the helpers of ``tu.MapTestCase``
    using the fixture ``tests/fixtures/word_frequency_mapper.txt``.
    """

    def __init__(self, *args, **kwargs):
        """Hand the mapper function and fixture path to the base class."""
        fixture = 'tests/fixtures/word_frequency_mapper.txt'
        super(TestWordFrequencyMapper, self).__init__(
            map.map_word_frequency, fixture, *args, **kwargs
        )

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (2, 1)."""
        self.has_correct_number_of_keys_and_values(2, 1)

    def test_emits_line_for_each_word_in_each_file(self):
        """Check that one line is emitted for each word in each file."""
        with open(self.default_fixture) as fixture:
            emitted = self.run_mapper()
            fixture_lines = fixture.readlines()
        token_total = sum(len(row.strip().split()) for row in fixture_lines)
        # One token per fixture line is subtracted from the expected count.
        # NOTE(review): presumably each line starts with a file name that
        # is not itself a word -- confirm against the fixture.
        self.assertEqual(len(emitted), token_total - len(fixture_lines))

    def test_appends_one(self):
        """Check that the mapper output ends with a ``1``."""
        trailing_one = re.compile('.*1$')
        self.are_all_matches(trailing_one)


class TestWordFrequencyReducer(tu.ReduceTestCase):
    """Unit tests for ``word_freq_red.reduce_word_frequency``.

    The reducer is exercised through the helpers of ``tu.ReduceTestCase``
    using the fixture ``tests/fixtures/word_frequency_reducer.txt``.
    """

    def __init__(self, *args, **kwargs):
        """Hand the reducer, its KEYS/VALUES constants and the fixture on."""
        fixture = 'tests/fixtures/word_frequency_reducer.txt'
        super(TestWordFrequencyReducer, self).__init__(
            red.KEYS, red.VALUES,
            red.reduce_word_frequency,
            fixture,
            *args, **kwargs
        )

    def test_has_correct_number_of_keys_and_values(self):
        """Run the base-class key/value count check with (2, 1)."""
        self.has_correct_number_of_keys_and_values(2, 1)

    def test_sum_of_output_equals_length_of_input(self):
        """Check that every input line is accounted for in the output sums."""
        # The first value field of each emitted record is added up.
        emitted_sum = sum(
            int(record[1][0]) for record in self.run_reducer_tokenize()
        )
        with open(self.default_fixture) as fixture:
            fixture_line_count = len(fixture.readlines())
        self.assertEqual(emitted_sum, fixture_line_count)
Loading

0 comments on commit fd85fc5

Please sign in to comment.