torproject · juga0 · May 6, 2019 · May 6, 2019 · May 19, 2019 · May 19, 2019
diff --git a/sbws/lib/stem_bandwidth_file.py b/sbws/lib/stem_bandwidth_file.py
@@ -0,0 +1,71 @@
+"""Stem's Bandwidth file HEADER_ATTR part as it is in stem's commit
+658dd5281604eb9c63a91e529501947ecc65ef6b, which will be included in the next
+Stem's release, 1.8.0, except ``_date`` because depends on other stem's module.
+"""
+# XXX: Remove this file when stem releases 1.8.0.
+from ..util.timestamp import isostr_to_dt_obj
+
+# Converts header attributes to a given type. Malformed fields should be
+# ignored according to the spec.
+
+def _str(val):
+  return val  # already a str
+
+
+def _int(val):
+  return int(val) if (val and val.isdigit()) else None
+
+
+def _date(val):
+  try:
+    return isostr_to_dt_obj(val)
+  except ValueError:
+    return None  # not an iso formatted date
+
+
+def _csv(val):
+  return map(lambda v: v.strip(), val.split(',')) if val is not None else None
+
+
+# mapping of attributes => (header, type)
+
+HEADER_ATTR = {
+  # version 1.1.0 introduced headers
+
+  'version': ('version', _str),
+
+  'software': ('software', _str),
+  'software_version': ('software_version', _str),
+
+  'earliest_bandwidth': ('earliest_bandwidth', _date),
+  'latest_bandwidth': ('latest_bandwidth', _date),
+  'created_at': ('file_created', _date),
+  'generated_at': ('generator_started', _date),
+
+  # version 1.2.0 additions
+
+  'consensus_size': ('number_consensus_relays', _int),
+  'eligible_count': ('number_eligible_relays', _int),
+  'eligible_percent': ('percent_eligible_relays', _int),
+  'min_count': ('minimum_number_eligible_relays', _int),
+  'min_percent': ('minimum_percent_eligible_relays', _int),
+
+  # version 1.3.0 additions
+
+  'scanner_country': ('scanner_country', _str),
+  'destinations_countries': ('destinations_countries', _csv),
+
+  # version 1.4.0 additions
+
+  'time_to_report_half_network': ('time_to_report_half_network', _int),
+
+  'recent_stats.consensus_count': ('recent_consensus_count', _int),
+  'recent_stats.prioritized_relay_lists': ('recent_priority_list_count', _int),
+  'recent_stats.prioritized_relays': ('recent_priority_relay_count', _int),
+  'recent_stats.measurement_attempts': ('recent_measurement_attempt_count', _int),
+  'recent_stats.measurement_failures': ('recent_measurement_failure_count', _int),
+  'recent_stats.relay_failures.no_measurement': ('recent_measurements_excluded_error_count', _int),
+  'recent_stats.relay_failures.insuffient_period': ('recent_measurements_excluded_near_count', _int),
+  'recent_stats.relay_failures.insufficient_measurements': ('recent_measurements_excluded_few_count', _int),
+  'recent_stats.relay_failures.stale': ('recent_measurements_excluded_old_count', _int),
+}
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
 """Classes and functions that create the bandwidth measurements document
-(v3bw) used by bandwidth authorities."""
+(v3bw) used by bandwidth authorities.
+
+See https://gitweb.torproject.org/torspec.git/tree/bandwidth-file-spec.txt for
+more information on the meaning of the ``KeyValues``
+"""
 
 import copy
 import logging
@@ -15,6 +19,11 @@
                           TORFLOW_SCALING, SBWS_SCALING, TORFLOW_BW_MARGIN,
                           TORFLOW_OBS_LAST, TORFLOW_OBS_MEAN,
                           PROP276_ROUND_DIG, MIN_REPORT, MAX_BW_DIFF_PERC)
+
+# XXX: replace this line by:
-# XXX: replace this line by:
+try:
+    from stem.descriptor.bandwidth_file import HEADER_ATTR, _int
+except ImportError:
+    from .stem_bandwidth_file import HEADER_ATTR, _int
+except:
+    raise
-# XXX: replace this line by:
+try:
+    from stem.descriptor.bandwidth_file import HEADER_ATTR, _int
+except ImportError:
+    from .stem_bandwidth_file import HEADER_ATTR, _int
+except:
+    raise
+# from stem.descriptor.bandwidth_file import HEADER_ATTR, _int
+# when stem releases 1.8.0. More information in stem_bandwidth_file.py
+from .stem_bandwidth_file import HEADER_ATTR, _int
 from sbws.lib.resultdump import ResultSuccess, _ResultType
 from sbws.util.filelock import DirectoryLock
 from sbws.util.timestamp import (now_isodt_str, unixts_to_isodt_str,
@@ -27,91 +36,34 @@
 KEYVALUE_SEP_V1 = '='
 KEYVALUE_SEP_V2 = ' '
 
-# NOTE: in a future refactor make make all the KeyValues be a dictionary
-# with their type, so that it's more similar to stem parser.
-
 # Header KeyValues
 # =================
-# List of the extra KeyValues accepted by the class
-EXTRA_ARG_KEYVALUES = ['software', 'software_version', 'file_created',
-                       'earliest_bandwidth', 'generator_started',
-                       'scanner_country', 'destinations_countries']
-# number_eligible_relays is the number that ends in the bandwidth file
-# ie, have not been excluded by one of the filters in 4. below
-# They should be call recent_measurement_included_count to be congruent
-# with the other KeyValues.
+
+# XXX: These constants need to be kept even if using Stem's header one.
+# Unless Stem's will add a property type to the attributes.
 STATS_KEYVALUES = ['number_eligible_relays', 'minimum_number_eligible_relays',
                    'number_consensus_relays', 'percent_eligible_relays',
                    'minimum_percent_eligible_relays']
 
-# KeyValues that count the number of relays that are in the bandwidth file,
-# but ignored by Tor when voting, because they do not have a
-# measured bandwidth.
-BW_HEADER_KEYVALUES_RECENT_MEASUREMENTS_EXCLUDED = [
-    # Number of relays that were measured but all the measurements failed
-    # because of network failures or it was
-    # not found a suitable helper relay
-    'recent_measurements_excluded_error_count',
-    # Number of relays that have successful measurements but the measurements
-    # were not away from each other in X time (by default 1 day).
-    'recent_measurements_excluded_near_count',
-    # Number of relays that have successful measurements and they are away from
-    # each other but they are not X time recent.
-    # By default this is 5 days, which is the same time the older
-    # the measurements can be by default.
-    'recent_measurements_excluded_old_count',
-    # Number of relays that have successful measurements and they are away from
-    # each other and recent
-    # but the number of measurements are less than X (by default 2).
-    'recent_measurements_excluded_few_count',
-]
-# Added in #29591
-# NOTE: recent_consensus_count, recent_priority_list_count,
-# recent_measurement_attempt_count and recent_priority_relay_count
-# are not reset when the scanner is stop.
-# They will accumulate the values since the scanner was ever started.
-BW_HEADER_KEYVALUES_MONITOR = [
-    # 1.1 header: the number of different consensuses, that sbws has seen,
-    # since the last 5 days
-    'recent_consensus_count',
-    # 2.4 Number of times a priority list has been created
-    'recent_priority_list_count',
-    # 2.5 Number of relays that there were in a priority list
-    # [50, number of relays in the network * 0.05]
-    'recent_priority_relay_count',
-    # 3.6 header: the number of times that sbws has tried to measure any relay,
-    # since the last 5 days
-    # This would be the number of times a relays were in a priority list
-    'recent_measurement_attempt_count',
-    # 3.7 header: the number of times that sbws has tried to measure any relay,
-    # since the last 5 days, but it didn't work
-    # This should be the number of attempts - number of ResultSuccess -
-    # something else we don't know yet
-    # So far is the number of ResultError
-    'recent_measurement_failure_count',
-    # The time it took to report about half of the network.
-    'time_to_report_half_network',
-] + BW_HEADER_KEYVALUES_RECENT_MEASUREMENTS_EXCLUDED
-BANDWIDTH_HEADER_KEY_VALUES_INIT = \
-    ['earliest_bandwidth', 'generator_started',
-     'scanner_country', 'destinations_countries']\
-    + STATS_KEYVALUES \
-    + BW_HEADER_KEYVALUES_MONITOR
-
-KEYVALUES_INT = STATS_KEYVALUES + BW_HEADER_KEYVALUES_MONITOR
-# List of all unordered KeyValues currently being used to generate the file
-UNORDERED_KEYVALUES = EXTRA_ARG_KEYVALUES + STATS_KEYVALUES + \
-                      ['latest_bandwidth'] + \
-                      BW_HEADER_KEYVALUES_MONITOR
-# List of all the KeyValues currently being used to generate the file
-ALL_KEYVALUES = ['version'] + UNORDERED_KEYVALUES
+# The stem's attribute name for these Keys starts with this string.
+BW_HEADER_KEYVALUES_RECENT_MEASUREMENTS_EXCLUDED = \
+    [k for n, (k, _) in HEADER_ATTR.items()
+     if n.startswith("recent_stats.relay_failures.")]
+
+# XXX: The following constants are kept for compatibility with the generator/
+# parser here. Use stem's parser/generator when possible and refactor this
+# code when stem releases 1.8.0.
+KEYVALUES_INT = [k for k, t in HEADER_ATTR.values() if t is _int]
+# # List of all unordered KeyValues currently being used to generate the file
+UNORDERED_KEYVALUES = [k for k, _ in HEADER_ATTR.values()]
+UNORDERED_KEYVALUES.remove('version')
 
 TERMINATOR = '====='
 
 # Bandwidth Lines KeyValues
 # =========================
 # Num header lines in v1.X.X using all the KeyValues
-NUM_LINES_HEADER_V1 = len(ALL_KEYVALUES) + 2
+NUM_LINES_HEADER_V1 = len(HEADER_ATTR) + 2
 LINE_TERMINATOR = TERMINATOR + LINE_SEP
 
 # KeyValue separator in Bandwidth Lines
@@ -250,20 +202,28 @@ class V3BWHeader(object):
           when the generator started
     """
     def __init__(self, timestamp, **kwargs):
+        # XXX: do not convert all the arguments to string here, convert them
+        # only in __str__ and check their actual types.
         assert isinstance(timestamp, str)
         for v in kwargs.values():
             assert isinstance(v, str)
         self.timestamp = timestamp
-        # KeyValues with default value when not given by kwargs
+
+        # Initialize all defined attrs,
+        # XXX: Since using the stem's name for the attributes will require
+        # other refactoring here, use the name in the actual file.
+        header_keys = [k for k, _ in HEADER_ATTR.values()]
+        [setattr(self, k, v) for k, v in kwargs.items() if k in header_keys]
+
+        # Then initialize the ones with defaults, if they weren't already.
+        # Ideally these defaults could be defined in the HEADER_ATTR constant.
         self.version = kwargs.get('version', SPEC_VERSION)
         self.software = kwargs.get('software', 'sbws')
         self.software_version = kwargs.get('software_version', __version__)
         self.file_created = kwargs.get('file_created', now_isodt_str())
         # latest_bandwidth should not be in kwargs, since it MUST be the
         # same as timestamp
         self.latest_bandwidth = unixts_to_isodt_str(timestamp)
-        [setattr(self, k, v) for k, v in kwargs.items()
-         if k in BANDWIDTH_HEADER_KEY_VALUES_INIT]
 
     def __str__(self):
         if self.version.startswith('1.'):
@@ -337,9 +297,10 @@ def from_lines_v1(cls, lines):
             log.warn('Terminator is not in lines')
             return None
         ts = lines[0]
+        header_keys = [k for k, _ in HEADER_ATTR.values()]
         kwargs = dict([l.split(KEYVALUE_SEP_V1)
                        for l in lines[:index_terminator]
-                       if l.split(KEYVALUE_SEP_V1)[0] in ALL_KEYVALUES])
+                       if l.split(KEYVALUE_SEP_V1)[0] in header_keys])
         h = cls(ts, **kwargs)
         # last line is new line
         return h, lines[index_terminator + 1:-1]

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,8 +1,17 @@
 """Common pytest configuration for unit and integration tests."""
+import argparse
+import os.path
 import pytest
+from unittest import mock
+
+from sbws.lib import resultdump
+from sbws.util import config
 from sbws.util.parser import create_parser
 
 
+FAKE_TIME = 1557556118
+
+
 @pytest.fixture(scope='session')
 def parser():
     return create_parser()
@@ -29,3 +38,59 @@ def readlines(self, name):
             with self.open(name, "r") as f:
                 return f.readlines()
     return D(request.fspath.dirpath("data"))
+
+
+@pytest.fixture()
+def tests_data_dir(scope="global"):
+    """Root data directory, common to all the tests."""
+    here = os.path.abspath(os.path.dirname(__file__))
+    return os.path.join(here, "data")
+
+
+@pytest.fixture()
+def sbws_dir(tests_data_dir):
+    return os.path.join(tests_data_dir, ".sbws")
+
+
+# The following fixtures are similar to the ones defined in unit and
+# integration sub-directories, but they are for the tests data to be read,
+# not to write.
+@pytest.fixture()
+def sbws_args(tests_data_dir):
+    config_fpath = os.path.join(tests_data_dir, ".sbws.ini")
+    sbws_args = argparse.Namespace(**{'config': config_fpath})
+    return sbws_args
+
+
+@pytest.fixture()
+def sbws_conf(sbws_args, sbws_dir):
+    """Default configuration with sbws home in the tmp test dir."""
+    conf = config.get_config(sbws_args)
+    conf['paths']['sbws_home'] = sbws_dir
+    conf['paths']['v3bw_fname'] = "${v3bw_dname}/latest.v3bw"
+    return conf
+
+
+@pytest.fixture()
+@mock.patch('time.time')
+def measurements(mock_time, sbws_conf):
+    # Because load_recent_results_in_datadir will use time.time()
+    # to decide which results are recent.
+    mock_time.return_value = FAKE_TIME
+    print("measurements", sbws_conf.getpath('paths', 'datadir'))
+    measurements = resultdump \
+        .load_recent_results_in_datadir(
+            sbws_conf.getint('general', 'data_period'),
+            sbws_conf.getpath('paths', 'datadir'))
+    return measurements
+
+
+@pytest.fixture
+def bandwidth_file_headers():
+    d = {
+        'earliest_bandwidth': "2019-05-11T06:26:41",
+        'file_created': "2019-05-11T06:32:32",
+        'generator_started': "2019-05-11T06:26:28",
+        'latest_bandwidth': "2019-05-11T06:28:38",
+    }
+    return d
diff --git a/tests/data/.sbws.ini b/tests/data/.sbws.ini
@@ -0,0 +1,30 @@
+[general]
+# Days into the past that measurements are considered valid
+data_period = 1
+
+[paths]
+sbws_home = .sbws
+
+[scanner]
+country = ZZ
+
+[destinations]
+local = on
+
+[destinations.local]
+; url = https://localhost:28888/sbws.bin
+url = http://127.0.0.1:28888/sbws.bin
+verify = False
+country = ZZ
+
+[tor]
+extra_lines =
+    DirAuthority auth1 orport=2002 no-v2 v3ident=D7DBC517EFD2BA1A5012CF1BD0BB38F17C8160BD 127.10.0.1:2003 AA45C13025C037F056E734169891878ED0880231
+    DirAuthority auth2 orport=2002 no-v2 v3ident=4EE103A081F400E6622F5461D51782B876BB5C24 127.10.0.2:2003 E7B3C9A0040D628DAC88B0251AE6334D28E8F531
+    DirAuthority auth3 orport=2002 no-v2 v3ident=8B85069C7FC0593801E6491A34100264FCE28980 127.10.0.3:2003 35E3B8BB71C81355649AEC5862ECB7ED7EFDBC5C
+    TestingTorNetwork 1
+    NumCPUs 1
+
+[logging]
+level = debug
+to_stdout_level = ${level}