diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 1a9d57d..40624ab 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -39,7 +39,7 @@ you can run tests by making use of the `Makefile` and From the project root, call: - `make test` to run tests and coverage -- `make pep` to run style checks +- `pre-commit run` to run style checks (Ruff and some additional hooks) ## Building the documentation diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 87f9b0f..400b777 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -60,7 +60,7 @@ jobs: - name: Check formatting if: ${{ matrix.platform == 'ubuntu-22.04' && matrix.python-version == '3.11' }} run: | - make pep + make check-manifest pre-commit run --all-files || ( git status --short ; git diff ; exit 1 ) - name: Test with pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1d06339..75fde44 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,27 +1,20 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-json - - id: check-ast - - id: check-added-large-files - - id: check-case-conflict - - id: check-docstring-first -- repo: https://github.com/pycqa/isort - rev: "5.12.0" - hooks: - - id: isort -- repo: https://github.com/psf/black - rev: "23.9.1" - hooks: - - id: black -- repo: https://github.com/pycqa/flake8 - rev: "6.1.0" - hooks: - - id: flake8 - additional_dependencies: [flake8-docstrings] +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-json + - id: check-ast + - id: check-added-large-files + - id: check-case-conflict + - id: 
check-docstring-first +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.1 + hooks: + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/Makefile b/Makefile index a146fd6..d1e13a0 100755 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ -.PHONY: all inplace test flake pydocstyle check-manifest isort black pep build-doc dist-build +.PHONY: all inplace test check-manifest build-doc dist-build -all: inplace pep test build-doc dist-build +all: inplace check-manifest test build-doc dist-build inplace: @echo "Installing pybv" @@ -12,28 +12,10 @@ test: @echo "Running pytest: test modules" @pytest --cov=./pybv --cov-report=xml --verbose -flake: - @echo "Running flake8" - @flake8 --docstring-convention numpy --count pybv - -pydocstyle: - @echo "Running pydocstyle" - @pydocstyle - check-manifest: @echo "Running check-manifest" @check-manifest -isort: - @echo "Running check-manifest" - @isort . - -black: - @echo "Running black" - @black . - -pep: flake pydocstyle check-manifest isort black - build-doc: @echo "Building documentation" make -C docs/ clean diff --git a/docs/conf.py b/docs/conf.py index 8319c1e..d3db897 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -45,7 +45,7 @@ # General information about the project. 
project = "pybv" -copyright = "2018-{}, pybv developers".format(date.today().year) +copyright = f"2018-{date.today().year}, pybv developers" # noqa:A001 author = "pybv developers" version = pybv.__version__ release = version diff --git a/docs/sphinxext/gh_substitutions.py b/docs/sphinxext/gh_substitutions.py index bc60766..02b0054 100644 --- a/docs/sphinxext/gh_substitutions.py +++ b/docs/sphinxext/gh_substitutions.py @@ -10,8 +10,12 @@ from docutils.parsers.rst.roles import set_classes -def gh_role(name, rawtext, text, lineno, inliner, options={}, content=[]): +def gh_role(name, rawtext, text, lineno, inliner, options=None, content=None): """Link to a GitHub issue.""" + if content is None: + content = [] + if options is None: + options = {} try: # issue/PR mode (issues/PR-num will redirect to pull/PR-num) int(text) diff --git a/pybv/__init__.py b/pybv/__init__.py index 92eb443..22ee46a 100644 --- a/pybv/__init__.py +++ b/pybv/__init__.py @@ -1,7 +1,7 @@ """A lightweight I/O utility for the BrainVision data format.""" - __version__ = "0.8.0.dev5" + from .io import write_brainvision __all__ = ["write_brainvision"] diff --git a/pybv/_export.py b/pybv/_export.py index d0cabfe..c843d8e 100644 --- a/pybv/_export.py +++ b/pybv/_export.py @@ -19,31 +19,31 @@ def _export_mne_raw(*, raw, fname, events=None, overwrite=False): The raw data to export. fname : str | pathlib.Path The name of the file where raw data will be exported to. Must end with - ``".vhdr"``, and accompanying *.vmrk* and *.eeg* files will be written - inside the same directory. + ``".vhdr"``, and accompanying *.vmrk* and *.eeg* files will be written inside + the same directory. events : np.ndarray | None - Events to be written to the marker file (*.vmrk*). When array, must be in - `MNE-Python format `_. - When ``None`` (default), events will be written based on ``raw.annotations``. + Events to be written to the marker file (*.vmrk*). If array, must be in + `MNE-Python format `_. 
If + ``None`` (default), events will be written based on ``raw.annotations``. overwrite : bool Whether or not to overwrite existing data. Defaults to ``False``. + """ - # Prepare file location + # prepare file location if not str(fname).endswith(".vhdr"): raise ValueError("`fname` must have the '.vhdr' extension for BrainVision.") fname = Path(fname) folder_out = fname.parents[0] fname_base = fname.stem - # Prepare data from raw + # prepare data from raw data = raw.get_data() # gets data starting from raw.first_samp sfreq = raw.info["sfreq"] # in Hz meas_date = raw.info["meas_date"] # datetime.datetime ch_names = raw.ch_names - # write voltage units as micro-volts and all other units without - # scaling - # We write units that we don't know as n/a + # write voltage units as micro-volts and all other units without scaling + # write units that we don't know as n/a unit = [] for ch in raw.info["chs"]: if ch["unit"] == FIFF.FIFF_UNIT_V: @@ -54,9 +54,9 @@ def _export_mne_raw(*, raw, fname, events=None, overwrite=False): unit.append(_unit2human.get(ch["unit"], "n/a")) unit = [u if u != "NA" else "n/a" for u in unit] - # We enforce conversion to float32 format - # XXX: Could add a feature that checks data and optimizes `unit`, `resolution`, - # and `format` so that raw.orig_format could be retained if reasonable. + # enforce conversion to float32 format + # XXX: Could add a feature that checks data and optimizes `unit`, `resolution`, and + # `format` so that raw.orig_format could be retained if reasonable. if raw.orig_format != "single": warn( f"Encountered data in '{raw.orig_format}' format. " @@ -67,12 +67,12 @@ def _export_mne_raw(*, raw, fname, events=None, overwrite=False): fmt = "binary_float32" resolution = 0.1 - # Handle events + # handle events # if we got an ndarray, this is in MNE-Python format msg = "`events` must be None or array in MNE-Python format." 
if events is not None: - # Subtract raw.first_samp because brainvision marks events starting from - # the first available data point and ignores the raw.first_samp + # Subtract raw.first_samp because brainvision marks events starting from the + # first available data point and ignores the raw.first_samp assert isinstance(events, np.ndarray), msg assert events.ndim == 2, msg assert events.shape[-1] == 3, msg @@ -107,14 +107,14 @@ def _mne_annots2pybv_events(raw): """Convert mne Annotations to pybv events.""" events = [] for annot in raw.annotations: - # Handle onset and duration: seconds to sample, - # relative to raw.first_samp / raw.first_time + # handle onset and duration: seconds to sample, relative to + # raw.first_samp / raw.first_time onset = annot["onset"] - raw.first_time onset = raw.time_as_index(onset).astype(int)[0] duration = int(annot["duration"] * raw.info["sfreq"]) - # Triage type and description - # Defaults to type="Comment", and the full description + # triage type and description + # defaults to type="Comment" and the full description etype = "Comment" description = annot["description"] for start in ["Stimulus/S", "Response/R", "Comment/"]: @@ -137,11 +137,11 @@ def _mne_annots2pybv_events(raw): ) if "ch_names" in annot: - # Handle channels + # handle channels channels = list(annot["ch_names"]) event_dict["channels"] = channels - # Add a "pybv" event + # add a "pybv" event events += [event_dict] return events diff --git a/pybv/io.py b/pybv/io.py index 6fa6245..46a5f41 100644 --- a/pybv/io.py +++ b/pybv/io.py @@ -43,123 +43,118 @@ def write_brainvision( Parameters ---------- data : np.ndarray, shape (n_channels, n_times) - The raw data to export. Voltage data is assumed to be in **volts** and - will be scaled as specified by `unit`. Non-voltage channels (as - specified by `unit`) are never scaled (e.g., `"°C"`). + The raw data to export. Voltage data is assumed to be in **volts** and will be + scaled as specified by `unit`. 
Non-voltage channels (as specified by `unit`) are + never scaled (e.g., `"°C"`). sfreq : int | float The sampling frequency of the data in Hz. ch_names : list of {str | int}, len (n_channels) The names of the channels. Integer channel names are converted to string. ref_ch_names : str | list of str, len (n_channels) | None - The name of the channel used as a reference during the recording. If - references differed between channels, you may supply a list of - reference channel names corresponding to each channel in `ch_names`. - If ``None`` (default), assume that all channels are referenced to a - common channel that is not further specified (BrainVision default). - - .. note:: The reference channel name specified here does not need to - appear in `ch_names`. It is permissible to specify a - reference channel that is not present in `data`. + The name of the channel used as a reference during the recording. If references + differ between channels, you may supply a list of reference channel names + corresponding to each channel in `ch_names`. If ``None`` (default), assume that + all channels are referenced to a common channel that is not further specified + (BrainVision default). + + .. note:: The reference channel name specified here does not need to appear in + `ch_names`. It is permissible to specify a reference channel that is + not present in `data`. fname_base : str - The base name for the output files. Three files will be created - (*.vhdr*, *.vmrk*, *.eeg*) and all will share this base name. + The base name for the output files. Three files will be created (*.vhdr*, + *.vmrk*, *.eeg*), and all will share this base name. folder_out : str - The folder where output files will be saved. Will be created if it does - not exist yet. + The folder where output files will be saved. Will be created if it does not + exist. overwrite : bool Whether or not to overwrite existing files. Defaults to ``False``. 
events : np.ndarray, shape (n_events, {2, 3}) | list of dict, len (n_events) | None - Events to write in the marker file (*.vmrk*). Defaults to ``None`` - (not writing any events). - - If an array is passed, it must have either two or three columns and - consist of non-negative integers. The first column is always the - zero-based *onset* index of each event (corresponding to the - time dimension of the `data` array). The second column is a number - associated with the *description* of the event. The (optional) third - column specifies the *duration* of each event in samples (defaults to - ``1``). All events are written as *type* "Stimulus" and interpreted - as relevant to all *channels*. For more fine-grained control over how to - write events, pass a list of dict as described next. - - If list of dict is passed, each dict in the list corresponds to an - event and may have the following entries: + Events to write in the marker file (*.vmrk*). Defaults to ``None`` (not writing + any events). + + If an array is passed, it must have either two or three columns and consist of + non-negative integers. The first column is always the zero-based *onset* index + of each event (corresponding to the time dimension of the `data` array). The + second column is a number associated with the *description* of the event. The + (optional) third column specifies the *duration* of each event in samples + (defaults to ``1``). All events are written as *type* "Stimulus" and interpreted + as relevant to all *channels*. For more fine-grained control over how to write + events, pass a list of dict as described next. + + If a list of dict is passed, each dict in the list corresponds to an event and + may have the following entries: - ``"onset"`` : int - The zero-based index of the event onset, corresponding to the - time dimension of the `data` array. + The zero-based index of the event onset, corresponding to the time + dimension of the `data` array. 
- ``"duration"`` : int The duration of the event in samples (defaults to ``1``). - ``"description"`` : str | int - The description of the event. Must be a non-negative int when - `type` (see below) is either ``"Stimulus"`` or ``"Response"``, and may - be a str when `type` is ``"Comment"``. + The description of the event. Must be a non-negative int when `type` + (see below) is either ``"Stimulus"`` or ``"Response"``, and may be a str + when `type` is ``"Comment"``. - ``"type"`` : str The type of the event, must be one of ``{"Stimulus", "Response", - "Comment"}`` (defaults to ``"Stimulus"``). Additional types like - the known BrainVision types ``"New Segment"``, ``"SyncStatus"``, etc. - are currently not supported. + "Comment"}`` (defaults to ``"Stimulus"``). Additional types like the + known BrainVision types ``"New Segment"``, ``"SyncStatus"``, etc. are + currently not supported. - ``"channels"`` : str | list of {str | int} The channels that are impacted by the event. Can be ``"all"`` - (reflecting all channels), or a channel name, or a list of - channel names. An empty list means the same as ``"all"``. - Integer channel names are converted to strings, as in the - `ch_names` parameter. Defaults to ``"all"``. - - Note that ``onset`` and ``description`` MUST be specified in each - dict. - - .. note:: When specifying more than one but less than "all" channels - that are impacted by an event, ``pybv`` will write the same - event for as many times as channels are specified (see - :gh:`77` for a discussion). This is valid according to the - BrainVision specification, however for maximum compatibility - with other BrainVision readers, we do not (yet) recommend - using this feature. + (reflecting all channels), or a channel name, or a list of channel + names. An empty list means the same as ``"all"``. Integer channel names + are converted to strings, as in the `ch_names` parameter. Defaults to + ``"all"``. 
+ + Note that ``"onset"`` and ``"description"`` MUST be specified in each dict. + + .. note:: When specifying more than one but less than "all" channels that are + impacted by an event, ``pybv`` will write the same event for as many + times as channels are specified (see :gh:`77` for a discussion). This + is valid according to the BrainVision specification, but for maximum + compatibility with other BrainVision readers, we do not recommend + using this feature yet. resolution : float | np.ndarray, shape (n_channels,) - The resolution in `unit` in which you'd like the data to be stored. If - float, the same resolution is applied to all channels. If array with - n_channels elements, each channel is scaled with its own corresponding - resolution from the array. Note that `resolution` is applied on top - of the default resolution that a data format (see `fmt`) has. For - example, the ``"binary_int16"`` format by design has no floating point - support, but when scaling the data in µV for ``0.1`` resolution (default), - accurate writing for all values >= 0.1 µV is guaranteed. In contrast, - the ``"binary_float32"`` format by design already supports floating points - up to 1e-6 resolution, and writing data in µV with 0.1 resolution - will thus guarantee accurate writing for all values >= 1e-7 µV + The resolution in `unit` in which you'd like the data to be stored. If float, + the same resolution is applied to all channels. If array with `n_channels` + elements, each channel is scaled with its own corresponding resolution from the + array. Note that `resolution` is applied on top of the default resolution that a + data format (see `fmt`) has. For example, the ``"binary_int16"`` format by + design has no floating point support, but when scaling the data in µV for + ``0.1`` resolution (default), accurate writing for all values ≥ 0.1 µV is + guaranteed. 
In contrast, the ``"binary_float32"`` format by design already + supports floating points up to 1e-6 resolution, and writing data in µV with 0.1 + resolution will thus guarantee accurate writing for all values ≥ 1e-7 µV (``1e-6 * 0.1``). unit : str | list of str The unit of the exported data. This can be one of ``"V"``, ``"mV"``, ``"µV"`` - (or equivalently ``"uV"``) , or ``"nV"``, which will scale the data - accordingly. Defaults to ``"µV"``. Can also be a list of units with one unit - per channel. Non-voltage channels are stored "as is", for example - temperature might be available in ``"°C"``, which ``pybv`` will not scale. + (or equivalently ``"uV"``), or ``"nV"``, which will scale the data accordingly. + Defaults to ``"µV"``. Can also be a list of units with one unit per channel. + Non-voltage channels are stored "as is", for example temperature might be + available in ``"°C"``, which ``pybv`` will not scale. fmt : str Binary format the data should be written as. Valid choices are ``"binary_float32"`` (default) and ``"binary_int16"``. meas_date : datetime.datetime | str | None The measurement date specified as a :class:`datetime.datetime` object. - Alternatively, can be a str in the format "YYYYMMDDhhmmssuuuuuu" - ("u" stands for microseconds). Note that setting a measurement date - implies that one additional event is created in the *.vmrk* file. To - prevent this, set this parameter to ``None`` (default). + Alternatively, can be a string in the format "YYYYMMDDhhmmssuuuuuu" ("u" stands + for microseconds). Note that setting a measurement date implies that one + additional event is created in the *.vmrk* file. To prevent this, set this + parameter to ``None`` (default). Notes ----- - iEEG/EEG/MEG data is assumed to be in V, and ``pybv`` will scale these data - to µV by default. Any unit besides µV is officially unsupported in the - BrainVision specification. 
However, if one specifies other voltage units - such as mV or nV, we will still scale the signals accordingly in the - exported file. We will also write channels with non-voltage units such as - °C as is (without scaling). For maximum compatibility, all signals - should be written as µV. - - When passing a list of dict to `events`, the event ``type`` that can be passed - is currently limited to one of ``{"Stimulus", "Response", "Comment"}``. The - BrainVision specification itself does not limit event types, and future - extensions of ``pybv`` may permit additional or even arbitrary event types. + iEEG/EEG/MEG data is assumed to be in V, and ``pybv`` will scale these data to µV by + default. Any unit besides µV is officially unsupported in the BrainVision + specification. However, if one specifies other voltage units such as mV or nV, we + will still scale the signals accordingly in the exported file. We will also write + channels with non-voltage units such as °C as is (without scaling). For maximum + compatibility, all signals should be written as µV. + + When passing a list of dict to `events`, the event ``type`` that can be passed is + currently limited to one of ``{"Stimulus", "Response", "Comment"}``. The BrainVision + specification itself does not limit event types, and future extensions of ``pybv`` + may permit additional or even arbitrary event types. References ---------- @@ -169,19 +164,24 @@ def write_brainvision( -------- >>> data = np.random.random((3, 5)) >>> # write data with varying units - ... # Note channels A1 and A2 are expected to be in volt and will get - ... # rescaled to µV and mV respectively. - ... # TEMP is expected to be in some other unit (i.e., NOT volt), and - ... # will not get scaled (it is written "as is") - ... write_brainvision(data=data, sfreq=1, ch_names=["A1", "A2", "TEMP"], - ... folder_out="./", - ... fname_base="pybv_test_file", - ... unit=["µV", "mV", "°C"]) + ... 
# channels A1 and A2 are expected to be in volt and will get rescaled to µV and + ... # mV, respectively. + ... # TEMP is expected to be in some other unit (i.e., NOT volt), and will not get + ... # scaled (it is written "as is") + ... write_brainvision( + ... data=data, + ... sfreq=1, + ... ch_names=["A1", "A2", "TEMP"], + ... folder_out="./", + ... fname_base="pybv_test_file", + ... unit=["µV", "mV", "°C"] + ... ) >>> # remove the files >>> for ext in [".vhdr", ".vmrk", ".eeg"]: ... os.remove("pybv_test_file" + ext) - """ # noqa: E501 - # Input checks + + """ + # input checks if not isinstance(data, np.ndarray): raise ValueError(f"data must be np.ndarray, but found: {type(data)}") @@ -201,8 +201,8 @@ def write_brainvision( if len(data) != nchan: raise ValueError( - f"Number of channels in data ({len(data)}) does not " - f"match number of channel names ({len(ch_names)})" + f"Number of channels in data ({len(data)}) does not match number of " + f"channel names ({len(ch_names)})." ) if len(set(ch_names)) != nchan: @@ -210,7 +210,7 @@ def write_brainvision( events = _chk_events(events, ch_names, data.shape[1]) - # Ensure we have a list of strings as reference channel names + # ensure we have a list of strings as reference channel names if ref_ch_names is None: ref_ch_names = [""] * nchan # common but unspecified reference elif isinstance(ref_ch_names, str): @@ -218,30 +218,27 @@ def write_brainvision( else: if "" in ref_ch_names: msg = ( - f"ref_ch_names contains an empty string: {ref_ch_names}\n" - f"Empty strings are reserved values and not permitted " - f"as reference channel names." + f"ref_ch_names contains an empty string: {ref_ch_names}\nEmpty strings " + "are reserved values and not permitted as reference channel names." 
) raise ValueError(msg) ref_ch_names = [str(ref_ch_name) for ref_ch_name in ref_ch_names] if len(ref_ch_names) != nchan: raise ValueError( - f"The number of reference channel names ({len(ref_ch_names)})" - f"must match the number of channels in your data ({nchan})" + f"The number of reference channel names ({len(ref_ch_names)}) must match " + f"the number of channels in your data ({nchan})." ) # ensure ref chs that are in data are zero for ref_ch_name in list(set(ref_ch_names) & set(ch_names)): if not np.allclose(data[ch_names.index(ref_ch_name), :], 0): raise ValueError( - f"The provided data for the reference channel " - f"{ref_ch_name} does not appear to be zero across " - f"all time points. This indicates that this channel " - f"either did not serve as a reference during the recording, " - f"or the data has been altered since. Please either pick a " - f"different reference channel, or omit the " - f"ref_ch_name parameter." + f"The provided data for the reference channel {ref_ch_name} does not " + "appear to be zero across all time points. This indicates that this " + "channel either did not serve as a reference during the recording, or " + "the data has been altered since. Please either pick a different " + "reference channel, or omit the ref_ch_name parameter." ) if not isinstance(sfreq, (int, float)): @@ -264,8 +261,8 @@ def write_brainvision( unit = [unit] * nchan if len(unit) != nchan: raise ValueError( - f"Number of channels in unit ({len(unit)}) does not " - f"match number of channel names ({nchan})" + f"Number of channels in unit ({len(unit)}) does not match number of channel" + f" names ({nchan})" ) units = unit @@ -281,15 +278,15 @@ def write_brainvision( # only show the warning once if a greek letter was encountered if show_warning: warn( - f"Encountered small Greek letter mu 'μ' or 'u' in unit: {unit}. " - f"Converting to micro sign 'µ'." + f"Encountered small Greek letter mu 'μ' or 'u' in unit: {unit}. Converting " + "to micro sign 'µ'." 
) # measurement date if not isinstance(meas_date, (str, datetime.datetime, type(None))): raise ValueError( - f"`meas_date` must be of type str, datetime.datetime," - f' or None but is of type "{type(meas_date)}"' + f"`meas_date` must be of type str, datetime.datetime, or None but is of " + f'type "{type(meas_date)}"' ) elif isinstance(meas_date, datetime.datetime): meas_date = meas_date.strftime("%Y%m%d%H%M%S%f") @@ -297,12 +294,11 @@ def write_brainvision( pass elif not (meas_date.isdigit() and len(meas_date) == 20): raise ValueError( - "Got a str for `meas_date`, but it was not formatted " - "as expected. Please supply a str in the format: " - '"YYYYMMDDhhmmssuuuuuu".' + "Got a str for `meas_date`, but it was not formatted as expected. Please " + 'supply a str in the format: "YYYYMMDDhhmmssuuuuuu".' ) - # Create output file names/paths, checking if they already exist + # create output file names/paths, checking if they already exist folder_out_created = not op.exists(folder_out) os.makedirs(folder_out, exist_ok=True) eeg_fname = op.join(folder_out, fname_base + ".eeg") @@ -310,11 +306,11 @@ def write_brainvision( vhdr_fname = op.join(folder_out, fname_base + ".vhdr") for fname in (eeg_fname, vmrk_fname, vhdr_fname): if op.exists(fname) and not overwrite: - raise IOError( + raise OSError( f"File already exists: {fname}.\n" f"Consider setting overwrite=True." ) - # Write output files, but delete everything if we come across an error + # write output files, but delete everything if we come across an error try: _write_bveeg_file( eeg_fname, @@ -354,16 +350,16 @@ def write_brainvision( def _chk_events(events, ch_names, n_times): """Check that the events parameter is as expected. - This function will always return `events` as a list of dicts. - If `events` is ``None``, it will be an empty list. 
- If `events` is a list of dict, it will add missing keys to each dict with - default values, and it will -- for each ith event -- turn ``events[i]["channels"]`` - into a list of 1-based channel name indices, where ``0`` equals ``"all"``. - Event descriptions for ``"Stimulus"`` and ``"Response"`` will be reformatted to - a str of the format ``"S{:>n}"`` (or with a leading ``"R"`` for ``"Response"``), - where ``n`` is determined by the description with the most digits (minimum 3). - For each ith event, the onset (``events[i]["onset"]``) will be incremented by 1 - to comply with the 1-based indexing used in BrainVision marker files (*.vmrk*). + This function will always return `events` as a list of dicts. If `events` is + ``None``, it will be an empty list. If `events` is a list of dict, it will add + missing keys to each dict with default values, and it will, for each ith event, turn + ``events[i]["channels"]`` into a list of 1-based channel name indices, where ``0`` + equals ``"all"``. Event descriptions for ``"Stimulus"`` and ``"Response"`` will be + reformatted to a str of the format ``"S{:>n}"`` (or with a leading ``"R"`` for + ``"Response"``), where ``n`` is determined by the description with the most digits + (minimum 3). For each ith event, the onset (``events[i]["onset"]``) will be + incremented by 1 to comply with the 1-based indexing used in BrainVision marker + files (*.vmrk*). Parameters ---------- @@ -378,6 +374,7 @@ def _chk_events(events, ch_names, n_times): ------- events_out : list of dict, len (n_events) The preprocessed events, always provided as list of dict. 
+ """ if not isinstance(events, (type(None), np.ndarray, list)): raise ValueError("events must be an array, a list of dict, or None") @@ -387,8 +384,8 @@ def _chk_events(events, ch_names, n_times): events_out = [] # default events - # NOTE: using "ch_names" as default for channels translates directly - # into "all" but is robust with respect to channels named "all" + # NOTE: using "ch_names" as default for channels translates directly into "all" but + # is robust with respect to channels named "all" event_defaults = dict(duration=1, type="Stimulus", channels=ch_names) # validate input: ndarray @@ -397,13 +394,12 @@ def _chk_events(events, ch_names, n_times): raise ValueError(f"When array, events must be 2D, but got {events.ndim}") if events.shape[1] not in (2, 3): raise ValueError( - "When array, events must have 2 or 3 columns, " - f"but got: {events.shape[1]}" + "When array, events must have 2 or 3 columns, but got: " + f"{events.shape[1]}" ) if not all([np.issubdtype(i, np.integer) for i in events.flat]): raise ValueError( - "When array, all entries in events must be int, but " - "found other types" + "When array, all entries in events must be int, but found other types" ) # convert array to list of dict @@ -432,8 +428,8 @@ def _chk_events(events, ch_names, n_times): # each item must be dict if not isinstance(event, dict): raise ValueError( - "When list, events must be a list of dict, but found " - "non-dict element in list" + "When list, events must be a list of dict, but found non-dict element " + "in list" ) # NOTE: We format 1 -> "S 1", 10 -> "S 10", 100 -> "S100", etc., @@ -449,13 +445,13 @@ def _chk_events(events, ch_names, n_times): twidth = max(3, int(np.ceil(np.log10(max_event_descr)))) # do full validation - for iev, event in enumerate(events_out): + for event in events_out: # required keys for required_key in ["onset", "description"]: if required_key not in event: raise ValueError( - "When list of dict, each dict in events must have " - "the keys 
'onset' and 'description'" + "When list of dict, each dict in events must have the keys 'onset' " + "and 'description'" ) # populate keys with default if missing (in-place) @@ -470,20 +466,20 @@ def _chk_events(events, ch_names, n_times): if not (0 <= event["onset"] < n_times): raise ValueError( - "events: at least one onset sample is not in range of " - f"data (0-{n_times-1})" + "events: at least one onset sample is not in range of data (0-" + f"{n_times - 1})" ) if event["duration"] < 0: raise ValueError( - "events: at least one duration is negative. Durations " - "must be >= 0 samples." + "events: at least one duration is negative. Durations must be >= 0 " + "samples." ) if not (0 <= event["onset"] + event["duration"] <= n_times): raise ValueError( - "events: at least one event has a duration that exceeds " - f"the range of data (0-{n_times-1})" + "events: at least one event has a duration that exceeds the range of " + f"data (0-{n_times - 1})" ) event["onset"] = event["onset"] + 1 # VMRK uses 1-based indexing @@ -497,14 +493,14 @@ def _chk_events(events, ch_names, n_times): if event["type"] in ["Stimulus", "Response"]: if not isinstance(event["description"], int): raise ValueError( - f"events: when `type` is {event['type']}, " - "`description` must be non-negative int" + f"events: when `type` is {event['type']}, `description` must be " + "non-negative int" ) if event["description"] < 0: raise ValueError( - f"events: when `type` is {event['type']}, " - "descriptions must be non-negative ints." + f"events: when `type` is {event['type']}, descriptions must be " + "non-negative ints." 
) tformat = event["type"][0] + "{:>" + str(twidth) + "}" @@ -514,14 +510,14 @@ def _chk_events(events, ch_names, n_times): assert event["type"] == "Comment" if not isinstance(event["description"], (int, str)): raise ValueError( - f"events: when `type` is {event['type']}, " - "`description` must be str or int" + f"events: when `type` is {event['type']}, `description` must be str" + " or int" ) event["description"] = str(event["description"]) # `channels` - # "all" becomes ch_names (list of all channel names) - # single str 'ch_name' becomes [ch_name] + # "all" becomes ch_names (list of all channel names), single str 'ch_name' + # becomes [ch_name] if not isinstance(event["channels"], (list, str)): raise ValueError("events: `channels` must be str or list of str") @@ -531,8 +527,8 @@ def _chk_events(events, ch_names, n_times): raise ValueError( "Found channel named 'all'. Your `channels` specification in " "events is also 'all'. This is ambiguous, because 'all' is a " - "reserved keyword. Either rename the channel called 'all', " - "or explicitly list all ch_names in `channels` in each event " + "reserved keyword. Either rename the channel called 'all', or " + "explicitly list all ch_names in `channels` in each event " "instead of using 'all'." 
) event["channels"] = ch_names @@ -543,8 +539,8 @@ def _chk_events(events, ch_names, n_times): for ch in event["channels"]: if not isinstance(ch, (str, int)): raise ValueError( - "events: `channels` must be list of str or list of " - "int corresponding to ch_names" + "events: `channels` must be list of str or list of int " + "corresponding to ch_names" ) if str(ch) not in ch_names: @@ -557,15 +553,13 @@ def _chk_events(events, ch_names, n_times): if len(set(event["channels"])) != len(event["channels"]): raise ValueError("events: found duplicate channel names") - # warn if more than one but less than all channels are specified - # (experimental feature) + # warn if more than one but less than all channels are specified (experimental) if len(event["channels"]) > 1 and len(event["channels"]) < len(ch_names): warn( - "events: you specified at least one event that impacts more " - "than one but less than all channels in the data. " - "Such events will be written to .vmrk for as many times as " - "channels are specified.\n\n" - "This feature may not be supported by all BrainVision readers." + "events: you specified at least one event that impacts more than one " + "but less than all channels in the data. Such events will be written to" + " .vmrk for as many times as channels are specified.\n\nThis feature " + "may not be supported by all BrainVision readers." ) # convert channels to indices (1-based, 0="all") @@ -584,8 +578,8 @@ def _chk_fmt(fmt): """Check that the format string is valid, return (BV, numpy) datatypes.""" if fmt not in SUPPORTED_FORMATS: errmsg = ( - f"Data format {fmt} not supported. Currently supported " - f'formats are: {", ".join(SUPPORTED_FORMATS)}' + f"Data format {fmt} not supported. 
Currently supported formats are: " + f'{", ".join(SUPPORTED_FORMATS)}' ) raise ValueError(errmsg) return SUPPORTED_FORMATS[fmt] @@ -595,8 +589,8 @@ def _chk_multiplexed(orientation): """Validate an orientation, return if it is multiplexed or not.""" if orientation not in SUPPORTED_ORIENTS: errmsg = ( - f"Orientation {orientation} not supported. Currently " - f'supported orientations are: {", ".join(SUPPORTED_ORIENTS)}' + f"Orientation {orientation} not supported. Currently supported orientations " + f'are: {", ".join(SUPPORTED_ORIENTS)}' ) raise ValueError(errmsg) return orientation == "multiplexed" @@ -647,14 +641,12 @@ def _write_vmrk_file(vmrk_fname, eeg_fname, events, meas_date): def _scale_data_to_unit(data, units): """Scale `data` in Volts to `data` in `units`.""" - # only µV is supported by the BrainVision specs, but we support additional - # voltage prefixes (e.g., V, mV, nV); if such voltage units are used, we - # issue a warning + # only µV is supported by the BrainVision specs, but we support additional voltage + # prefixes (e.g., V, mV, nV); if such voltage units are used, we issue a warning voltage_units = set() - # similar to voltages other than µV, we also support arbitrary units, but - # since these are not supported by the BrainVision specs we issue a warning - # related signals + # similar to voltages other than µV, we also support arbitrary units, but since + # these are not supported by the BrainVision specs, we issue a warning non_voltage_units = set() # create a vector to multiply with to play nice with numpy @@ -671,19 +663,16 @@ def _scale_data_to_unit(data, units): if len(voltage_units) > 0: msg = ( - f"Encountered unsupported voltage units: " - f'{", ".join(voltage_units)}\n' - f"We will scale the data appropriately, but for maximum " - f"compatibility you should use µV for all channels." 
+ f'Encountered unsupported voltage units: {", ".join(voltage_units)}\n' + "We will scale the data appropriately, but for maximum compatibility you " + "should use µV for all channels." ) warn(msg) if len(non_voltage_units) > 0: msg = ( - f"Encountered unsupported non-voltage units: " - f'{", ".join(non_voltage_units)}\n' - f"Note that the BrainVision format specification supports only " - f"µV." + f'Encountered unsupported non-voltage units: {", ".join(non_voltage_units)}' + "\nNote that the BrainVision format specification supports only µV." ) warn(msg) return data * scales @@ -699,7 +688,7 @@ def _write_vhdr_file( ch_names, ref_ch_names, orientation, - format, + format, # noqa: A002 resolution, units, ): @@ -776,7 +765,7 @@ def _check_data_in_range(data, dtype): return True -def _write_bveeg_file(eeg_fname, data, orientation, format, resolution, units): +def _write_bveeg_file(eeg_fname, data, orientation, format, resolution, units): # noqa: A002 """Write BrainVision data file.""" # check the orientation and format _chk_multiplexed(orientation) @@ -785,33 +774,33 @@ def _write_bveeg_file(eeg_fname, data, orientation, format, resolution, units): # convert the data to the desired unit data = _scale_data_to_unit(data, units) - # Invert the resolution so that we know how much to scale our data + # invert the resolution so that we know how much to scale our data scaling_factor = 1 / resolution data = data * np.atleast_2d(scaling_factor).T - # Convert the data to required format + # convert the data to required format if not _check_data_in_range(data, dtype): mod = " ('{resolution}')" if isinstance(resolution, np.ndarray): # if we have individual resolutions, do not print them all mod = "s" msg = ( - f"`data` can not be represented in '{format}' given " - f"the desired resolution{mod} and units ('{units}')." + f"`data` can not be represented in '{format}' given the desired " + f"resolution{mod} and units ('{units}')." 
) if format == "binary_int16": msg += "\nPlease consider writing using 'binary_float32' format." raise ValueError(msg) data = data.astype(dtype=dtype) - # We always write data as little-endian without BOM - # `data` is already in native byte order due to numpy operations that - # result in copies of the `data` array (see above) + # we always write data as little-endian without BOM + # `data` is already in native byte order due to NumPy operations that result in + # copies of the `data` array (see above) assert data.dtype.byteorder == "=" # swap bytes if system architecture is big-endian if sys.byteorder == "big": # pragma: no cover data = data.byteswap() - # Save to binary + # save to binary data.ravel(order="F").tofile(eeg_fname) diff --git a/pybv/tests/test_bv_writer.py b/pybv/tests/test_bv_writer.py index 2e8e9a6..3470fba 100644 --- a/pybv/tests/test_bv_writer.py +++ b/pybv/tests/test_bv_writer.py @@ -294,8 +294,7 @@ def test_bv_writer_inputs(tmpdir): fname_base=fname, folder_out=tmpdir, ) - # Passing data that's not all-zero for a reference channel should raise - # an exception + # passing data that's not all-zero for a reference channel should raise an exception data_ = data.copy() data_[ch_names.index(ref_ch_name), :] = 5 with pytest.raises(ValueError, match="reference channel.*not.*zero"): @@ -307,7 +306,7 @@ def test_bv_writer_inputs(tmpdir): fname_base=fname, folder_out=tmpdir, ) - # Empty str is a reserved value for ref_ch_names + # empty str is a reserved value for ref_ch_names with pytest.raises(ValueError, match="Empty strings are reserved values"): _ref_ch_names = [""] + ch_names[1:] write_brainvision( @@ -318,7 +317,7 @@ def test_bv_writer_inputs(tmpdir): fname_base=fname, folder_out=tmpdir, ) - # Try ambiguous list of dict events with "all" ch + # try ambiguous list of dict events with "all" ch with pytest.raises(ValueError, match="Found channel named 'all'.*ambiguous"): write_brainvision( data=data[:1, :], @@ -502,21 +501,20 @@ def 
test_write_read_cycle(tmpdir, meas_date, ref_ch_names): @pytest.mark.parametrize("format", SUPPORTED_FORMATS.keys()) @pytest.mark.parametrize("resolution", resolutions) @pytest.mark.parametrize("unit", SUPPORTED_VOLTAGE_SCALINGS) -def test_format_resolution_unit(tmpdir, format, resolution, unit): +def test_format_resolution_unit(tmpdir, format, resolution, unit): # noqa: A002 """Test different combinations of formats, resolutions, and units. - This test would raise warnings for several cases of "unit" - (Encountered unsupported voltage units), and a specific warning - if "unit" is "uV" (Encountered small Greek letter mu). - We ignore those warnings throughout the test. + This test would raise warnings for several cases of "unit" ("Encountered unsupported + voltage units"), and a specific warning if "unit" is "uV" ("Encountered small Greek + letter mu"). We ignore those warnings throughout the test. - Each run of the test is furthermore expected to exit early - with a ValueError for combinations of "resolution" and "format" - that would result in data that cannot accurately be written. + Each run of the test is furthermore expected to exit early with a ValueError for + combinations of "resolution" and "format" that would result in data that cannot + accurately be written. 
""" - # Check whether this test will be numerically possible + # check whether this test will be numerically possible tmpdata = _scale_data_to_unit(data.copy(), [unit] * n_chans) - tmpdata = tmpdata * np.atleast_2d((1 / resolution)).T + tmpdata = tmpdata * np.atleast_2d(1 / resolution).T _, dtype = _chk_fmt(format) data_will_fit = _check_data_in_range(tmpdata, dtype) @@ -532,7 +530,7 @@ def test_format_resolution_unit(tmpdir, format, resolution, unit): ) if not data_will_fit: - # End this test early + # end this test early match = f"can not be represented in '{format}' given" with pytest.raises(ValueError, match=match): write_brainvision(**kwargs) @@ -542,14 +540,14 @@ def test_format_resolution_unit(tmpdir, format, resolution, unit): vhdr_fname = tmpdir / fname + ".vhdr" raw_written = mne.io.read_raw_brainvision(vhdr_fname=vhdr_fname, preload=True) - # Check that the correct units were written in the BV file + # check that the correct units were written in the BV file orig_units = [u for key, u in raw_written._orig_units.items()] assert len(set(orig_units)) == 1 if unit is not None: assert orig_units[0] == unit.replace("u", "µ") - # Check round trip of data: in binary_int16 format, the tolerance - # is given by the lowest resolution + # check round trip of data: in binary_int16 format, the tolerance is given by the + # lowest resolution if format == "binary_int16": absolute_tolerance = np.atleast_2d(resolution).min() else: @@ -608,7 +606,7 @@ def test_write_multiple_units(tmpdir, unit): absolute_tolerance = 0 assert_allclose(data, raw_written.get_data(), atol=absolute_tolerance) - # Check that the correct units were written in the BV file + # check that the correct units were written in the BV file orig_units = [u for key, u in raw_written._orig_units.items()] assert len(set(orig_units)) == 1 assert orig_units[0] == unit.replace("u", "µ") @@ -626,7 +624,7 @@ def test_write_multiple_units(tmpdir, unit): raw_written = 
mne.io.read_raw_brainvision(vhdr_fname=vhdr_fname, preload=True) - # Check that the correct units were written in the BV file + # check that the correct units were written in the BV file orig_units = [u for key, u in raw_written._orig_units.items()] assert len(set(orig_units)) == 2 assert all( @@ -663,7 +661,7 @@ def test_write_unsupported_units(tmpdir): absolute_tolerance = 0 assert_allclose(data, raw_written.get_data(), atol=absolute_tolerance) - # Check that the correct units were written in the BV file + # check that the correct units were written in the BV file orig_units = [u for key, u in raw_written._orig_units.items()] assert len(set(orig_units)) == 2 assert all([orig_units[idx] == unit for idx in range(n_chans - 1)]) @@ -768,12 +766,12 @@ def test_event_writing(tmpdir): # should be one more, because event[3] is written twice (once per channel) assert len(raw.annotations) == len(events) + 1 - # note: mne orders events by onset, use sorted + # note: MNE orders events by onset, use sorted onsets = np.array([ev["onset"] / raw.info["sfreq"] for ev in events]) onsets = sorted(onsets) + [1.0] # add duplicate event (due to channels) np.testing.assert_array_equal(raw.annotations.onset, onsets) - # mne does not (yet; at 1.0.3) read ch_names for annotations from vmrk + # MNE does not (yet; at 1.0.3) read ch_names for annotations from vmrk np.testing.assert_array_equal( [i for i in raw.annotations.ch_names], [() for i in range(len(events) + 1)] ) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e4b728b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.ruff.lint] +select = ["A", "B006", "D", "E", "F", "I", "W", "UP"] +ignore = ["D203", "D213"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 023382d..14bebf7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,12 +4,8 @@ check-manifest pytest pytest-cov pytest-sugar -flake8>=3.9.2 -flake8-docstrings -pycodestyle +ruff sphinx sphinx-copybutton numpydoc 
-isort -black pre-commit diff --git a/setup.cfg b/setup.cfg index 03ad286..3bbbd3f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,15 +54,3 @@ export = [bdist_wheel] universal = true - -[flake8] -docstring-convention = numpy -# Recommend matching the black line length (default 88), -# rather than using the flake8 default of 79: -max-line-length = 88 -extend-ignore = - # See https://github.com/PyCQA/pycodestyle/issues/373 - E203 - -[isort] -profile=black diff --git a/setup.py b/setup.py index ec5a1af..0803ccc 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ # get the version version = None -with open(os.path.join("pybv", "__init__.py"), "r") as fid: +with open(os.path.join("pybv", "__init__.py")) as fid: for line in (line.strip() for line in fid): if line.startswith("__version__"): version_str = line.split("=")[1].strip() diff --git a/specification/README.md b/specification/README.md index ee895ce..049fa41 100644 --- a/specification/README.md +++ b/specification/README.md @@ -1,4 +1,4 @@ -The `BrainVisionCoreDataFormat_1-0.pdf` file can be contains the specification +The file [`BrainVisionCoreDataFormat_1-0.pdf`](BrainVisionCoreDataFormat_1-0.pdf) contains the specification for the BrainVision Core Data Format (BVCDF). The document stored in this repository exists for backup reasons.