Skip to content

Commit

Permalink
lag transforms (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmoralez authored Nov 3, 2023
1 parent f6e00d3 commit 47fb948
Show file tree
Hide file tree
Showing 15 changed files with 1,576 additions and 160 deletions.
20 changes: 13 additions & 7 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:

defaults:
run:
shell: bash -l {0}
shell: bash

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand All @@ -27,7 +27,7 @@ jobs:
platform-id: manylinux_x86_64
- os: windows-2019
platform-id: win_amd64
- os: macos-latest
- os: macos-11
platform-id: macosx_x86_64
- os: macos-latest
platform-id: macosx_arm64
Expand All @@ -41,6 +41,11 @@ jobs:
CIBW_BUILD: cp38-${{ matrix.platform-id }}
CIBW_ARCHS: all
CIBW_TEST_SKIP: '*-macosx_arm64'
CIBW_BUILD_VERBOSITY: 3
CIBW_BEFORE_BUILD_MACOS: |
sudo xcode-select -s /Applications/Xcode_11.7.app/Contents/Developer
pip install toml
python scripts/disable_omp_arm64.py
- uses: actions/upload-artifact@v3
with:
Expand All @@ -59,12 +64,13 @@ jobs:
- name: Clone repo
uses: actions/checkout@v3

- name: Set up environment
uses: mamba-org/setup-micromamba@v1
- uses: actions/setup-python@v4
with:
environment-file: environment.yml
create-args: python=${{ matrix.python-version }}
cache-environment: true
python-version: ${{ matrix.python-version }}
cache: pip

- name: Install dependencies
run: pip install -r requirements-test.txt

- name: Download wheels
uses: actions/download-artifact@v3
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*.so
*.dylib
*.dll
*.exp

# Fortran module files
*.mod
Expand Down
28 changes: 26 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,32 @@
cmake_minimum_required(VERSION 3.25)
project(coreforecast)

option(USE_OPENMP "Enable OpenMP" ON)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

set(CMAKE_CXX_STANDARD 17)
if(USE_OPENMP)
if(APPLE)
find_package(OpenMP)
if(NOT OpenMP_FOUND)
# libomp 15.0+ from brew is keg-only, so have to search in other locations.
# See https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927.
execute_process(COMMAND brew --prefix libomp
OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
OUTPUT_STRIP_TRAILING_WHITESPACE)
set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
set(OpenMP_C_LIB_NAMES omp)
set(OpenMP_CXX_LIB_NAMES omp)
set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
endif()
endif()
find_package(OpenMP REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()

if(APPLE)
set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
Expand All @@ -14,10 +35,9 @@ endif()
if(UNIX)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -O3 -Wall -Wextra -Wpedantic")
else()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Ot /Oy /W4")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Ob2 /Ot /Oy /W4")
endif()


if(SKBUILD)
set(LIBRARY_OUTPUT_PATH ${SKBUILD_PLATLIB_DIR}/coreforecast/lib)
else()
Expand All @@ -29,3 +49,7 @@ add_library(coreforecast SHARED src/coreforecast.cpp)
if(MSVC)
set_target_properties(coreforecast PROPERTIES OUTPUT_NAME "libcoreforecast")
endif()

if(USE_OPENMP AND CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_libraries(coreforecast PUBLIC OpenMP::OpenMP_CXX)
endif()
163 changes: 143 additions & 20 deletions coreforecast/grouped_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
from importlib.resources import files


DTYPE_FLOAT32 = ctypes.c_int(0)
DTYPE_FLOAT64 = ctypes.c_int(1)

if platform.system() in ("Windows", "Microsoft"):
prefix = "Release"
extension = "dll"
Expand All @@ -30,63 +27,189 @@ def _data_as_void_ptr(arr: np.ndarray):


class GroupedArray:
def __init__(self, data: np.ndarray, indptr: np.ndarray):
def __init__(self, data: np.ndarray, indptr: np.ndarray, num_threads: int = 1):
data = np.ascontiguousarray(data, dtype=data.dtype)
if data.dtype == np.float32:
self.dtype = DTYPE_FLOAT32
self.prefix = "GroupedArrayFloat32"
elif data.dtype == np.float64:
self.dtype = DTYPE_FLOAT64
self.prefix = "GroupedArrayFloat64"
else:
self.dtype = DTYPE_FLOAT32
self.prefix = "GroupedArrayFloat32"
data = data.astype(np.float32)
self.data = data
if indptr.dtype != np.int32:
indptr = indptr.astype(np.int32)
self.indptr = indptr
self._handle = ctypes.c_void_p()
_LIB.GroupedArray_CreateFromArrays(
_LIB[f"{self.prefix}_Create"](
_data_as_void_ptr(data),
ctypes.c_int32(data.size),
indptr.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
ctypes.c_int32(indptr.size),
self.dtype,
ctypes.c_int(indptr.size),
ctypes.c_int(num_threads),
ctypes.byref(self._handle),
)

def __del__(self):
_LIB.GroupedArray_Delete(self._handle, self.dtype)
_LIB[f"{self.prefix}_Delete"](self._handle)

def __len__(self):
return self.indptr.size - 1

def __getitem__(self, i):
return self.data[self.indptr[i] : self.indptr[i + 1]]

def scaler_fit(self, stats_fn_name: str) -> np.ndarray:
stats = np.full((len(self), 2), np.nan, dtype=self.data.dtype)
stats_fn = _LIB[stats_fn_name]
stats_fn(
def scaler_fit(self, scaler_type: str) -> np.ndarray:
stats = np.full_like(self.data, np.nan, shape=(len(self), 2))
_LIB[f"{self.prefix}_{scaler_type}ScalerStats"](
self._handle,
self.dtype,
_data_as_void_ptr(stats),
)
return stats

def scaler_transform(self, stats: np.ndarray) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB.GroupedArray_ScalerTransform(
_LIB[f"{self.prefix}_ScalerTransform"](
self._handle,
_data_as_void_ptr(stats),
self.dtype,
_data_as_void_ptr(out),
)
return out

def scaler_inverse_transform(self, stats: np.ndarray) -> np.ndarray:
out = np.empty_like(self.data)
_LIB.GroupedArray_ScalerInverseTransform(
_LIB[f"{self.prefix}_ScalerInverseTransform"](
self._handle,
_data_as_void_ptr(stats),
self.dtype,
_data_as_void_ptr(out),
)
return out

def take_from_groups(self, k: int) -> np.ndarray:
out = np.empty_like(self.data, shape=len(self))
_LIB[f"{self.prefix}_TakeFromGroups"](
self._handle,
ctypes.c_int(k),
_data_as_void_ptr(out),
)
return out

def lag_transform(self, lag: int) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_LagTransform"](
self._handle,
ctypes.c_int(lag),
_data_as_void_ptr(out),
)
return out

def rolling_transform(
self, stat_name: str, lag: int, window_size: int, min_samples: int
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_Rolling{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
ctypes.c_int(window_size),
ctypes.c_int(min_samples),
_data_as_void_ptr(out),
)
return out

def rolling_update(
self, stat_name: str, lag: int, window_size: int, min_samples: int
) -> np.ndarray:
out = np.empty_like(self.data, shape=len(self))
_LIB[f"{self.prefix}_Rolling{stat_name}Update"](
self._handle,
ctypes.c_int(lag),
ctypes.c_int(window_size),
ctypes.c_int(min_samples),
_data_as_void_ptr(out),
)
return out

def seasonal_rolling_transform(
self,
stat_name: str,
lag: int,
season_length: int,
window_size: int,
min_samples: int,
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_SeasonalRolling{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
ctypes.c_int(season_length),
ctypes.c_int(window_size),
ctypes.c_int(min_samples),
_data_as_void_ptr(out),
)
return out

def seasonal_rolling_update(
self,
stat_name: str,
lag: int,
season_length: int,
window_size: int,
min_samples: int,
) -> np.ndarray:
out = np.empty_like(self.data, shape=len(self))
_LIB[f"{self.prefix}_SeasonalRolling{stat_name}Update"](
self._handle,
ctypes.c_int(lag),
ctypes.c_int(season_length),
ctypes.c_int(window_size),
ctypes.c_int(min_samples),
_data_as_void_ptr(out),
)
return out

def expanding_transform_with_aggs(
self,
stat_name: str,
lag: int,
aggs: np.ndarray,
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_Expanding{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
_data_as_void_ptr(out),
_data_as_void_ptr(aggs),
)
return out

def expanding_transform(
self,
stat_name: str,
lag: int,
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
_LIB[f"{self.prefix}_Expanding{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
_data_as_void_ptr(out),
)
return out

def exponentially_weighted_transform(
self,
stat_name: str,
lag: int,
alpha: float,
) -> np.ndarray:
out = np.full_like(self.data, np.nan)
if self.prefix == "GroupedArrayFloat32":
alpha = ctypes.c_float(alpha)
else:
alpha = ctypes.c_double(alpha)
_LIB[f"{self.prefix}_ExponentiallyWeighted{stat_name}Transform"](
self._handle,
ctypes.c_int(lag),
alpha,
_data_as_void_ptr(out),
)
return out
Loading

0 comments on commit 47fb948

Please sign in to comment.