Source code for pysdkit.data._time_series
# -*- coding: utf-8 -*-
"""
Created on 2025/02/10 18:35:29
@author: Whenxuan Wang
@email: wwhenxuan@gmail.com
Generate time series test examples
"""
import numpy as np
from typing import Optional, Union
def simulate_seasonal_term(
periodicity: Union[float, np.ndarray],
total_cycles: Union[float, np.ndarray],
noise_std: Union[float, np.ndarray] = 1.0,
harmonics=None,
) -> np.ndarray:
"""
This function is adapted from the famous statistical analysis library in Python, statsmodels.
It synthesizes time series data based on a specified periodicity, number of cycles, and noise.
Code reference: https://www.statsmodels.org/stable/examples/notebooks/generated/statespace_seasonal.html
:param periodicity: The length of one cycle in the seasonal pattern (e.g., the number of time steps per cycle).
:param total_cycles: The total number of full cycles to generate in the time series data.
:param noise_std: The standard deviation of the noise to be added to the seasonal components.
:param harmonics: The number of harmonics (sinusoidal components) used to generate the seasonal pattern.
If None, it defaults to half the periodicity.
:return: A numpy array representing the generated time series data with seasonal behavior.
"""
# Calculate the total duration of the time series (in terms of time steps).
duration = periodicity * total_cycles
# Ensure that the total duration is an integer.
assert duration == int(duration)
duration = int(duration)
# Set the number of harmonics if not provided. Default is half the periodicity.
harmonics = harmonics if harmonics else int(np.floor(periodicity / 2))
# Calculate the angular frequency corresponding to the periodicity.
lambda_p = 2 * np.pi / float(periodicity)
# Initialize the seasonal noise components for each harmonic (both cosine and sine components).
gamma_jt = noise_std * np.random.randn(harmonics)
gamma_star_jt = noise_std * np.random.randn(harmonics)
# Define the total number of time steps, including extra time for burn-in.
total_timesteps = 100 * duration # Pad for burn-in phase
# Initialize an array to hold the generated time series.
series = np.zeros(total_timesteps)
# Loop over each time step to generate the time series data.
for t in range(total_timesteps):
# Initialize new gamma values for the next time step.
gamma_jtp1 = np.zeros_like(gamma_jt)
gamma_star_jtp1 = np.zeros_like(gamma_star_jt)
# For each harmonic, update its seasonal components (cosine and sine) based on the previous values.
for j in range(1, harmonics + 1):
# Calculate the cosine and sine values for this harmonic.
cos_j = np.cos(lambda_p * j)
sin_j = np.sin(lambda_p * j)
# Update the gamma values for the next time step using a recursive process.
gamma_jtp1[j - 1] = (
gamma_jt[j - 1] * cos_j
+ gamma_star_jt[j - 1] * sin_j
+ noise_std * np.random.randn()
)
gamma_star_jtp1[j - 1] = (
-gamma_jt[j - 1] * sin_j
+ gamma_star_jt[j - 1] * cos_j
+ noise_std * np.random.randn()
)
# Assign the new gamma values to the time series for the current time step.
series[t] = np.sum(gamma_jtp1)
# Move to the next time step.
gamma_jt = gamma_jtp1
gamma_star_jt = gamma_star_jtp1
# After generating the series, discard the burn-in period and return the desired portion of the time series.
wanted_series = series[-duration:] # Discard burn-in phase
return wanted_series
[docs]
def generate_time_series(
duration: int = 300,
periodicities: np.ndarray = np.array([10, 30, 50]),
num_harmonics: np.ndarray = np.array([3, 2, 2]),
std: np.ndarray = np.array([2, 3, 5]),
seed: Optional[int] = 42,
) -> np.ndarray:
"""
Generates a time series data by combining multiple seasonal components with different periodicities,
numbers of harmonics, and noise levels. Each component represents a periodic signal, and the final
series is the sum of these components.
:param duration: The length of the time series to generate, in terms of number of time steps.
:param periodicities: An array of the periodicities (number of time steps per cycle) for each seasonal component.
:param num_harmonics: An array specifying the number of harmonics (sinusoidal components) to use for each seasonal component.
:param std: An array of standard deviations of the noise for each seasonal component. The noise is added to each harmonic.
:param seed: A random seed for reproducibility. If None, a random seed is used.
:return: A numpy array representing the generated time series that combines all the seasonal components.
"""
# Set the random seed for reproducibility of the results (if provided).
np.random.seed(seed=seed)
# List to store the seasonal components that will be combined to form the final series.
terms = []
# Loop through each input periodicity and generate a seasonal time series for each.
for ix, _ in enumerate(periodicities):
# For each periodicity, generate a seasonal time series using the simulate_seasonal_term function.
s = simulate_seasonal_term(
periodicity=periodicities[
ix
], # The length of the cycle for this component.
total_cycles=duration
/ periodicities[
ix
], # Number of cycles to generate based on the total duration.
harmonics=num_harmonics[ix], # Number of harmonics for this component.
noise_std=std[ix], # Standard deviation of noise for this component.
)
# Append the generated seasonal component to the list of terms.
terms.append(s)
# Combine all the seasonal components (terms) by summing them together element-wise.
series = np.sum(terms, axis=0)
# Return the final time series, which is a combination of the different seasonal components.
return series