Source code for ecgan.config.dataclasses

"""Custom (partially nested) dataclasses describing configurations of individual components."""
# pylint: disable=C0103
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Union

from ecgan.config.nested_dataclass import nested_dataclass
from ecgan.utils.custom_types import (
    DiscriminationStrategy,
    LatentDistribution,
    MetricOptimization,
    SamplingAlgorithm,
    TrackerType,
    Transformation,
    WeightInitialization,
)
from ecgan.utils.miscellaneous import generate_seed



@dataclass
class OptimizerConfig:
    """Type hints for Optimizer dicts."""

    _name = 'optimizer'

    NAME: str
    LR: float  # Learning rate
    WEIGHT_DECAY: Optional[float] = None
    MOMENTUM: Optional[float] = None
    DAMPENING: Optional[float] = None
    BETAS: Optional[Tuple[float, float]] = None
    EPS: Optional[float] = None
    ALPHA: Optional[float] = None
    CENTERED: Optional[bool] = None

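
# Usage sketch (illustrative values, not part of the original module): the
# Optional fields cover optimizer-specific hyperparameters, so an Adam-style
# config only sets the fields Adam consumes, e.g.
#
#     adam_cfg = OptimizerConfig(NAME='adam', LR=2e-4, BETAS=(0.5, 0.999), EPS=1e-8)
#
# leaving MOMENTUM, DAMPENING, ALPHA and CENTERED as None. The accepted NAME
# strings ('adam' here) are an assumption.
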

@nested_dataclass
class InverseModuleConfig:
    """Type hints for the module config of an inverse mapping module."""

    KERNEL_SIZES: List[int]
    LOSS: str
    NAME: str
    OPTIMIZER: OptimizerConfig


@nested_dataclass
class ReconstructionConfig:
    """Type hints for ReconstructionType dicts."""

    STRATEGY: str


@nested_dataclass
class EmbeddingConfig:
    """Type hints for embedding dicts."""

    CREATE_UMAP: bool
    LOAD_PRETRAINED_UMAP: bool


@nested_dataclass
class LatentWalkReconstructionConfig(ReconstructionConfig):
    """Type hints for latent walk reconstructions."""

    MAX_RECONSTRUCTION_ITERATIONS: int
    EPSILON: float
    LATENT_OPTIMIZER: OptimizerConfig
    CRITERION: str
    ADAPT_LR: bool
    LR_THRESHOLD: float
    VERBOSE_STEPS: Optional[int] = None

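
# Construction sketch: that `nested_dataclass` accepts plain dicts for
# annotated dataclass fields and instantiates them recursively is an
# assumption based on its name and the `configure` helpers below; all values
# here are placeholders.
#
#     walk_cfg = LatentWalkReconstructionConfig(
#         STRATEGY='latent_walk',                         # hypothetical strategy name
#         MAX_RECONSTRUCTION_ITERATIONS=100,
#         EPSILON=1e-3,
#         LATENT_OPTIMIZER={'NAME': 'adam', 'LR': 1e-3},  # becomes an OptimizerConfig
#         CRITERION='mse',                                # hypothetical criterion name
#         ADAPT_LR=True,
#         LR_THRESHOLD=1e-5,
#     )
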

@dataclass
class LossConfig:
    """Type hints for a generic loss configuration."""

    NAME: str
    GRADIENT_PENALTY_WEIGHT: Optional[float] = None
    CLIPPING_BOUND: Optional[float] = None
    REDUCTION: Optional[str] = None


@dataclass
class BaseCNNConfig:
    """Generalized configuration of a CNN module."""

    HIDDEN_CHANNELS: List[int]


@dataclass
class BaseRNNConfig:
    """Generalized configuration of an RNN module."""

    HIDDEN_DIMS: int  # Number of layers
    HIDDEN_SIZE: int  # Size of each layer


@dataclass
class TrackingConfig:
    """Config for tracking and logging information."""

    TRACKER_NAME: str
    ENTITY: str
    PROJECT: str
    EXPERIMENT_NAME: str
    LOCAL_SAVE: bool
    SAVE_PDF: bool
    S3_CHECKPOINT_UPLOAD: bool  # Currently only supported for the W&B tracker
    LOG_LEVEL: str = 'info'

    @property
    def tracker_name(self) -> TrackerType:
        """Return an instance of the internal enum class `TrackerType`."""
        return TrackerType(self.TRACKER_NAME)

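
# The upper-case fields mirror the raw (e.g. YAML) config keys; properties
# such as `tracker_name` convert the raw string into the internal enum and
# raise a ValueError if the string is not a valid TrackerType value.
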

@nested_dataclass
class ExperimentConfig:
    """
    Parameters regarding the experiment itself.

    Includes information on the experiment, the used dataset and the
    directory from which the dataset is loaded.
    """

    _name = 'experiment'

    TRACKER: TrackingConfig
    DATASET: str
    MODULE: str
    LOADING_DIR: str
    TRAIN_ON_GPU: bool

    @staticmethod
    def configure(  # pylint: disable=R0913
        entity: str,
        project: str,
        experiment_name: str,
        module: str,
        dataset: str,
        tracker: str = TrackerType.LOCAL.value,
        local_save: bool = False,
        save_pdf: bool = False,
        loading_dir: str = 'data',
        train_on_gpu: bool = True,
        s3_checkpoint_upload: bool = False,
        log_level: str = 'info',
    ) -> Dict:
        """Return a default experiment configuration."""
        return {
            'experiment': {
                'TRACKER': {
                    'TRACKER_NAME': tracker,
                    'PROJECT': project,
                    'EXPERIMENT_NAME': experiment_name,
                    'ENTITY': entity,
                    'LOCAL_SAVE': local_save,
                    'SAVE_PDF': save_pdf,
                    'S3_CHECKPOINT_UPLOAD': s3_checkpoint_upload,
                    'LOG_LEVEL': log_level,
                },
                'MODULE': module,
                'DATASET': dataset,
                'LOADING_DIR': loading_dir,
                'TRAIN_ON_GPU': train_on_gpu,
            }
        }

    @property
    def name(self):
        """Return the name identifying this config section."""
        return self._name

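
# Usage sketch (argument values are placeholders, not defaults of this
# module): `configure` returns a plain nested dict mirroring the dataclass
# layout, suitable for serialization or later (nested) instantiation.
#
#     cfg = ExperimentConfig.configure(
#         entity='my_team',
#         project='my_project',
#         experiment_name='baseline_run',
#         module='dcgan',
#         dataset='sine',
#     )
#     assert cfg['experiment']['TRACKER']['TRACKER_NAME'] == TrackerType.LOCAL.value
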

@dataclass
class PreprocessingConfig:
    """Create a preprocessing config object."""

    _name = 'preprocessing'

    LOADING_DIR: str
    NUM_WORKERS: int
    WINDOW_LENGTH: int
    WINDOW_STEP_SIZE: int
    RESAMPLING_ALGORITHM: SamplingAlgorithm
    TARGET_SEQUENCE_LENGTH: int
    LOADING_SRC: Optional[str]
    NUM_SAMPLES: int

    @staticmethod
    def configure(
        loading_src: Optional[str],
        target_sequence_length: int,
        loading_dir: str = 'data',
        num_workers: int = 4,
        window_length: int = 0,
        window_step_size: int = 0,
        resampling_algo: str = 'lttb',
        num_samples: int = 0,
    ) -> Dict:
        """Return a default preprocessing configuration."""
        return {
            'preprocessing': {
                'LOADING_DIR': loading_dir,
                'LOADING_SRC': loading_src,
                'NUM_WORKERS': num_workers,
                'WINDOW_LENGTH': window_length,
                'WINDOW_STEP_SIZE': window_step_size,
                'RESAMPLING_ALGORITHM': resampling_algo,
                'TARGET_SEQUENCE_LENGTH': target_sequence_length,
                'NUM_SAMPLES': num_samples,
            }
        }

    @property
    def name(self):
        """Return the name identifying this config section."""
        return self._name

    @property
    def resampling_algorithm(self) -> SamplingAlgorithm:
        """Return an instance of the internal enum class `SamplingAlgorithm`."""
        return SamplingAlgorithm(self.RESAMPLING_ALGORITHM)

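
# Note: RESAMPLING_ALGORITHM is annotated as SamplingAlgorithm, but
# `configure` fills it with a plain string (e.g. 'lttb'); the
# `resampling_algorithm` property performs the string-to-enum conversion,
# so consumers should prefer the property over the raw field.
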

@dataclass
class SyntheticPreprocessingConfig(PreprocessingConfig):
    """Preprocessing configuration for synthetic datasets."""

    RANGE: Tuple[int, int]
    ANOMALY_PERCENTAGE: float
    NOISE_PERCENTAGE: float
    SYNTHESIS_SEED: int

    @staticmethod
    def configure(  # pylint: disable=R0913, W0221
        loading_src: Optional[str],
        target_sequence_length: int,
        loading_dir: str = 'data',
        num_workers: int = 4,
        window_length: int = 0,
        window_step_size: int = 0,
        resampling_algo: str = 'lttb',
        num_samples: int = 0,
        data_range: Tuple[int, int] = (0, 25),
        anomaly_percentage: float = 0.2,
        noise_percentage: float = 0.5,
        synthesis_seed: int = 1337,
    ) -> Dict:
        """Provide a default configuration for a synthetic dataset."""
        result_dict: Dict = PreprocessingConfig.configure(
            loading_src=loading_src,
            target_sequence_length=target_sequence_length,
            loading_dir=loading_dir,
            num_workers=num_workers,
            window_length=window_length,
            window_step_size=window_step_size,
            resampling_algo=resampling_algo,
            num_samples=num_samples,
        )
        update_dict: Dict = {
            "RANGE": data_range,
            "ANOMALY_PERCENTAGE": anomaly_percentage,
            "NOISE_PERCENTAGE": noise_percentage,
            "SYNTHESIS_SEED": synthesis_seed,
        }
        result_dict['preprocessing'].update(update_dict)
        return result_dict


@dataclass
class SinePreprocessingConfig(SyntheticPreprocessingConfig):
    """Preprocessing config for the synthetic sine dataset."""

    AMPLITUDE: float = 3.0
    FREQUENCY: float = 3.0
    PHASE: float = 5.0
    VERTICAL_TRANSLATION: float = 1.0

    @staticmethod
    def configure(  # pylint: disable=W0221, R0913
        loading_src: Optional[str],
        target_sequence_length: int,
        loading_dir: str = 'data',
        num_workers: int = 4,
        window_length: int = 0,
        window_step_size: int = 0,
        resampling_algo: str = 'lttb',
        num_samples: int = 0,
        data_range: Tuple[int, int] = (0, 25),
        anomaly_percentage: float = 0.2,
        noise_percentage: float = 0.5,
        synthesis_seed: int = 1337,
        amplitude: float = 3,
        frequency: float = 3,
        phase: float = 5,
        vertical_translation: float = 1,
    ) -> Dict:
        """Return the default configuration for the sine dataset."""
        result_dict = SyntheticPreprocessingConfig.configure(
            loading_src=loading_src,
            target_sequence_length=target_sequence_length,
            loading_dir=loading_dir,
            num_workers=num_workers,
            window_length=window_length,
            window_step_size=window_step_size,
            resampling_algo=resampling_algo,
            num_samples=num_samples,
            data_range=data_range,
            anomaly_percentage=anomaly_percentage,
            noise_percentage=noise_percentage,
            synthesis_seed=synthesis_seed,
        )
        update_dict = {
            "AMPLITUDE": amplitude,
            "FREQUENCY": frequency,
            "PHASE": phase,
            "VERTICAL_TRANSLATION": vertical_translation,
        }
        result_dict['preprocessing'].update(update_dict)
        return result_dict

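
# The subclass `configure` methods share one pattern: build the parent dict,
# then update its nested 'preprocessing' section with the subclass fields.
# Sketch (128 is a placeholder sequence length):
#
#     cfg = SinePreprocessingConfig.configure(loading_src=None, target_sequence_length=128)
#     # cfg['preprocessing'] now holds base keys such as 'LOADING_DIR',
#     # synthetic keys such as 'RANGE', and sine keys such as 'AMPLITUDE'.
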

@dataclass
class TrainerConfig:
    """Used to initialize a config for training."""

    _name = "trainer"

    NUM_WORKERS: int
    CHANNELS: Union[int, List[int]]
    EPOCHS: int
    BATCH_SIZE: int
    TRANSFORMATION: str
    SPLIT_PATH: str
    SPLIT_METHOD: str
    SPLIT: Tuple[float, float]
    TRAIN_ONLY_NORMAL: bool
    CROSS_VAL_FOLDS: int
    CHECKPOINT_INTERVAL: int
    SAMPLE_INTERVAL: int
    BINARY_LABELS: bool
    MANUAL_SEED: int

    @staticmethod
    def configure(  # pylint: disable=R0913
        transformation: Transformation = Transformation.NONE,
        num_workers: int = 4,
        epochs: int = 500,
        batch_size: int = 64,
        split_path: str = 'split.pkl',
        split_method: str = 'random',
        split: Tuple[float, float] = (0.85, 0.15),
        cross_val_folds: int = 5,
        checkpoint_interval: int = 10,
        sample_interval: int = 1,
        train_only_normal: bool = True,
        binary_labels: bool = True,
        channels: Union[int, List[int]] = 0,
        manual_seed: int = generate_seed(),
    ) -> Dict:
        """Return a default configuration for the trainer."""
        return {
            'trainer': {
                'NUM_WORKERS': num_workers,
                'CHANNELS': channels,
                'EPOCHS': epochs,
                'BATCH_SIZE': batch_size,
                'TRANSFORMATION': transformation.value,
                'SPLIT_PATH': split_path,
                'SPLIT_METHOD': split_method,
                'SPLIT': split,
                'CROSS_VAL_FOLDS': cross_val_folds,
                'CHECKPOINT_INTERVAL': checkpoint_interval,
                'SAMPLE_INTERVAL': sample_interval,
                'TRAIN_ONLY_NORMAL': train_only_normal,
                'BINARY_LABELS': binary_labels,
                'MANUAL_SEED': manual_seed,
            }
        }

    @property
    def name(self):
        """Return the name identifying this config section."""
        return self._name

    @property
    def transformation(self) -> Transformation:
        """Return an instance of the internal enum class `Transformation`."""
        return Transformation(self.TRANSFORMATION)

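
# Note: the default `manual_seed=generate_seed()` is evaluated once, when
# the function is defined at import time. Repeated `configure()` calls in
# the same process therefore share one default seed; pass `manual_seed`
# explicitly to vary it per call.
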

@dataclass
class WeightInitializationConfig:
    """Base weight initialization config."""

    NAME: str

    @property
    def weight_init_type(self) -> WeightInitialization:
        """Return an instance of the internal enum class `WeightInitialization`."""
        return WeightInitialization(self.NAME)


@dataclass
class NormalInitializationConfig(WeightInitializationConfig):
    """Weight initialization config for drawing from a normal distribution."""

    MEAN: float
    STD: float


@dataclass
class UniformInitializationConfig(WeightInitializationConfig):
    """Weight initialization config for drawing from a uniform distribution."""

    LOWER_BOUND: float
    UPPER_BOUND: float

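
# Usage sketch ('normal' as a WeightInitialization value is an assumption):
# NAME selects the scheme, the subclass carries its parameters.
#
#     init_cfg = NormalInitializationConfig(NAME='normal', MEAN=0.0, STD=0.02)
#     init_cfg.weight_init_type  # -> WeightInitialization('normal')
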

@nested_dataclass
class ModuleConfig:
    """Generalized configuration of a module."""

    _name = "module"

    @property
    def name(self):
        """Return the name identifying this config section."""
        return self._name


@nested_dataclass
class BaseNNConfig(ModuleConfig):
    """Generic neural network configuration."""

    OPTIMIZER: OptimizerConfig
    LOSS: LossConfig
    LAYER_SPECIFICATION: Union[BaseCNNConfig, BaseRNNConfig]
    WEIGHT_INIT: Union[WeightInitializationConfig, NormalInitializationConfig, UniformInitializationConfig]
    SPECTRAL_NORM: bool = False
    INPUT_NORMALIZATION: Optional[str] = None


@nested_dataclass
class AutoEncoderConfig(ModuleConfig):
    """Generalized configuration of an autoencoder module."""

    LATENT_SIZE: int
    ENCODER: BaseNNConfig
    DECODER: BaseNNConfig
    TANH_OUT: bool
    LATENT_SPACE: str

    @property
    def latent_distribution(self) -> LatentDistribution:
        """Convenience conversion to internal enum type."""
        return LatentDistribution(self.LATENT_SPACE)


@nested_dataclass
class VariationalAutoEncoderConfig(AutoEncoderConfig):
    """Generalized configuration of a variational autoencoder module."""

    KL_BETA: float


@nested_dataclass
class GeneratorConfig(BaseNNConfig):
    """Generic generator configuration."""

    TANH_OUT: bool = False


@nested_dataclass
class GANModuleConfig(ModuleConfig):
    """Generalized configuration of a GAN module."""

    LATENT_SIZE: int
    GENERATOR: GeneratorConfig
    DISCRIMINATOR: BaseNNConfig
    GENERATOR_ROUNDS: int
    DISCRIMINATOR_ROUNDS: int
    LATENT_SPACE: str

    @property
    def latent_distribution(self) -> LatentDistribution:
        """Convenience conversion to internal enum type."""
        return LatentDistribution(self.LATENT_SPACE)


@nested_dataclass
class EncoderGANConfig(GANModuleConfig):
    """Generalized configuration for GAN modules with an additional encoder (e.g. BeatGAN)."""

    ENCODER: BaseNNConfig


@nested_dataclass
class VAEGANConfig(EncoderGANConfig):
    """VAEGAN config."""

    KL_WARMUP: int
    KL_ANNEAL_ROUNDS: int
    KL_BETA: float


@nested_dataclass
class AdExperimentConfig:
    """Basic experimental settings for the anomaly detection process."""

    _name = 'ad_experiment'

    TRACKER: TrackingConfig
    RUN_URI: str
    RUN_VERSION: str
    FOLD: int
    SAVE_DIR: str

    @property
    def name(self):
        """Return the name identifying this config section."""
        return self._name

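
# Module config hierarchy: BaseNNConfig bundles per-network settings
# (optimizer, loss, layer specification, weight init); AutoEncoderConfig and
# GANModuleConfig compose such networks and expose `latent_distribution` for
# the raw LATENT_SPACE string (a ValueError is raised for unknown values);
# EncoderGANConfig adds an encoder and VAEGANConfig adds KL scheduling on
# top. GENERATOR_ROUNDS/DISCRIMINATOR_ROUNDS read as update steps per
# training iteration, as is common for GAN training; that reading is
# inferred from the field names, not stated in this module.
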

@dataclass
class DetectionConfig:
    """Generalized configuration of a detection module."""

    _name = "detection"

    DETECTOR: str
    BATCH_SIZE: int
    NUM_WORKERS: int
    AMOUNT_OF_RUNS: int
    SAVE_DATA: bool

    @property
    def name(self) -> str:
        """Return the name identifying this config section."""
        return self._name


@nested_dataclass
class ReconstructionDetectionConfig(DetectionConfig):
    """Generalized configuration for reconstruction-based detection."""

    EMBEDDING: EmbeddingConfig


@nested_dataclass
class GANDetectorConfig(ReconstructionDetectionConfig):
    """Base config for GAN based anomaly detection."""

    DISCRIMINATION_STRATEGY: str
    AD_SCORE_STRATEGY: str
    NORMALIZE_ERROR: bool
    RECONSTRUCTION: Union[ReconstructionConfig, LatentWalkReconstructionConfig]

    @property
    def ad_score_strategy(self) -> MetricOptimization:
        """Return an instance of the internal enum class `MetricOptimization`."""
        return MetricOptimization(self.AD_SCORE_STRATEGY)

    @property
    def discrimination_strategy(self) -> DiscriminationStrategy:
        """Return an instance of the internal enum class `DiscriminationStrategy`."""
        return DiscriminationStrategy(self.DISCRIMINATION_STRATEGY)


@nested_dataclass
class InverseDetectorConfig(GANDetectorConfig):
    """Config for anomaly detectors utilizing GAN inversion."""

    RECONSTRUCTION: ReconstructionConfig
    INVERSE_MAPPING_URI: Optional[str]


@nested_dataclass
class GANLatentWalkConfig(GANDetectorConfig):
    """Config for anomaly detectors utilizing latent walks to approximate the reconstructed series."""

    RECONSTRUCTION: LatentWalkReconstructionConfig
    INVERSE_MAPPING_URI: Optional[str]

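
# The detector hierarchy narrows RECONSTRUCTION: GANDetectorConfig accepts
# either reconstruction variant, InverseDetectorConfig pins it to
# ReconstructionConfig (with the reconstruction presumably produced via the
# learned inverse mapping referenced by INVERSE_MAPPING_URI; inferred from
# the names), and GANLatentWalkConfig pins it to
# LatentWalkReconstructionConfig for iterative latent-space optimization.
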