# Source code for imputegap.algorithms.cdrec

import time
import ctypes as __native_c_types_import;

from imputegap.tools import utils



# [docs]
def native_cdrec(__py_matrix, __py_rank, __py_epsilon, __py_iterations):
    """
    Perform matrix imputation using the CDRec algorithm with native C++ support.

    The arguments are validated, converted to ctypes values, and handed to the
    ``cdrec_imputation_parametrized`` entry point of the ``lib_cdrec.so`` shared
    library. The buffer passed to the library is then converted back into a matrix.

    Parameters
    ----------
    __py_matrix : numpy.ndarray
        The input matrix with missing values (NaNs). Must contain at least one row.
    __py_rank : int
        The truncation rank for matrix decomposition. Must satisfy
        ``0 <= rank < number of columns``.
        NOTE(review): a rank of 0 passes validation; how the native library treats it
        is not visible from this function - confirm.
    __py_epsilon : float
        The epsilon value, used as the threshold for stopping iterations based on difference.
        Must be greater than 0.
    __py_iterations : int
        The maximum number of allowed iterations for the algorithm. Must be greater than 0.

    Returns
    -------
    numpy.ndarray
        The recovered matrix after imputation.

    Raises
    ------
    ValueError
        If the matrix has no rows, or if the rank, epsilon or iteration count is
        outside its valid range.

    References
    ----------
    Khayati, M., Cudré-Mauroux, P. & Böhlen, M.H. Scalable recovery of missing blocks in time series with high and low cross-correlations. Knowl Inf Syst 62, 2257–2280 (2020). https://doi.org/10.1007/s10115-019-01421-7
    """

    row_count = len(__py_matrix)
    if row_count == 0:
        raise ValueError("CDRec: the input matrix must contain at least one row.")
    col_count = len(__py_matrix[0])

    # Explicit raises instead of `assert`: assertions are removed under `python -O`,
    # and the ctypes unsigned integer wrappers used below perform no overflow checking,
    # so an unchecked negative value would silently wrap before reaching native code.
    # The negated comparisons also reject NaN values.
    if not (0 <= __py_rank < col_count):
        raise ValueError(
            f"CDRec: rank must satisfy 0 <= rank < {col_count} (number of columns), got {__py_rank}."
        )
    if not (__py_epsilon > 0):
        raise ValueError(f"CDRec: epsilon must be greater than 0, got {__py_epsilon}.")
    if not (__py_iterations > 0):
        raise ValueError(f"CDRec: iterations must be greater than 0, got {__py_iterations}.")

    # Load the library only once the arguments are known to be valid.
    shared_lib = utils.load_share_lib("lib_cdrec.so")

    native_rows = __native_c_types_import.c_ulonglong(row_count)
    native_cols = __native_c_types_import.c_ulonglong(col_count)

    native_rank = __native_c_types_import.c_ulonglong(__py_rank)
    native_epsilon = __native_c_types_import.c_double(__py_epsilon)
    native_iterations = __native_c_types_import.c_ulonglong(__py_iterations)

    # Native code uses linear matrix layout, and also it's easier to pass it in like this
    native_matrix = utils.__marshal_as_native_column(__py_matrix)

    # The same buffer is read back below, so the library is expected to write its
    # result in place; the call's return value is not used.
    shared_lib.cdrec_imputation_parametrized(
        native_matrix, native_rows, native_cols, native_rank, native_epsilon, native_iterations
    )

    return utils.__marshal_as_numpy_column(native_matrix, row_count, col_count)




# [docs]
def cdrec(incomp_data, truncation_rank, iterations, epsilon, logs=True, lib_path=None):
    """
    CDRec algorithm for matrix imputation of missing values using Centroid Decomposition.

    Prints a summary of the run parameters, delegates the recovery to
    ``native_cdrec`` and optionally prints the elapsed time.

    Parameters
    ----------
    incomp_data : numpy.ndarray
        The input matrix with contamination (missing values represented as NaNs).
        Its ``shape`` attribute is read, so it must be at least two-dimensional.
    truncation_rank : int
        The truncation rank for matrix decomposition, forwarded to ``native_cdrec``
        (which requires it to be non-negative and smaller than the number of columns).
    iterations : int
        The maximum number of iterations allowed for the algorithm (must be greater than 0).
    epsilon : float
        The stopping-criterion threshold (must be greater than 0).
    logs : bool, optional
        Whether to log the execution time (default is True). The parameter summary
        line is printed regardless of this flag.
    lib_path : str, optional
        Custom path to the shared library file (default is None).
        NOTE(review): this argument is not referenced by the current implementation.

    Returns
    -------
    numpy.ndarray
        The imputed matrix with missing values recovered.

    Example
    -------
    >>> recov_data = cdrec(incomp_data=incomp_data, truncation_rank=1, iterations=100, epsilon=0.000001, logs=True)
    >>> print(recov_data)

    """

    print(f"\t\t\t\t(PYTHON) CDRec: ({incomp_data.shape[0]},{incomp_data.shape[1]}) for rank {truncation_rank}, "
          f"epsilon {epsilon}, and iterations {iterations}...")

    # perf_counter is a monotonic, high-resolution clock, so the measured duration
    # cannot be distorted by system clock adjustments the way time.time() can.
    started = time.perf_counter()

    # Call the C++ function to perform recovery (note the argument order:
    # native_cdrec takes epsilon before iterations).
    recov_data = native_cdrec(incomp_data, truncation_rank, epsilon, iterations)

    elapsed = time.perf_counter() - started

    if logs:
        print(f"\n\t\t> logs, imputation cdrec - Execution Time: {elapsed:.4f} seconds\n")

    return recov_data