# Source code for base

# -*- coding: utf-8 -*-
# Copyright © 2015 Telecom ParisTech, TSI
# Auteur(s) : Romain Serizel
# the beta_nmf module for GPGPU is free software: you can redistribute it
# or modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation, either version 3
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
base.py
~~~~~~~
.. topic:: Contents

    The base module includes the basic functions such as
    beta-divergence, nonnegative random matrices generator or load_data."""

import numpy as np
import theano.tensor as T
from theano.ifelse import ifelse
from sklearn import preprocessing
import h5py


def beta_div(X, W, H, beta):
    """Compute the beta divergence D(X|HW).

    The three classical special cases are selected symbolically with
    ``ifelse`` so a single compiled graph handles every beta:

      * beta = 2 : squared Euclidean distance
      * beta = 1 : generalized Kullback-Leibler divergence
      * beta = 0 : Itakura-Saito divergence

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
        beta parameter of the divergence

    Returns
    -------
    div : Theano scalar
        beta divergence D(X|HW)
    """
    # The model approximation HW appears in every branch; build it once.
    approx = T.dot(H, W)
    div = ifelse(
        T.eq(beta, 2),
        # Euclidean: 1/2 * ||X - HW||_F^2
        T.sum(1. / 2 * T.power(X - approx, 2)),
        ifelse(
            T.eq(beta, 0),
            # Itakura-Saito: sum(X/HW - log(X/HW) - 1)
            T.sum(X / approx - T.log(X / approx) - 1),
            ifelse(
                T.eq(beta, 1),
                # KL: sum(X * log(X/HW) + HW - X)
                T.sum(T.mul(X, (T.log(X) - T.log(approx))) + approx - X),
                # General beta (Fevotte & Idier):
                #   1/(beta*(beta-1)) * sum(X^b + (b-1)*(HW)^b - b*X*(HW)^(b-1))
                # BUG FIX: the last term was (X*HW)^(beta-1) instead of
                # X * (HW)^(beta-1); the two only coincide at beta=2,
                # which is handled by the branch above.
                T.sum(1. / (beta * (beta - 1.)) *
                      (T.power(X, beta) +
                       (beta - 1.) * T.power(approx, beta) -
                       beta * T.mul(X, T.power(approx, beta - 1)))))))
    return div
def load_data(f_name, scale=True, rnd=True):
    """Get data from H5FS file.

    Parameters
    ----------
    f_name : String
        file name
    scale : Boolean (default True)
        scale data to unit variance (scikit-learn ``preprocessing.scale``)
    rnd : Boolean (default True)
        randomize the data along the time (first) axis

    Returns
    -------
    data : Dictionary
        dictionary containing the data

        x_train : numpy array
            train data matrix
    """
    # Context manager guarantees the HDF5 handle is closed even if the
    # read raises (the original left the file open on error).
    with h5py.File(f_name, 'r') as train_df:
        x_train = train_df['x_train'][:]
    if scale:
        # print() with a single argument is valid in both Python 2 and 3;
        # the original Python-2 print statements broke under Python 3.
        print("scaling...")
        # with_mean=False only divides by the std-dev; presumably chosen to
        # keep nonnegative data nonnegative for NMF -- confirm with callers.
        x_train = preprocessing.scale(x_train, with_mean=False)
    print("Total dataset size:")
    print("n train samples: %d" % x_train.shape[0])
    print("n features: %d" % x_train.shape[1])
    if rnd:
        # Typo fix in user-facing message ("Radomizing" -> "Randomizing").
        print("Randomizing...")
        # In-place shuffle of the rows (first axis) only.
        np.random.shuffle(x_train)
    data = dict(
        x_train=x_train,
    )
    return data
def nnrandn(shape):
    """generates randomly a nonnegative ndarray of given shape

    Entries are absolute values of standard-normal draws, i.e. samples
    from a half-normal distribution.

    Parameters
    ----------
    shape : tuple
        The shape

    Returns
    -------
    out : array of given shape
        The non-negative random numbers
    """
    samples = np.random.standard_normal(shape)
    return np.abs(samples)