Source code for base
# -*- coding: utf-8 -*-
# Copyright © 2015 Telecom ParisTech, TSI
# Auteur(s) : Romain Serizel
# the beta_nmf module for GPGPU is free software: you can redistribute it
# or modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation, either version 3
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
base.py
~~~~~~~
.. topic:: Contents
The base module provides the basic helper functions: beta-divergence
computation, a nonnegative random matrix generator and a data loader
(load_data)."""
import numpy as np
import theano.tensor as T
from theano.ifelse import ifelse
from sklearn import preprocessing
import h5py
def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    The reconstruction of ``X`` is computed as ``T.dot(H, W)``. Dedicated
    closed forms are used for ``beta`` equal to 2, 0 and 1; every other
    value of ``beta`` goes through the generic expression

        1 / (beta * (beta - 1)) *
            (X**beta + (beta - 1) * Y**beta - beta * X * Y**(beta - 1))

    with ``Y = T.dot(H, W)``, summed over all entries.

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar

    Returns
    -------
    div : Theano scalar
        beta divergence D(X|WH)
    """
    # Build the reconstruction once and share it between all branches.
    X_hat = T.dot(H, W)

    # beta == 2: half the sum of squared differences.
    div_beta_2 = T.sum(1. / 2 * T.power(X - X_hat, 2))

    # beta == 0: the generic expression divides by zero here.
    div_beta_0 = T.sum(X / X_hat - T.log(X / X_hat) - 1)

    # beta == 1: the generic expression divides by zero here as well.
    div_beta_1 = T.sum(T.mul(X, (T.log(X) - T.log(X_hat))) + X_hat - X)

    # Generic case. The cross term is beta * X * X_hat**(beta - 1); only
    # X_hat is raised to the power (beta - 1), not the product X * X_hat,
    # so that the divergence is zero whenever X equals X_hat.
    div_generic = T.sum(1. / (beta * (beta - 1.)) *
                        (T.power(X, beta) +
                         (beta - 1.) * T.power(X_hat, beta) -
                         beta * T.mul(X, T.power(X_hat, (beta - 1)))))

    # ifelse selects a single branch from the symbolic value of beta.
    div = ifelse(T.eq(beta, 2),
                 div_beta_2,
                 ifelse(T.eq(beta, 0),
                        div_beta_0,
                        ifelse(T.eq(beta, 1),
                               div_beta_1,
                               div_generic)))
    return div
def load_data(f_name, scale=True, rnd=True):
    """Get data from an HDF5 file.

    Reads the whole ``'x_train'`` dataset of the file into memory,
    optionally rescales and shuffles it, and prints the dataset size.

    Parameters
    ----------
    f_name : String
        file name
    scale : Boolean (default True)
        scale data to unit variance without centering
        (scikit-learn ``preprocessing.scale`` with ``with_mean=False``)
    rnd : Boolean (default True)
        randomize the data along the first axis (in-place shuffle)

    Returns
    -------
    data : Dictionary
        dictionary containing the data

        x_train: numpy array
            train data matrix
    """
    # The context manager closes the file even when the 'x_train' dataset
    # is missing or the read fails; [:] copies the dataset into memory so
    # it stays usable after the file is closed.
    with h5py.File(f_name, 'r') as train_df:
        x_train = train_df['x_train'][:]

    if scale:
        print("scaling...")
        x_train = preprocessing.scale(x_train, with_mean=False)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Total dataset size:")
    print("n train samples: %d" % x_train.shape[0])
    print("n features: %d" % x_train.shape[1])

    if rnd:
        print("Radomizing...")
        np.random.shuffle(x_train)

    data = dict(
        x_train=x_train,
    )
    return data
def nnrandn(shape):
    """Draw a random nonnegative ndarray of the requested shape.

    Samples are taken with ``np.random.randn`` and their absolute value
    is returned.

    Parameters
    ----------
    shape : tuple
        The shape

    Returns
    -------
    out : array of given shape
        The non-negative random numbers
    """
    gaussian_draws = np.random.randn(*shape)
    return np.absolute(gaussian_draws)