Source code for updates

"""
updates.py
~~~~~~~~~~
.. topic:: Contents

  The update module regroups the update functions used for
  the mini-batch NMF"""
import theano
import theano.tensor as T
from theano.ifelse import ifelse


def gradient_h(X, W, H, beta):
    """Compute the gradient of the beta-divergence with respect to the
    factor H.

    Parameters
    ----------
    X : theano tensor
        Data matrix to be decomposed
    W : theano tensor
        Factor matrix containing the bases of the decomposition
    H : theano tensor
        Factor matrix containing the activations of the decomposition
    beta : theano scalar
        Coefficient beta for the beta-divergence.
        Special cases:

        * beta = 0: Itakura-Saito
        * beta = 1: Kullback-Leibler
        * beta = 2: Euclidean distance

    Returns
    -------
    grad_h : theano matrix
        Gradient of the local beta-divergence with respect to H
    """
    grad_h = ifelse(
        T.eq(beta, 2),
        T.dot(X, W) - T.dot(T.dot(H, W.T), W),
        ifelse(
            T.eq(beta, 1),
            T.dot(T.mul(T.power(T.dot(H, W.T), -1), X), W)
            - T.dot(T.ones_like(X), W),
            T.dot(T.mul(T.power(T.dot(H, W.T), beta - 2), X), W)
            - T.dot(T.power(T.dot(H, W.T), beta - 1), W)))
    return grad_h
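A minimal usage sketch, not part of the original module: assuming NumPy and a working Theano install, gradient_h can be compiled into an ordinary callable. The symbolic variable names and array shapes below are made up for the example.

import numpy as np

x_sym, w_sym, h_sym = T.matrix('X'), T.matrix('W'), T.matrix('H')
b_sym = T.scalar('beta')
grad_h_fn = theano.function([x_sym, w_sym, h_sym, b_sym],
                            gradient_h(x_sym, w_sym, h_sym, b_sym),
                            allow_input_downcast=True)

rng = np.random.RandomState(0)
x = rng.rand(8, 5)  # data: (n_samples, n_features)
w = rng.rand(5, 3)  # bases: (n_features, n_components)
h = rng.rand(8, 3)  # activations: (n_samples, n_components)
print(grad_h_fn(x, w, h, 2.0).shape)  # (8, 3): same shape as H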
def gradient_h_mu(X, W, H, beta):
    """Compute the gradient of the beta-divergence with respect to the
    factor H, returning the positive and negative contributions
    separately, e.g. for multiplicative updates.

    Parameters
    ----------
    X : theano tensor
        Data matrix to be decomposed
    W : theano tensor
        Factor matrix containing the bases of the decomposition
    H : theano tensor
        Factor matrix containing the activations of the decomposition
    beta : theano scalar
        Coefficient beta for the beta-divergence.
        Special cases:

        * beta = 0: Itakura-Saito
        * beta = 1: Kullback-Leibler
        * beta = 2: Euclidean distance

    Returns
    -------
    grad_h : theano tensor, T.stack(grad_h_pos, grad_h_neg)
        :grad_h_pos: Positive term of the gradient of the local
            beta-divergence with respect to H
        :grad_h_neg: Negative term of the gradient of the local
            beta-divergence with respect to H
    """
    grad_h_neg = ifelse(
        T.eq(beta, 2),
        T.dot(X, W),
        ifelse(
            T.eq(beta, 1),
            T.dot(T.mul(T.power(T.dot(H, W.T), -1), X), W),
            T.dot(T.mul(T.power(T.dot(H, W.T), beta - 2), X), W)))
    grad_h_pos = ifelse(
        T.eq(beta, 2),
        T.dot(T.dot(H, W.T), W),
        ifelse(
            T.eq(beta, 1),
            T.dot(T.ones_like(X), W),
            T.dot(T.power(T.dot(H, W.T), beta - 1), W)))
    return T.stack(grad_h_pos, grad_h_neg)
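As a consistency check (reusing the symbolic variables and data from the previous sketch), the difference of the two stacked terms should reproduce gradient_h, since that function computes grad_h_neg - grad_h_pos:

grads = gradient_h_mu(x_sym, w_sym, h_sym, b_sym)
g_pos, g_neg = grads[0], grads[1]  # unpack the stacked tensor

check = theano.function([x_sym, w_sym, h_sym, b_sym],
                        T.max(abs((g_neg - g_pos)
                                  - gradient_h(x_sym, w_sym, h_sym, b_sym))),
                        allow_input_downcast=True)
print(check(x, w, h, 1.5))  # ~0.0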
def gradient_w(X, W, H, beta):
    """Compute the gradient of the beta-divergence with respect to the
    factor W.

    Parameters
    ----------
    X : theano tensor
        Data matrix to be decomposed
    W : theano tensor
        Factor matrix containing the bases of the decomposition
    H : theano tensor
        Factor matrix containing the activations of the decomposition
    beta : theano scalar
        Coefficient beta for the beta-divergence.
        Special cases:

        * beta = 0: Itakura-Saito
        * beta = 1: Kullback-Leibler
        * beta = 2: Euclidean distance

    Returns
    -------
    grad_w : theano matrix
        Gradient of the local beta-divergence with respect to W
    """
    grad_w = ifelse(
        T.eq(beta, 2),
        T.dot(X.T, H) - T.dot(T.dot(H, W.T).T, H),
        ifelse(
            T.eq(beta, 1),
            T.dot(T.mul(T.power(T.dot(H, W.T), -1), X).T, H)
            - T.dot(T.ones_like(X).T, H),
            T.dot(T.mul(T.power(T.dot(H, W.T), beta - 2), X).T, H)
            - T.dot(T.power(T.dot(H, W.T), beta - 1).T, H)))
    return grad_w
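gradient_w is used in exactly the same way as gradient_h in the sketch above; the only difference is that the result is shaped like W:

grad_w_fn = theano.function([x_sym, w_sym, h_sym, b_sym],
                            gradient_w(x_sym, w_sym, h_sym, b_sym),
                            allow_input_downcast=True)
print(grad_w_fn(x, w, h, 2.0).shape)  # (5, 3): same shape as W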
def gradient_w_mu(X, W, H, beta):
    """Compute the gradient of the beta-divergence with respect to the
    factor W, returning the positive and negative contributions
    separately, e.g. for multiplicative updates.

    Parameters
    ----------
    X : theano tensor
        Data matrix to be decomposed
    W : theano tensor
        Factor matrix containing the bases of the decomposition
    H : theano tensor
        Factor matrix containing the activations of the decomposition
    beta : theano scalar
        Coefficient beta for the beta-divergence.
        Special cases:

        * beta = 0: Itakura-Saito
        * beta = 1: Kullback-Leibler
        * beta = 2: Euclidean distance

    Returns
    -------
    grad_w : theano tensor, T.stack(grad_w_pos, grad_w_neg)
        :grad_w_pos: Positive term of the gradient of the local
            beta-divergence with respect to W
        :grad_w_neg: Negative term of the gradient of the local
            beta-divergence with respect to W
    """
    grad_w_neg = ifelse(
        T.eq(beta, 2),
        T.dot(X.T, H),
        ifelse(
            T.eq(beta, 1),
            T.dot(T.mul(T.power(T.dot(H, W.T), -1), X).T, H),
            T.dot(T.mul(T.power(T.dot(H, W.T), beta - 2), X).T, H)))
    grad_w_pos = ifelse(
        T.eq(beta, 2),
        T.dot(T.dot(H, W.T).T, H),
        ifelse(
            T.eq(beta, 1),
            T.dot(T.ones_like(X).T, H),
            T.dot(T.power(T.dot(H, W.T), beta - 1).T, H)))
    return T.stack(grad_w_pos, grad_w_neg)
def mu_update_h(X, W, H, beta):
    """Compute the gradient of the beta-divergence with respect to the
    factor H and update H with multiplicative rules.

    Parameters
    ----------
    X : theano tensor
        Data matrix to be decomposed
    W : theano tensor
        Factor matrix containing the bases of the decomposition
    H : theano tensor
        Factor matrix containing the activations of the decomposition
    beta : theano scalar
        Coefficient beta for the beta-divergence.
        Special cases:

        * beta = 0: Itakura-Saito
        * beta = 1: Kullback-Leibler
        * beta = 2: Euclidean distance

    Returns
    -------
    H : theano matrix
        New value of H updated with multiplicative updates
    grad_h : theano tensor
        T.stack(grad_h_pos, grad_h_neg), the positive and negative
        terms of the gradient
    """
    grad_h_neg = ifelse(
        T.eq(beta, 2),
        T.dot(X, W),
        ifelse(
            T.eq(beta, 1),
            T.dot(T.mul(T.power(T.dot(H, W.T), -1), X), W),
            T.dot(T.mul(T.power(T.dot(H, W.T), beta - 2), X), W)))
    grad_h_pos = ifelse(
        T.eq(beta, 2),
        T.dot(T.dot(H, W.T), W),
        ifelse(
            T.eq(beta, 1),
            T.dot(T.ones_like(X), W),
            T.dot(T.power(T.dot(H, W.T), beta - 1), W)))
    H *= grad_h_neg / grad_h_pos
    return H, T.stack(grad_h_pos, grad_h_neg)
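A sketch of how mu_update_h might drive an iterative solver, with H held in a shared variable and refreshed in place through Theano's updates mechanism; the data and shapes are the illustrative ones from the first sketch:

h_shared = theano.shared(rng.rand(8, 3), name='H')
h_new, _ = mu_update_h(x_sym, w_sym, h_shared, b_sym)
step_h = theano.function([x_sym, w_sym, b_sym], [],
                         updates=[(h_shared, h_new)],
                         allow_input_downcast=True)

for _ in range(50):    # multiplicative updates keep H non-negative
    step_h(x, w, 1.0)  # Kullback-Leibler updates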
def mu_update(factor, gradient_pos, gradient_neg):
    """Update the factor with multiplicative rules.

    Parameters
    ----------
    factor : theano tensor
        The factor to be updated
    gradient_pos : theano tensor
        Positive part of the gradient with respect to the factor
    gradient_neg : theano tensor
        Negative part of the gradient with respect to the factor

    Returns
    -------
    factor : theano matrix
        New value of the factor, updated with multiplicative updates
    """
    factor *= gradient_neg / gradient_pos
    return factor
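mu_update pairs naturally with the *_mu gradient functions above; a sketch (same illustrative data) of one multiplicative step on W:

grads_w = gradient_w_mu(x_sym, w_sym, h_sym, b_sym)
w_new = mu_update(w_sym, grads_w[0], grads_w[1])  # positive part, then negative

update_w = theano.function([x_sym, w_sym, h_sym, b_sym], w_new,
                           allow_input_downcast=True)
w = update_w(x, w, h, 2.0)  # one Euclidean multiplicative step on W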
def update_grad_w(grad, grad_old, grad_new):
    """Update the global gradient for W.

    Parameters
    ----------
    grad : theano tensor
        The global gradient
    grad_old : theano tensor
        The previous value of the local gradient
    grad_new : theano tensor
        The new value of the local gradient

    Returns
    -------
    grad : theano tensor
        New value of the global gradient
    """
    return grad - grad_old + grad_new
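update_grad_w performs the bookkeeping of SAG-style aggregation: the stale contribution of a mini-batch is swapped for its fresh one, so the global gradient stays the sum of the latest local gradients. A NumPy illustration of that intended use (the batch loop, sizes, and names are assumptions for the example, not part of the module):

import numpy as np

rng = np.random.RandomState(0)
grad_w_global = np.zeros((5, 3))                     # running global gradient
grad_w_batch = [np.zeros((5, 3)) for _ in range(4)]  # last local gradient per batch

for b in [2, 0, 3, 1]:              # visit mini-batches in any order
    grad_new = rng.rand(5, 3)       # freshly computed local gradient for batch b
    grad_w_global = grad_w_global - grad_w_batch[b] + grad_new  # update_grad_w rule
    grad_w_batch[b] = grad_new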