Commit e673595d authored by Luca Pasa

initial commit

.idea
*.pyc
*.npy
Restore_File/*
*.slurm
*.err
*.out
*.wav
Code/toy_sample/*
from tensorflow.python.ops import rnn_cell_impl
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.framework.ops import convert_to_tensor
class ESNCell(rnn_cell_impl.RNNCell):
"""Echo State Network Cell.
Based on http://www.faculty.jacobs-university.de/hjaeger/pubs/EchoStatesTechRep.pdf
    Only the reservoir, i.e. the randomized recurrent layer, is modelled. The trainable readout
    layer, which maps the reservoir output to the target output, is not implemented by this cell,
    and neither is the feedback from the readout to the reservoir (a fairly common technique).
    A practical guide to using Echo State Networks:
    http://minds.jacobs-university.de/sites/default/files/uploads/papers/PracticalESN.pdf
    Since TF does not currently provide a way to compute the spectral radius of a matrix,
    the necessary condition for the echo state property, `max(abs(eig(W))) < 1`, is approximated
    by scaling the 2-norm of the reservoir matrix, which is an upper bound on the spectral radius.
    See https://en.wikipedia.org/wiki/Matrix_norm, the section on induced norms.
"""
def __init__(self, num_units, wr2_scale=0.7, connectivity=0.1, leaky=1.0, activation=math_ops.tanh,
win_init=init_ops.random_normal_initializer(),
wr_init=init_ops.random_normal_initializer(),
bias_init=init_ops.random_normal_initializer()):
"""Initialize the Echo State Network Cell.
Args:
num_units: Int or 0-D Int Tensor, the number of units in the reservoir
          wr2_scale: desired 2-norm of the reservoir weight matrix;
            `wr2_scale < 1` is a sufficient condition for the echo state property.
          connectivity: connection probability between two reservoir units.
          leaky: leaking rate; 1.0 means no leaky integration.
activation: activation function
win_init: initializer for input weights
wr_init: used to initialize reservoir weights before applying connectivity mask and scaling
bias_init: initializer for biases
"""
self._num_units = num_units
self._leaky = leaky
self._activation = activation
def _wr_initializer(shape, dtype, partition_info=None):
wr = wr_init(shape, dtype=dtype)
connectivity_mask = math_ops.cast(
math_ops.less_equal(
random_ops.random_uniform(shape),
connectivity),
dtype)
wr = math_ops.multiply(wr, connectivity_mask)
            # Frobenius norm; it upper-bounds the induced 2-norm, and hence the spectral radius.
            wr_norm2 = math_ops.sqrt(math_ops.reduce_sum(math_ops.square(wr)))
            # Guard against division by zero when the connectivity mask zeroes out the whole matrix.
            is_norm_0 = math_ops.cast(math_ops.equal(wr_norm2, 0), dtype)
            wr = wr * wr2_scale / (wr_norm2 + 1 * is_norm_0)
            return wr
self._win_initializer = win_init
self._bias_initializer = bias_init
self._wr_initializer = _wr_initializer
@property
def output_size(self):
return self._num_units
@property
def state_size(self):
return self._num_units
def __call__(self, inputs, state, scope=None):
""" Run one step of ESN Cell
Args:
inputs: `2-D Tensor` with shape `[batch_size x input_size]`.
state: `2-D Tensor` with shape `[batch_size x self.state_size]`.
scope: VariableScope for the created subgraph; defaults to class `ESNCell`.
Returns:
A tuple `(output, new_state)`, computed as
`output = new_state = (1 - leaky) * state + leaky * activation(Win * input + Wr * state + B)`.
Raises:
ValueError: if `inputs` or `state` tensor size mismatch the previously provided dimension.
"""
inputs = convert_to_tensor(inputs)
input_size = inputs.get_shape().as_list()[1]
dtype = inputs.dtype
with vs.variable_scope(scope or type(self).__name__): # "ESNCell"
win = vs.get_variable("InputMatrix", [input_size, self._num_units], dtype=dtype,
trainable=False, initializer=self._win_initializer)
wr = vs.get_variable("ReservoirMatrix", [self._num_units, self._num_units], dtype=dtype,
trainable=False, initializer=self._wr_initializer)
b = vs.get_variable("Bias", [self._num_units], dtype=dtype, trainable=False, initializer=self._bias_initializer)
in_mat = array_ops.concat([inputs, state], axis=1)
weights_mat = array_ops.concat([win, wr], axis=0)
output = (1 - self._leaky) * state + self._leaky * self._activation(math_ops.matmul(in_mat, weights_mat) + b)
return output, output
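A quick NumPy check of the scaling argument above (illustrative only, not part of the original file): the spectral radius is bounded by the Frobenius norm that `_wr_initializer` computes, so scaling that norm to `wr2_scale < 1` enforces the echo state condition.

# Illustrative sanity check (not part of the original code).
import numpy as np

W = np.random.randn(100, 100)
W = W * 0.7 / np.linalg.norm(W)              # scale the Frobenius norm to 0.7, as in _wr_initializer
rho = np.max(np.abs(np.linalg.eigvals(W)))   # spectral radius
assert rho <= np.linalg.norm(W) + 1e-9       # rho <= ||W||_2 <= ||W||_F = 0.7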
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
import tensorflow as tf
from Code.Cells.ESNCell import ESNCell
from Code.Utils.decoratorTF import define_scope
import numpy as np
def pinv(A, reltol=1e-6):
    # Compute the SVD of the input matrix A.
    s, u, v = tf.svd(A)
    # Zero out singular values below reltol * max(s); zeroing them (rather than
    # dropping them, which would change the matrix shapes) keeps the matmuls below valid.
    atol = tf.reduce_max(s) * reltol
    s_inv = tf.where(s > atol, 1. / s, tf.zeros_like(s))
    # Moore-Penrose pseudoinverse: A+ = V * S+ * U^T.
    return tf.matmul(v, tf.matmul(tf.diag(s_inv), u, transpose_b=True))
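A quick sanity check of `pinv` against NumPy (illustrative, not part of the original file; assumes the TF 1.x session API and the imports above):

# Illustrative check (not part of the original code).
_A = np.random.rand(6, 3).astype(np.float32)
with tf.Session() as _sess:
    _A_pinv = _sess.run(pinv(tf.constant(_A)))
np.testing.assert_allclose(_A_pinv, np.linalg.pinv(_A), atol=1e-5)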
class ESN:
def __init__(self, input_dim, reservoir_dim, output_dim, wr2_scale=0.7, connectivity=0.1):
self.input_dim = input_dim
self.reservoir_dim = reservoir_dim
self.output_dim = output_dim
self.wr2_scale = wr2_scale
self.connectivity = connectivity
def init_placeholder(self, x_ph, y_ph, x_len_ph):
# placeholders
self.x_ph = x_ph
self.y_ph = y_ph
self.x_len_ph = x_len_ph
        # Touching the @define_scope properties builds the corresponding
        # subgraphs once, in a fixed order: reservoir -> W_out -> output.
        self.reservoir
        self.compute_W_out
        self.output
@define_scope("reservoir")
def reservoir(self):
# fw_cell:
cells = []
for dim in self.reservoir_dim:
            # Pass hyper-parameters by keyword; ESNCell's positional order is
            # (num_units, wr2_scale, connectivity).
            cells.append(ESNCell(dim, wr2_scale=self.wr2_scale, connectivity=self.connectivity))
ESN_cells = tf.nn.rnn_cell.MultiRNNCell(cells)
esn_outputs, state = tf.nn.dynamic_rnn(
cell=ESN_cells,
inputs=self.x_ph,
dtype=tf.float32,
sequence_length=self.x_len_ph
)
return esn_outputs
@define_scope("compute_wout")
def compute_W_out(self):
flat_reservoir=tf.reshape(self.reservoir,[-1,self.reservoir_dim[-1]])
flat_target=tf.reshape(self.y_ph,[-1,self.output_dim])
print flat_reservoir.shape
print flat_target.shape
self.W_out = tf.matmul(pinv(flat_reservoir),flat_target)
return self.W_out
@define_scope("compute_output")
def output(self):
if self.W_out is not None:
n_sample=tf.shape(self.reservoir)[0]
flat_reservoir = tf.reshape(self.reservoir, [-1, self.reservoir_dim[-1]])
output= tf.matmul(flat_reservoir, self.W_out)
return tf.reshape(output,[n_sample,-1,self.output_dim])
else:
return None
if __name__ == '__main__':
_input_dim=3
_res_dim=[5]
_output_dim=4
graph = tf.Graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
    # Build the ops in `graph` explicitly; tf.Session(graph=graph) alone does
    # not make `graph` the default graph for op construction.
    with graph.as_default(), tf.Session(graph=graph, config=config) as sess:
model=ESN(_input_dim, _res_dim, _output_dim)
_x_ph = tf.placeholder("float32", [None, None, _input_dim], 'x')
_x_len_ph = tf.placeholder("int32", shape=[None], name='x_len')
_y_ph = tf.placeholder("float32", [None, None, _output_dim], 'y')
model.init_placeholder(_x_ph, _y_ph, _x_len_ph)
_x = np.random.rand(10,8,_input_dim)
_x_len = np.asarray([8]*10)
_y = np.random.rand(10,8,_output_dim)
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
# initializer
sess.run(init_op)
sess.run(model.compute_W_out,feed_dict={_x_ph:_x,
_y_ph:_y,
_x_len_ph:_x_len})
res= sess.run(model.output,feed_dict={_x_ph:_x,
_y_ph:_y,
_x_len_ph:_x_len})
        print(res.shape)
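The pseudoinverse readout above is plain least squares; a minimal NumPy check of that equivalence (illustrative, not part of the original files; assumes a NumPy recent enough for the `rcond=None` argument):

# Illustrative check (not part of the original code).
import numpy as np
X = np.random.rand(80, 5)                       # flattened reservoir states: (batch * time, units)
Y = np.random.rand(80, 4)                       # flattened targets: (batch * time, output_dim)
W_pinv = np.linalg.pinv(X).dot(Y)               # what compute_W_out does
W_lstsq = np.linalg.lstsq(X, Y, rcond=None)[0]  # minimum-norm least-squares solution
np.testing.assert_allclose(W_pinv, W_lstsq, atol=1e-8)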
import tensorflow as tf
from scipy import signal
import numpy as np
# ----------Audio Seq Reco-----------#
def reconstruct_sources(specs_ph, phase_source_specs_ph, num_samples=48000, sample_rate=16e3, window_size=25,
                        step_size=10, pow_law_input=True):
    # Recover the phase from the complex reference spectrogram.
    rec_ang_specs = tf.angle(phase_source_specs_ph)
    # Undo the power-law compression (|S| ** 0.3) if it was applied to the input magnitudes.
    if pow_law_input:
        mag_abs_specs = tf.cast(specs_ph, tf.float32) ** (1 / 0.3)
    else:
        mag_abs_specs = tf.cast(specs_ph, tf.float32)
    # Recombine magnitude and phase into a complex spectrogram.
    specs_ph_with_phase = tf.complex(real=mag_abs_specs * tf.cos(rec_ang_specs),
                                     imag=mag_abs_specs * tf.sin(rec_ang_specs))
    # Compute the inverse STFT; window_size and step_size are given in milliseconds.
    window_frame_size = int(round(window_size / 1e3 * sample_rate))
    step_frame_size = int(round(step_size / 1e3 * sample_rate))
    reconstructed_sources = tf.contrib.signal.inverse_stft(specs_ph_with_phase, frame_length=window_frame_size,
                                                           frame_step=step_frame_size,
                                                           window_fn=tf.contrib.signal.inverse_stft_window_fn(
                                                               step_frame_size))
    # Optionally trim the reconstruction to a fixed number of samples.
    if num_samples > 0:
        reco_signal = tf.slice(reconstructed_sources, begin=[0, 0],
                               size=[tf.shape(specs_ph_with_phase)[0], num_samples])
    else:
        reco_signal = reconstructed_sources
    return reco_signal
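The millisecond-to-sample conversion above, worked out for the defaults (a check, not part of the original file): at 16 kHz, a 25 ms window spans 400 samples and a 10 ms step spans 160 samples.

# Worked check of the ms-to-samples arithmetic (illustrative, not original code).
assert int(round(25 / 1e3 * 16e3)) == 400  # window: 0.025 s * 16000 Hz
assert int(round(10 / 1e3 * 16e3)) == 160  # step:   0.010 s * 16000 Hz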
def reconstruct_sources_no_pl_comp(specs_ph, phase_source_specs_ph, num_samples=48000, sample_rate=16e3, window_size=25,
step_size=10):
return reconstruct_sources(specs_ph, phase_source_specs_ph, num_samples, sample_rate, window_size,
step_size, False)
def get_power_law_spec(spec, pow=0.3):
    # Power-law compression of the magnitude spectrogram (NumPy version).
    return np.abs(spec) ** pow
def get_power_law_spec_tf(spec, pow=0.3):
    # Power-law compression of the magnitude spectrogram (TensorFlow version).
    return tf.abs(spec) ** pow
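Power-law compression is inverted by raising to 1 / 0.3, which is exactly what reconstruct_sources does when pow_law_input=True (an illustrative round trip, not original code):

# Illustrative round trip (not part of the original file).
_mag = np.abs(np.random.randn(4, 257)).astype(np.float32)
_compressed = get_power_law_spec(_mag)  # |S| ** 0.3
_recovered = _compressed ** (1 / 0.3)   # undo the compression
np.testing.assert_allclose(_recovered, _mag, rtol=1e-3, atol=1e-5)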
def downsampling(samples, sample_rate, downsample_rate):
    # Use float division so the duration is correct under Python 2 as well.
    secs = len(samples) / float(sample_rate)
    num_samples = int(downsample_rate * secs)
    return signal.resample(samples, num_samples)
def compute_stsf(sources, window_size=25, step_size=10, out_shape=None):
    # Pad 256 samples (half the FFT length) at the front of each signal.
    paddings = [[0, 0], [256, 0]]
    sources_pad = tf.pad(sources, paddings)
    # window_size and step_size are given in milliseconds, at a 16 kHz sample rate.
    window_frame_size = int(round(window_size / 1e3 * 16e3))
    step_frame_size = int(round(step_size / 1e3 * 16e3))
    # Compute the STFTs.
    specs = tf.contrib.signal.stft(sources_pad, frame_length=window_frame_size,
                                   frame_step=step_frame_size, pad_end=True, fft_length=512)
    if out_shape is not None:
        specs = tf.slice(specs, begin=[0, 0, 0], size=out_shape)
    # Eager-style helper: run the graph in a fresh session and return the values.
    with tf.Session() as sess:
        return sess.run(specs)
def compute_stsf_tf(sources, window_size=25, step_size=10, out_shape=None):
    # Graph version of compute_stsf: returns the STFT op instead of running it.
    paddings = [[0, 0], [256, 0]]
    sources_pad = tf.pad(sources, paddings)
    window_frame_size = int(round(window_size / 1e3 * 16e3))
    step_frame_size = int(round(step_size / 1e3 * 16e3))
    # Compute the STFTs.
    specs = tf.contrib.signal.stft(sources_pad, frame_length=window_frame_size,
                                   frame_step=step_frame_size, pad_end=True, fft_length=512)
    if out_shape is not None:
        specs = tf.slice(specs, begin=[0, 0, 0], size=out_shape)
    return specs
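The resulting spectrogram shape follows from fft_length=512 (512 / 2 + 1 = 257 frequency bins) and, with pad_end=True, ceil(padded_length / step) frames (an illustrative shape check, not part of the original file):

# Illustrative shape check (not part of the original code).
_sources = tf.constant(np.random.randn(2, 48000).astype(np.float32))
_specs = compute_stsf_tf(_sources)
# (48000 + 256) samples with a hop of 160 -> ceil(48256 / 160) = 302 frames.
print(_specs.shape)  # (2, 302, 257)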
# ------------------------------------#
import functools
import tensorflow as tf
def doublewrap(function):
"""
A decorator decorator, allowing to use the decorator to be used without
parentheses if not arguments are provided. All arguments must be optional.
"""
@functools.wraps(function)
def decorator(*args, **kwargs):
if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
return function(args[0])
else:
return lambda wrapee: function(wrapee, *args, **kwargs)
return decorator
@doublewrap
def define_scope(function, scope=None, *args, **kwargs):
"""
A decorator for functions that define TensorFlow operations. The wrapped
function will only be executed once. Subsequent calls to it will directly
return the result so that operations are added to the graph only once.
The operations added by the function live within a tf.variable_scope(). If
this decorator is used with arguments, they will be forwarded to the
variable scope. The scope name defaults to the name of the wrapped
function.
"""
attribute = '_cache_' + function.__name__
name = scope or function.__name__
@property
@functools.wraps(function)
def decorator(self):
if not hasattr(self, attribute):
with tf.variable_scope(name, *args, **kwargs):
setattr(self, attribute, function(self))
return getattr(self, attribute)
return decorator
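A minimal usage sketch of the decorator (illustrative; the Model class and its names are assumptions, not from the original code):

# Minimal usage sketch (illustrative, not part of the original file).
class Model(object):
    def __init__(self, x):
        self.x = x
        self.prediction  # builds the ops once, under variable scope "prediction"
    @define_scope
    def prediction(self):
        # Executed a single time; later accesses return the cached tensor.
        return tf.layers.dense(self.x, 10)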
import tensorflow as tf
def pearson_loss(x, y):
    # Correlation-based loss: 1 - r(x, y)**2, which is 0 for perfectly
    # (anti-)correlated tensors and 1 for uncorrelated ones.
    mx = tf.reduce_mean(x)
my = tf.reduce_mean(y)
xm, ym = x-mx, y-my
r_num = tf.reduce_sum(tf.multiply(xm,ym))
r_den = tf.sqrt(tf.multiply(tf.reduce_sum(tf.square(xm)), tf.reduce_sum(tf.square(ym))))
r = tf.truediv(r_num,r_den)
# ensure r is in range
r = tf.maximum(tf.minimum(r, 1.0), -1.0)
return 1-tf.square(r)
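A quick sanity check of the loss (illustrative, not part of the original file): any affine transform of x is perfectly correlated with x, so the loss vanishes:

# Illustrative check (not part of the original code).
import numpy as np
_x = tf.constant(np.random.rand(100).astype(np.float32))
with tf.Session() as _sess:
    print(_sess.run(pearson_loss(_x, 2.0 * _x + 1.0)))  # r = 1 -> loss ~ 0.0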
import os
from datetime import datetime
def print_par_on_file(test_name, log_dir, par_list):
    """Dump a dictionary of run parameters to <log_dir>/<test_name>.log."""
    assert isinstance(par_list, dict), "par_list has to be a dictionary"
    with open(os.path.join(log_dir, test_name + ".log"), 'w+') as f:
        f.write(test_name)
        f.write("\n")
        f.write(str(datetime.utcnow()))
        f.write("\n\n")
        for key, value in par_list.items():
            f.write(str(key) + ": \t" + str(value))
            f.write("\n")
if __name__ == '__main__':
a={"test1":1,"test2":2}
print_par_on_file("test","./",a)
from vqmetrics.vqmetrics import pesq
pesq(reference='./ground_truth_0.wav',degraded='reco_0.wav')
from distutils.core import setup
setup(
name='vqmetrics',
version='0.1.0',
py_modules=['vqmetrics'],
author='Roman Imankulov',
author_email='roman@netangels.ru',
url='http://github.com/imankulov/vqmertics/',
description='Set of functions to convert between different speech ' + \
'quality estimation metrics, helper class with Speex codec options',
long_description = """
vqmetrics module contains a set of functions to convert between different
speech quality estimation metrics such as PESQ MOS, MOS LQO, R-factor.
This module also contains a helper class with Speex codec options:
- mapping between the speex "quality" and "mode" options
- size (in bits) for each speex frame with a given mode
- required bandwidth estimation
""",
)
#!/usr/bin/env python
"""
- Set of functions for converting between different speech quality estimation
metrics such as PESQ MOS, MOS LQO, R-factor.
- Python wrapper for the ITU-T pesq utility.
- Helper class to define Speex codec parameters based on other options:
- mapping between speex "quality" and "mode" option
    - size (in bits) for each speex frame with a given mode
- required bandwidth estimation
"""
from __future__ import division
import sys, os
from math import sqrt, pi, atan2, log, pow, cos, exp
__all__ = 'SpeexMetric mos2r r2mos delay2id pesq2mos mos2pesq pesq'.split()
class SpeexMetric(object):
"""
SpeexMetric class
>>> m = SpeexMetric(quality=7)
>>> m.mode
5
>>> m.size
300
>>> m = SpeexMetric(mode=5)
>>> m.quality
8
>>> m.size
300
>>> m.get_bandwidth(1)
31000
>>> m.get_bandwidth(2)
23000
>>> m.get_bandwidth(3)
20333
"""
def __init__(self, quality=None, mode=None):
if quality is None and mode is None:
raise ValueError('Speex quality or mode must be set up')
if quality is not None and mode is not None:
raise ValueError('You must set up just one option: quality or mode')
        # quality may legitimately be 0, so test against None explicitly.
        if quality is not None:
self.quality = quality
# 0 1 2 3 4 5 6 7 8 9 10
self.mode = (1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7)[self.quality]
else:
self.mode = mode
self.quality = {