Commit 89d2fbf0 authored by Luca Pasa

Merge branch 'master' of gitlab.iit.it:lpasa/AV_ASR

parents 8663d37b e37f2c5f
@@ -3,11 +3,13 @@
HTK=~/htk/HTKTools/
# MULTI_GRID_FOLDER=/home/storage/Data/GRID/
MULTI_GRID_FOLDER=/home/storage/Data/MULTI_GRID_100/
for DIR in "$MULTI_GRID_FOLDER"/*
do
for FILE in "$DIR/base_audio"/*
for FILE in "$DIR/audio"/*
do
if [ ${FILE: -4} == ".wav" ]; then
$HTK/HCopy -C hcopyConf.txt $FILE ${FILE%.*}.mfc
@@ -27,4 +29,4 @@
fi
done
done
\ No newline at end of file
done
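Note for readers of this diff: the updated script now walks every speaker folder under MULTI_GRID_100 and runs HTK's HCopy on audio/*.wav (previously base_audio/*.wav), writing an .mfc feature file next to each wav. Below is a minimal Python sketch of the same loop, using only the paths visible in the script above; hcopyConf.txt itself is not part of this diff, so nothing is assumed about its contents.

```python
# Sketch of the extraction loop above; assumes HTK lives at ~/htk/HTKTools and
# that hcopyConf.txt sits in the working directory, as in the shell script.
import subprocess
from pathlib import Path

HTK = Path.home() / "htk" / "HTKTools"
MULTI_GRID_FOLDER = Path("/home/storage/Data/MULTI_GRID_100")

for wav in MULTI_GRID_FOLDER.glob("*/audio/*.wav"):
    mfc = wav.with_suffix(".mfc")  # .../audio/<utt>.wav -> .../audio/<utt>.mfc
    subprocess.run([str(HTK / "HCopy"), "-C", "hcopyConf.txt", str(wav), str(mfc)],
                   check=True)
```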
@@ -17,15 +17,15 @@ if __name__ == '__main__':
test_step= 10
learningDecay = 1
momentum = 0.9
test_name="GRID_100_Spectro_Concat_DAE_4_speech_Test_lr-"+str(learningRate)+"_batch_size-"+str(batch_size)+"_n_hidden_encode-"+str(nHidden_encode)
test_name="MULTI_GRID_100_Spectro_Concat_DAE_4_speech_Test_lr-"+str(learningRate)+"_batch_size-"+str(batch_size)+"_n_hidden_encode-"+str(nHidden_encode)
graph = tf.Graph()
with tf.Session(graph=graph) as sess:
model = DAE_4_speech(sess=sess, graph=graph, n_in_audio=nIn_audio, n_in_video=nIn_video, n_hidden=nHidden,
n_hidden_encode=nHidden_encode, batch_size=batch_size, learning_rate=learningRate,
learning_decay=learningDecay, momentum=momentum, updating_step=updating_step)
model.training_model(training_set_path="/home/storage/Data/MULTI_GRID/rawMultiModalTfRec/TRAIN_CTC_SENTENCES/",
test_set_path="/home/storage/Data/MULTI_GRID/rawMultiModalTfRec/TEST_CTC_SENTENCES/",
validation_set_path="/home/storage/Data/MULTI_GRID/rawMultiModalTfRec/VAL_CTC_SENTENCES/",
model.training_model(training_set_path="/home/storage/Data/MULTI_GRID_100/rawMultiModalTfRec/TRAIN_CTC_SENTENCES/",
test_set_path="/home/storage/Data/MULTI_GRID_100/rawMultiModalTfRec/TEST_CTC_SENTENCES/",
validation_set_path="/home/storage/Data/MULTI_GRID_100/rawMultiModalTfRec/VAL_CTC_SENTENCES/",
n_epoch=num_epochs, test_step=test_step, test_name=test_name,
log_dir="./test_log/")
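Note on the renamed test string: test_name is what ends up in the checkpoint file name that the evaluation script further down now points at. The sketch below just reassembles the string with the values visible elsewhere in this diff (lr 0.001, batch size 30, encoder size 750, taken from the new ckpt_file path); the step suffix (-230) is appended by the saver at save time, not here.

```python
# Sketch only: reproduces the checkpoint prefix implied by the new test_name.
learningRate = 0.001
batch_size = 30          # value taken from the evaluation script in this diff
nHidden_encode = 750

test_name = ("MULTI_GRID_100_Spectro_Concat_DAE_4_speech_Test_lr-" + str(learningRate)
             + "_batch_size-" + str(batch_size)
             + "_n_hidden_encode-" + str(nHidden_encode))
print(test_name)
# MULTI_GRID_100_Spectro_Concat_DAE_4_speech_Test_lr-0.001_batch_size-30_n_hidden_encode-750
```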
@@ -102,7 +102,7 @@ class Bi_DEA_model:
# fw_cell:
cells_fw = []
for dim in reversed(self.n_hidden):
cells_fw.append(tf.contrib.rnn.BasicLSTMCell(dim))
cells_fw.append(tf.contrib.rnn.BasicLSTMCell(dim))
#cells_fw.append(tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(dim),
#output_keep_prob=self.keep_prob, input_keep_prob=self.keep_prob))
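Context for the cells_fw loop above: one BasicLSTMCell is created per entry of reversed(self.n_hidden) (now [500, 600] per the hyperparameter change below), with the DropoutWrapper variant left commented out. The actual call site in Bi_DEA_model is outside this hunk; the sketch below shows how such fw/bw cell lists are typically consumed by TF 1.x's stacked bidirectional RNN, with the input width taken from nIn_audio=257 plus nIn_video=134 as an assumption about the concatenated features.

```python
# Hypothetical TF 1.x sketch; mirrors the cell construction above, but the
# stack_bidirectional_dynamic_rnn wiring is an assumption, not shown in this diff.
import tensorflow as tf  # TF 1.x with tf.contrib

n_hidden = [500, 600]
cells_fw = [tf.contrib.rnn.BasicLSTMCell(dim) for dim in reversed(n_hidden)]
cells_bw = [tf.contrib.rnn.BasicLSTMCell(dim) for dim in reversed(n_hidden)]

# [batch, time, features]: 257 spectrogram bins + 134 video features, concatenated
inputs = tf.placeholder(tf.float32, [None, None, 257 + 134])
seq_len = tf.placeholder(tf.int32, [None])

outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
    cells_fw, cells_bw, inputs, sequence_length=seq_len, dtype=tf.float32)
```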
@@ -306,7 +306,7 @@ if __name__ == '__main__':
test_step = 10
learningDecay = 1
momentum = 0.9
test_name = "Bi_On_Overfitting_TEST_Concat_DAE_lr-" + str(learningRate) + "_batch_size-" + str(
test_name = "MULTI_GRID_100_Spectro_Concat_DAE_4_speech_Test_lr-" + str(learningRate) + "_batch_size-" + str(
batch_size) + "_n_hidden_encode-" + str(nHidden[0])
graph = tf.Graph()
@@ -71,10 +71,10 @@ def get_output(sess, model, input_audio, input_video, input_len):
if __name__ == '__main__':
num_epochs = 500
batch_size = 18
nIn_audio = 123
batch_size = 30
nIn_audio = 257
nIn_video = 134
nHidden = [350,500]
nHidden = [500,600]
nHidden_encode = 750
learningRate = 0.001
traininglog_dir = "./"
@@ -84,7 +84,7 @@ if __name__ == '__main__':
momentum = 0.9
ckpt_file = "../Baseline_Models/RESULT/BaseLine1/Overfitting_TEST_Concat_DAE_4_speech_Test_lr-0.001_batch_size-18_n_hidden_encode-750.ckpt-500"
ckpt_file = "../Baseline_Models/RESULT/BaseLine1/MULTI_GRID_100_Spectro_Concat_DAE_4_speech_Test_lr-0.001_batch_size-30_n_hidden_encode-750.ckpt-230"
graph = tf.Graph()
with graph.as_default():
with tf.Session(graph=graph) as sess:
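The ckpt_file update swaps the old 18-batch baseline checkpoint (step 500) for the MULTI_GRID_100 spectrogram run (step 230) trained with the settings above. The "-230" suffix is the global step appended by the TF 1.x saver; below is a tiny self-contained illustration of that naming, with all names other than the suffix made up for the example.

```python
# Toy example of TF 1.x checkpoint naming: Saver.save appends the global step,
# producing a prefix like "<test_name>.ckpt-230", as in ckpt_file above.
import os
import tempfile
import tensorflow as tf

ckpt_dir = tempfile.mkdtemp()
graph = tf.Graph()
with graph.as_default():
    v = tf.get_variable("v", shape=[2], initializer=tf.zeros_initializer())
    saver = tf.train.Saver()
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        path = saver.save(sess, os.path.join(ckpt_dir, "demo.ckpt"), global_step=230)
        print(path)             # ...demo.ckpt-230
        saver.restore(sess, path)
```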