Commit ddeede6e authored by Luca Pasa
Browse files

dev motion vector tf records

parent 89bdc381
import glob
import numpy as np
from scipy.io.wavfile import read
# Always print arrays in full. Current NumPy releases reject NaN as a print
# threshold (ValueError); sys.maxsize is the documented value for
# untruncated output.
import sys
np.set_printoptions(threshold=sys.maxsize)
import tensorflow as tf
def serialize_sequence(base_audio_sequence, audio_sequence, video_sequence, labels):
    """Pack one sentence into a ``tf.train.SequenceExample``.

    The context part stores the length of each of the four inputs as int64
    features ("base_audio_length", "audio_length", "video_length",
    "label_length"). The feature-list part stores one float feature per
    element of each input ("base_audio_feat", "audio_feat", "video_feat",
    "labels").

    Args:
        base_audio_sequence: sequence of iterables of numbers; every element
            becomes one float feature of "base_audio_feat".
        audio_sequence: same layout, stored under "audio_feat".
        video_sequence: same layout, stored under "video_feat".
        labels: sequence of numbers; every label becomes a single-value float
            feature of "labels".

    Returns:
        The populated ``tf.train.SequenceExample`` (not yet serialized).
    """
    example = tf.train.SequenceExample()

    # Non-sequential (context) features: the length of every input.
    context = example.context.feature
    length_entries = (
        ("base_audio_length", base_audio_sequence),
        ("audio_length", audio_sequence),
        ("video_length", video_sequence),
        ("label_length", labels),
    )
    for length_key, sequence in length_entries:
        context[length_key].int64_list.value.append(len(sequence))

    # Sequential features whose steps are vectors of floats.
    feature_lists = example.feature_lists.feature_list
    vector_entries = (
        ("base_audio_feat", base_audio_sequence),
        ("audio_feat", audio_sequence),
        ("video_feat", video_sequence),
    )
    for list_key, sequence in vector_entries:
        target = feature_lists[list_key]
        for step in sequence:
            target.feature.add().float_list.value.extend(step)

    # Labels are scalars, so each one is appended rather than extended.
    label_target = feature_lists["labels"]
    for single_label in labels:
        label_target.feature.add().float_list.value.append(single_label)

    return example
# Dataset-wide mean / standard deviation arrays used to normalise the
# features; read from .npy files in the current working directory.
dataset_audio_base_mean = np.load('dataset_audio_raw_base_mean.npy')
dataset_audio_base_std = np.load('dataset_audio_raw_base_stdev.npy')
dataset_multi_audio_mean = np.load('dataset_audio_raw_multi_mean.npy')
dataset_multi_audio_std = np.load('dataset_audio_raw_multi_stdev.npy')
dataset_video_mean = np.load('dataset_video_mean.npy')
dataset_video_std = np.load('dataset_video_stdev.npy')

# Destination folders for the generated TFRecord files, one per split.
train_dir = '/home/storage/Data/MULTI_GRID_100/rawAudioMotionTfRec/TRAIN_CTC_SENTENCES/'
val_dir = '/home/storage/Data/MULTI_GRID_100/rawAudioMotionTfRec/VAL_CTC_SENTENCES/'
test_dir = '/home/storage/Data/MULTI_GRID_100/rawAudioMotionTfRec/TEST_CTC_SENTENCES/'

# Build the phoneme vocabulary: every distinct space/newline separated token
# of dictionary.txt, sorted so a token's list position is a stable label id.
# The context manager guarantees the file handle is closed after reading.
with open('./dictionary.txt', 'r') as dictionary_file:
    dictionary = dictionary_file.read()
phonemes = dictionary.replace('\n', ' ').split(' ')
# Compare by value: "is not ''" tests object identity, which is not a
# reliable emptiness check for strings.
phonemes = [ph for ph in sorted(set(phonemes)) if ph != '']
# Gather the per-sentence feature files of every speaker folder. Each list is
# sorted so the three lists are walked in the same lexicographic order.
features_file_list_base_audio = glob.glob('/home/storage/Data/MULTI_GRID_100/s*/base_audio/*.npy')
features_file_list_base_audio.sort()
features_file_list_audio = glob.glob('/home/storage/Data/MULTI_GRID_100/s*/multi_audio/*.npy')
features_file_list_audio.sort()
features_file_list_video = glob.glob('/home/storage/Data/MULTI_GRID_100/s*/video/*.txt')
features_file_list_video.sort()

# NOTE: the three lists are not checked for equal length here; the multi-audio
# list alone determines the number of sentences used for the split.
num_sentences_total = len(features_file_list_audio)
print('Total number of files = {}'.format(num_sentences_total))

# Shuffle the sentence indices with a fixed seed so the split is reproducible.
indices = np.arange(num_sentences_total)
np.random.seed(3)
np.random.shuffle(indices)

# Split fractions; the test split receives whatever remains after rounding
# the train and validation sizes down.
train_percent = 0.6
val_percent = 0.2
test_percent = 0.2

print(num_sentences_total)
num_sentences_train = int(num_sentences_total * train_percent)
num_sentences_val = int(num_sentences_total * val_percent)
num_sentences_test = num_sentences_total - num_sentences_train - num_sentences_val
print('num sentences train = ', num_sentences_train)
print('num sentences val = ', num_sentences_val)
print('num sentences test = ', num_sentences_test)

# Cut the shuffled indices into consecutive train / val / test chunks.
train_indices, val_indices, test_indices = np.split(
    indices, [num_sentences_train, num_sentences_train + num_sentences_val])

# Running counters used to number the output files of each split.
train_counter = val_counter = test_counter = 0
# Convert every sentence into one TFRecord file placed in the folder of the
# split (train / val / test) its index was assigned to.

# Plain Python sets give O(1) membership tests; testing "in" against the NumPy
# index arrays would rescan them for every file.
train_index_set = set(train_indices.tolist())
val_index_set = set(val_indices.tolist())

for file_index, (base_audio_path, multi_audio_path, video_path) in enumerate(
        zip(features_file_list_base_audio,
            features_file_list_audio,
            features_file_list_video)):
    features_base_audio = np.load(base_audio_path)
    features_audio = np.load(multi_audio_path)
    features_video = np.loadtxt(video_path)

    # Motion features: frame-to-frame difference of the video features; the
    # first frame has no predecessor, so its delta stays zero.
    delta_features_video = np.zeros_like(features_video)
    delta_features_video[1:] = features_video[1:] - features_video[:-1]

    # The transcription sits next to the multi-audio file, in the sibling
    # "transcription" folder, with a ".transcription" extension.
    labels_file = multi_audio_path.replace('/multi_audio/', '/transcription/').replace('.npy', '.transcription')
    with open(labels_file, 'r') as transcription_file:
        transcription = transcription_file.read()

    # Drop newlines and every "SP" occurrence, split on commas, discard empty
    # tokens and map each remaining phoneme to its index in `phonemes`.
    tokens = transcription.replace('\n', '').replace('SP', '').split(',')
    labels = np.asarray([phonemes.index(ph) for ph in tokens if ph != ''])

    # Standardise every stream with the dataset-wide statistics.
    features_base_audio = np.subtract(features_base_audio, dataset_audio_base_mean) / dataset_audio_base_std
    features_audio = np.subtract(features_audio, dataset_multi_audio_mean) / dataset_multi_audio_std
    # NOTE(review): the video statistics are applied to the delta features;
    # presumably they were computed on motion features - confirm.
    delta_features_video = np.subtract(delta_features_video, dataset_video_mean) / dataset_video_std

    # The three index arrays partition all sentence indices, so an index that
    # is neither in train nor in val belongs to the test split.
    if file_index in train_index_set:
        sentence_file = train_dir + 'sequence_full_{:05d}.tfrecords'.format(train_counter)
        train_counter += 1
    elif file_index in val_index_set:
        sentence_file = val_dir + 'sequence_full_{:05d}.tfrecords'.format(val_counter)
        val_counter += 1
    else:
        sentence_file = test_dir + 'sequence_full_{:05d}.tfrecords'.format(test_counter)
        test_counter += 1

    serialized_sentence = serialize_sequence(features_base_audio, features_audio, delta_features_video, labels)

    # The writer creates the file itself, so no separate open() is needed;
    # try/finally closes it even if writing fails.
    writer = tf.python_io.TFRecordWriter(sentence_file)
    try:
        writer.write(serialized_sentence.SerializeToString())
    finally:
        writer.close()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment