Commit 9c81ec46 authored by Luca Pasa's avatar Luca Pasa
Browse files

update dataset creator

parent 824d30fe
......@@ -2,6 +2,7 @@ from pydub import AudioSegment
import os
import random
import shutil
import glob
#conf local
#grid_path = "/home/nameless/Project/MultiModalSpeech/Data/GRID/"
......@@ -9,8 +10,14 @@ import shutil
#conf Comago
grid_path = "/DATA_NEW/lpasa/Data/GRID/"
grid_multi_speaker_path = "/DATA_NEW/lpasa/Data/MULTI_GRID/"
# grid_path = "/DATA_NEW/lpasa/Data/GRID/"
# grid_multi_speaker_path = "/DATA_NEW/lpasa/Data/MULTI_GRID/"
#conf local (dell)
grid_path='/home/storage/Data/GRID/'
grid_multi_speaker_path ='/home/storage/Data/MULTI_GRID_100/'
# It would be useful to work on a copy of GRID so that files can be deleted as they are used, making it easy to create the test and validation sets with the certainty ...
......@@ -38,6 +45,7 @@ def create_multi_grid_folder(n_speakers=33): # de deafualt value is 33 because
def two_files_audio_sum(file_1_path, file_2_path, file_sum_name,volume_reduction=0):
s1 = AudioSegment.from_file(file_1_path)
s2 = AudioSegment.from_file(file_2_path) - volume_reduction
if s1.duration_seconds >= s2.duration_seconds:
......@@ -46,7 +54,7 @@ def two_files_audio_sum(file_1_path, file_2_path, file_sum_name,volume_reduction
audio_sum = s2.overlay(s1)
audio_sum.export(file_sum_name, format='wav')
def random_files_selector(folders,n_file=1):
def random_files_selector(folders,n_file=1, file_extension=".wav"):
'''
:param n_file: number of files that have to be selected by the method
:param folders: list of folders where the file will be randomly selected
......@@ -54,15 +62,17 @@ def random_files_selector(folders,n_file=1):
'''
dir_file_list=[]
for dir in folders:
dir_file_list.append(tuple((dir, os.listdir(dir))))
dir_file_list.append(tuple((dir, glob.glob( os.path.join(dir,"*"+file_extension)))))
selected_files=[]
for i in range(n_file):
random_folder_files=random.choice(dir_file_list)
is_not_append=True
while is_not_append:
random_file=random.choice(random_folder_files[1])
if random_folder_files not in selected_files:
selected_files.append(random_folder_files[0]+"/"+random_file)
random_file=os.path.split(random_file)[-1]
if random_folder_files not in selected_files :
selected_files.append(os.path.join(random_folder_files[0],random_file))
is_not_append=False
return selected_files
......@@ -134,7 +144,7 @@ def create_multi_speaker_data(n_speech_4_speaker, n_combinations=3, n_speakers=2
other_speakers=[speaker for speaker in list_of_speakers if speaker != s]
#first we have to select n_speech_4_speaker by the current speaker, that will be use as target for the combinations
current_speech_list=random_files_selector([grid_path+"s"+str(s)+"/audio"],n_speech_4_speaker)
current_speech_list=random_files_selector([grid_path+"s"+str(s)+"/audio"],n_speech_4_speaker,"*.wav")
#now, for each speech we have to create n_combinations combinations:
for base_speech in current_speech_list:
......@@ -144,7 +154,7 @@ def create_multi_speaker_data(n_speech_4_speaker, n_combinations=3, n_speakers=2
while condition:
#to create a combinations we have to select n_speakers-1 speeches from other spearkers
other_speeches_to_combine=random_files_selector([grid_path+"s"+str(s_other)+"/audio" for s_other in other_speakers], n_speakers-1)
other_speeches_to_combine=random_files_selector([grid_path+"s"+str(s_other)+"/audio" for s_other in other_speakers], n_speakers-1,"*.wav")
for speech_to_combine in other_speeches_to_combine:
......@@ -177,6 +187,6 @@ def create_multi_speaker_data(n_speech_4_speaker, n_combinations=3, n_speakers=2
if __name__ == '__main__':
create_multi_grid_folder()
create_multi_speaker_data(10,n_of_GRID_speakers=33,n_speakers=2)
create_multi_speaker_data(n_speech_4_speaker=200,n_of_GRID_speakers=33,n_speakers=2)
# print random_files_selector(["/home/nameless/Project/MultiModalSpeech/Data/GRID/s3/audio","/home/nameless/Project/MultiModalSpeech/Data/GRID/s2/audio","/home/nameless/Project/MultiModalSpeech/Data/GRID/s1/audio"],6)
\ No newline at end of file
from scipy.io import wavfile
import numpy as np
from librosa.feature import melspectrogram
from scipy import signal
import glob
# Glob pattern (expanded with glob.glob in create_dataset) selecting every .wav
# file under the base_audio sub-folder of each s* directory of MULTI_GRID.
GRID_AUDIO_FILES='/home/storage/Data/MULTI_GRID/s*/base_audio/*.wav'
def mel_specgram(audio=None, sample_rate=16e3, spec=None, n_mels=80, window_size=20, overlap_size=10):
    """Build a mel spectrogram from raw audio or from a precomputed spectrogram.

    :param audio: audio samples fed to ``scipy.signal.spectrogram``; when given,
        any value passed as ``spec`` is ignored and recomputed from the audio
    :param sample_rate: sampling rate of the audio (samples per second)
    :param spec: precomputed spectrogram, used only when ``audio`` is None
    :param n_mels: number of mel bands requested from ``melspectrogram``
    :param window_size: analysis window length; it is divided by 1e3 and
        multiplied by ``sample_rate``, i.e. expressed in milliseconds
    :param overlap_size: overlap between consecutive windows, same unit as
        ``window_size``
    :return: None when neither ``audio`` nor ``spec`` is provided; otherwise the
        tuple ``(channels, times, mel_spec)`` where ``channels`` is
        ``np.arange(n_mels)``, ``times`` holds the segment times returned by
        ``scipy.signal.spectrogram`` (None when starting from ``spec``) and
        ``mel_spec`` is the transposed output of ``melspectrogram``
    """
    # Nothing to work on: no audio and no precomputed spectrogram.
    if audio is None and spec is None:
        return None

    segment_times = None  # TODO: add times with spectrogram input
    if audio is not None:
        # Convert the window and overlap lengths into a number of samples.
        samples_per_window = int(round(window_size / 1e3 * sample_rate))
        samples_overlapping = int(round(overlap_size / 1e3 * sample_rate))
        _, segment_times, spec = signal.spectrogram(
            audio,
            fs=sample_rate,
            window='hann',
            nperseg=samples_per_window,
            noverlap=samples_overlapping,
            detrend=False,
        )

    mel_bands = melspectrogram(S=spec, sr=sample_rate, n_mels=n_mels)
    return np.arange(n_mels), segment_times, mel_bands.T
def log_mel_specgram(audio=None, sample_rate=16e3, spec=None, n_mels=80, window_size=20, overlap_size=10, eps=1e-10):
    """Compute the natural logarithm of the mel spectrogram.

    All arguments except ``eps`` are forwarded unchanged to ``mel_specgram``.

    :param audio: audio samples, forwarded to ``mel_specgram``
    :param sample_rate: sampling rate of the audio, forwarded to ``mel_specgram``
    :param spec: precomputed spectrogram, forwarded to ``mel_specgram``
    :param n_mels: number of mel bands, forwarded to ``mel_specgram``
    :param window_size: analysis window length, forwarded to ``mel_specgram``
    :param overlap_size: window overlap, forwarded to ``mel_specgram``
    :param eps: small constant added to the mel spectrogram before taking the
        logarithm, so that zero-valued bins do not produce ``-inf``
    :return: None when ``mel_specgram`` returns None (neither ``audio`` nor
        ``spec`` given); otherwise ``(channels, times, log_mel_spec)``
    """
    mel_result = mel_specgram(audio, sample_rate, spec, n_mels, window_size, overlap_size)
    if mel_result is None:
        # mel_specgram signals "no input" with None; propagate it instead of
        # failing with an opaque TypeError while unpacking.
        return None
    channels, times, mel_spec = mel_result
    return channels, times, np.log(mel_spec + eps)
def create_dataset():
    """Read every audio file matched by GRID_AUDIO_FILES and compute its log-mel spectrogram.

    Each file is loaded with ``scipy.io.wavfile.read`` and passed to
    ``log_mel_specgram`` with ``window_size=25`` and ``overlap_size=15``.
    The raw samples are then printed.

    NOTE(review): the spectrogram is computed but neither stored nor returned,
    so this function looks like work in progress.
    """
    for audio_sample in glob.glob(GRID_AUDIO_FILES):
        sample_rate, sample = wavfile.read(audio_sample)
        _, _, spec = log_mel_specgram(sample, sample_rate, window_size=25, overlap_size=15)
        # Parenthesised single-argument form prints identically on Python 2
        # and Python 3, where the bare print statement is a SyntaxError.
        print(sample)
# Script entry point: build the dataset only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    create_dataset()
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment