I've synthesized a spoken clock audio signal in Python (code below) and did a proof of concept in Kdenlive 21.03.70: it works! [EDIT: Not quite, after all 8-( I ran a test on an hour-long recording and it missed completely at the hour count.] At this link you can listen to a sound file mixed in Kdenlive after it successfully aligned the two sounds. The file is mono only (left channel) at the beginning, but 10 seconds in you can hear (right channel) the test track, aligned to within 7 ms by Kdenlive's Align Audio to Reference action (docs here).

 


 

 

So the hardware is as simple as a SAMD21 board (10 USD) and a GPS (20 to 30 USD, depending on the retailer)! Stay tuned for a working prototype!

 

 

"""
this sloppy program outputs a WAV file (named TEST_FILE) consisting of 20 short 2 kHz bips,
each followed by the corresponding spoken time (HHMMSS)

a sample WAV output file is available here:
https://mamot.fr/@lutzray/105232599071636881

The time is simply incremented and nothing is done to guarantee the
validity of the syncing bip position; it's a proof of concept for testing NLE
video editing programs and synthetic voices

sox is used for merging WAV files and should be available on your path

63 sound clips are required and must be in the VOICE_CLIPS_REP directory.
The files are 0 to 59 spoken numbers + 3 spoken words. They must be named:

0.wav
1.wav
...
59.wav
heure.wav
minute.wav
seconde.wav

French sound files are temporarily (Nov 2020) accessible at this URL
https://famille.lutz.quebec/index.php/s/bTKjTYC88ke7Ykp

ongoing project documented at
https://atomicsynchronator.blogspot.com/
"""
import numpy as np
import soundfile as sf
import os.path, datetime

# --- Files -----------------------------------------------------------------
VOICE_CLIPS_REP = 'libtts/'  # directory holding the voice clips (see description above)
TEST_FILE = 'bipPlusVoices.wav'  # name of the generated output file

# --- Audio format ----------------------------------------------------------
SAMPLING_RATE = 44100  # samples per second
AMPL = 0.97  # 1.0 is detected as a clipped soundfile by some editing programs

# --- Sync bip and timing ---------------------------------------------------
SYNC_BIP_FREQUENCY = 2000  # in Hertz
SYNC_BIP_LENGTH = 50  # in whole cycles
INTERVAL_BETWEEN_BIPS = 5  # in seconds
INTERVAL_BEFORE_VOICE = 0.5  # in seconds

# --- Names of the spoken-unit clips (file names without the .wav suffix) ----
hh = 'heure'
mm = 'minute'
ss = 'seconde'

def gen_sinus(amp, freq, duration, sampling_rate=None): # duration in cycles
    """Return a sine burst as a 1-D numpy array.

    amp           -- peak amplitude of the sine
    freq          -- frequency in Hertz
    duration      -- length of the burst, expressed in cycles of `freq`
    sampling_rate -- samples per second; defaults to the module-level
                     SAMPLING_RATE when omitted

    The number of samples is duration/freq seconds worth of samples,
    truncated to a whole sample count.
    """
    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE
    n_samples = int(sampling_rate * duration / freq)
    # Sample k sits exactly at k/sampling_rate seconds. (An endpoint-inclusive
    # linspace over the same span would stretch the sample spacing to
    # length/(n_samples-1) and thus slightly raise the played-back frequency.)
    t = np.arange(n_samples) / sampling_rate
    return amp*np.sin(2*np.pi*freq*t)


def append_data_to_WAV(audio_data, existing_snd_fn): # creates it if needed
    """Append `audio_data` to the WAV file `existing_snd_fn`.

    audio_data      -- samples to write (written with sf.write at SAMPLING_RATE)
    existing_snd_fn -- path of the WAV file to extend; if it does not exist
                       yet, it is created and contains only `audio_data`

    The concatenation itself is delegated to the external `sox` program.

    Raises subprocess.CalledProcessError if sox exits with a non-zero status,
    and FileNotFoundError if no sox executable can be found.
    """
    import shutil
    import subprocess
    import tempfile

    # A private temporary directory avoids clashing on fixed /tmp file names
    # and is cleaned up automatically, even when an error occurs.
    with tempfile.TemporaryDirectory() as tmp_dir:
        new_sounds_fn = os.path.join(tmp_dir, 'newsounds.wav')
        sf.write(new_sounds_fn, audio_data, SAMPLING_RATE)
        if not os.path.exists(existing_snd_fn):
            shutil.move(new_sounds_fn, existing_snd_fn)
            return
        sox_output_fn = os.path.join(tmp_dir, 'soxout.wav')
        # Prefer the sox found on the PATH; fall back to the historical
        # hard-coded location.
        sox_exe = shutil.which('sox') or '/usr/local/bin/sox'
        # Argument list (no shell): file names containing spaces or shell
        # metacharacters are passed through untouched.
        subprocess.run(
            [sox_exe, existing_snd_fn, new_sounds_fn, sox_output_fn],
            check=True)
        shutil.move(sox_output_fn, existing_snd_fn)

def patch_voice_clips_from_TTS_lib(clips): # clips = list of filename wo .wav ['23', 'heure']
    """Read the named voice clips and return them joined end to end.

    clips -- sequence of clip names without the '.wav' suffix; each one is
             looked up as VOICE_CLIPS_REP + name + '.wav'

    Only the first element returned by sf.read (the audio data) is used; the
    second element is ignored. The result is a single numpy array made by
    concatenating the clips in the order given.
    """
    waveforms = []
    for clip_name in clips:
        samples, _ignored = sf.read('%s%s.wav'%(VOICE_CLIPS_REP, clip_name))
        waveforms.append(np.array(samples))
    return np.concatenate(waveforms)


sync_bip = gen_sinus(AMPL, SYNC_BIP_FREQUENCY, SYNC_BIP_LENGTH)

# Start from a clean slate: append_data_to_WAV extends the file if it exists.
try:
    os.remove(TEST_FILE)
except FileNotFoundError:
    pass

pseudo_now = datetime.datetime.now()

# All segment lengths are handled as integer sample counts, so that every
# bip-to-bip interval is exactly the same number of samples. (Computing the
# trailing silence in float seconds and truncating can lose a sample per
# interval, which makes the bips drift.)
samples_per_interval = int(round(INTERVAL_BETWEEN_BIPS*SAMPLING_RATE))
silence_bef_voice = np.zeros(int(INTERVAL_BEFORE_VOICE*SAMPLING_RATE))

for i_bip in range(20): # number of spoken times in the output file
    time_strings = \
        [pseudo_now.hour, hh, pseudo_now.minute, mm, pseudo_now.second, ss]
    voice_data = patch_voice_clips_from_TTS_lib(time_strings)
    # Whatever is left of the interval after bip + pause + voice is silence.
    n_silence_after_voice = samples_per_interval \
                            - len(sync_bip) \
                            - len(silence_bef_voice) \
                            - len(voice_data)
    if n_silence_after_voice < 0:
        raise ValueError(
            'spoken time %s does not fit in INTERVAL_BETWEEN_BIPS (%s s)'
            % (pseudo_now.strftime('%H:%M:%S'), INTERVAL_BETWEEN_BIPS))
    silence_after_voice = np.zeros(n_silence_after_voice)
    append_data_to_WAV(sync_bip, TEST_FILE)
    append_data_to_WAV(silence_bef_voice, TEST_FILE)
    append_data_to_WAV(voice_data, TEST_FILE)
    append_data_to_WAV(silence_after_voice, TEST_FILE)
    # The spoken time is simply advanced by the interval; it is not tied to
    # any real clock.
    pseudo_now = pseudo_now + datetime.timedelta(seconds=INTERVAL_BETWEEN_BIPS)

Comments

Popular Posts