تنزيل sub to audio - تنزيل رمز مصدر sub to audio

تحويل الترجمة إلى صوت

تحويل الترجمة إلى صوت: أنشئ الصوت/الكلام من أي ملف ترجمة باستخدام Coqui-Ai TTS، مع مزامنة توقيت الصوت وفقًا لتوقيت الترجمة.

العرض التوضيحي:

التبعيات

FFMPEG ، PYDUB ، LIBROSA ، COQUI-AI TTS ، FFMPEG-PYTHON

تثبيت

pip install TTS
pip install git+https://github.com/bnsantoso/sub-to-audio

pip install TTS
pip install subtoaudio

FFMPEG على Linux

apt-get install ffmpeg

مثال الاستخدام

الاستخدام الأساسي مشابه جدًا لـ Coqui-Ai TTS، ويمكنك الرجوع إلى وثائقها وإلى قائمة رموز اللغات <lang-iso_code>.

ملاحظة: استخدم ترجمات غير متداخلة ذات عدد مثالي من الأحرف في الثانية (CPS) للحصول على أفضل نتيجة.

ملاحظة: استخدم برنامجًا مثل Aegisub لتحرير ملف الترجمة الخاص بك.

 from subtoaudio import SubToAudio

# list all model
SubToAudio (). coqui_model ()

# get model index
model = SubToAudio (). coqui_model ()[ 1 ]

# The code will output 'yoursubtitle.wav' in the current directory.
sub = SubToAudio ( model_name = model )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle )

# you can choose 1100 different language using fairseq model
sub = SubToAudio ( fairseq_language = '<lang-iso_code>' )
subtitle = sub . subtitle ( "yoursubtitle.ass" )
sub . convert_to_audio ( sub_data = subtitle ) 

# specify model name
sub = SubToAudio ( model_name = "tts_models/multilingual/multi-dataset/your_tts" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , output_path = "subtitle.wav" )

# specify model and config path
sub = SubToAudio ( model_path = "path/to/your/model.pth" , config_path = "config/path.json" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle )

# speaker=tts.speakers[0] or None if model doesnt have multiple speakers
# language=tts.languages[0] or None if doesnt have multiple languages

# list speaker
sub . speakers ()
speaker1 = sub . speakers ()[ 1 ]

# list languages
sub . languages ()
langu = sub . languages ()[ 0 ]

sub = SubToAudio ( model_name = "tts_models/multilingual/multi-dataset/your_tts" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , language = langu , speaker = speaker1 , output_path = "subtitle.wav" )

# Save temporary audio to current folder
sub = SubToAudio ( model_name = "tts_models/multilingual/multi-dataset/your_tts" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , output_path = "subtitle.wav" , save_temp = True )

تحويل الصوت

لاستخدام طريقة تحويل الصوت، يجب أن تمرر المعلمتين voice_conversion:bool و speaker_wav:str إلى self.convert_to_audio. لا يمكن تشغيل تحويل الصوت إذا كان النموذج الخاص بك يحتوي على متحدثين متعددين.

 from subtoaudio import SubToAudio

sub = SubToAudio ( fairseq_language = "eng" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , voice_conversion = True , speaker_wav = "voice.wav" , language = "en" )

Coqui Studio API

لاستخدام Coqui Studio API ستحتاج إلى تعيين متغير البيئة COQUI_STUDIO_TOKEN.

 import os

os . environ [ 'COQUI_STUDIO_TOKEN' ] = # yourapi

بعد تعيين الرمز المميز الخاص بك، يمكنك استخدام نماذج Coqui Studio باتباع صيغة التسمية التالية: coqui_studio/en/<studio_speaker_name>/coqui_studio

 from subtoaudio import SubToAudio

sub = SubToAudio ( model_name = "coqui_studio/en/Torcull Diarmuid/coqui_studio" , progress_bar = False )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , output_path = "subtitle.wav" , save_temp = True )

# use emotion paramater and speed paramater
sub . convert_to_audio ( sub_data = subtitle , output_path = "subtitle.wav" , emotion = "Happy" , speed = 1.5 )

وضع الإيقاع

استخدم المعلمة tempo_mode لتسريع الصوت. هناك ثلاثة أوضاع للإيقاع:

tempo_mode="all" : يسرّع هذا الوضع جميع المقاطع الصوتية. استخدم tempo_speed=float لتحديد السرعة.
tempo_mode="overflow" : يسرّع هذا الوضع الصوت ليطابق مدة الترجمة الكلية مضافًا إليها المدة الفارغة قبل ظهور الترجمة التالية. تحدّ المعلمة tempo_limit من زيادة السرعة في هذا الوضع.
tempo_mode="precise" : يسرّع هذا الوضع الصوت ليطابق المدة التي تظهر فيها الترجمة.

 from subtoaudio import SubToAudio

# Speed up tempo or speech rate
sub = SubToAudio ( model_name = "tts_models/de/thorsten/tacotron2-DDC" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "all" , tempo_speed = 1.3 )

# Change the tempo or speech rate of all audio files , default is 1.2
sub = SubToAudio ( "tts_models/multilingual/multi-dataset/xtts_v1" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "all" , tempo_speed = 1.3 )

# Change tempo or speech rate to audio that doesn't match the subtitle duration
sub = SubToAudio ( fairseq_language = "ind" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "overflow" )

# Limit tempo speed on the overflow mode 
sub = SubToAudio ( fairseq_language = "ind" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "overflow" , tempo_limit = 1.2 )

# Match audio length to subtitle duration
sub = SubToAudio ( fairseq_language = "ind" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "precise" )

وضع الإزاحة

تقوم المعلمة shift_mode بإزاحة الصوت الذي لا تتطابق مدته مع مدة الترجمة.

shift_mode="right" : إزاحة توقيت الصوت إلى اليمين ومنع تداخل الصوت.
shift_mode="left" : إزاحة الصوت إلى اليسار ومنع تداخل الصوت، ولكن انتبه إلى المساحة المحدودة على الجانب الأيسر، إذ قد تختفي بعض المقاطع الصوتية.
shift_mode="interpose" : إزاحة الصوت إلى الموضع الأوسط ومنع تداخل الصوت من جهتي اليمين واليسار. (ملاحظة: قد يكون هذا الوضع غير سلس، لذا استخدمه بحذر.)
shift_mode="left-overlap" : إزاحة توقيت الصوت إلى اليسار، مع السماح بالتداخل.
shift_mode="interpose-overlap" : إزاحة الصوت إلى الموضع الأوسط، مع السماح بالتداخل.
shift_limit=int or "str" : يحدّ من مقدار إزاحة الصوت؛ استخدم عددًا صحيحًا للمللي ثانية أو سلسلة نصية مثل "2.5s" للثواني.

 from subtoaudio import SubToAudio

# shift mode with limit of 2 second to the right.

sub = SubToAudio ( fairseq_language = "vie" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "overflow" , shift_mode = "right" , shift_limit = "2s" )

# shift audio to left position or, time before next subtitle appear

sub = SubToAudio ( fairseq_language = "fra" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , shift_mode = "left-overlap" )

# shift to left, and limit shift only 1 sec.
sub = SubToAudio ( fairseq_language = "ind" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , shift_mode = "left" , shift_limit = 1000 ) # 1000 = 1s

مثال Bark و Tortoise

 from subtoaudio import SubToAudio

#  Random Speaker will give you weird result when using bark model with SubToAudio

# Bark random
sub = SubToAudio ( "tts_models/multilingual/multi-dataset/bark" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "overflow" )

# Tortoise random
sub = SubToAudio ( "tts_models/en/multi-dataset/tortoise-v2" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "overflow" , preset = "fast" )

#  To use voice clone you need voice_dir and speaker paramater
#  Voice Clone expecting .wav or .npz file inside folder speaker_1
#  voice/speaker_1/hana.wav or voice/speaker_1/hana.npz
#  if your speaker folder only have .wav file, it will generate .npz file after you runing it.

sub = SubToAudio ( "tts_models/multilingual/multi-dataset/bark" )
subtitle = sub . subtitle ( "yoursubtitle.srt" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "overflow" , voice_dir = "voice/" , speaker = "speaker_1" )

# same with bark, the folder structure like this 'voice/speaker2/ron.wav'
sub = SubToAudio ( "tts_models/en/multi-dataset/tortoise-v2" )
subtitle = sub . subtitle ( "yoursubtitle.ass" )
sub . convert_to_audio ( sub_data = subtitle , tempo_mode = "overflow" , voice_dir = "voice/" , speaker = "speaker2" )