MATLAB 工具箱 TTSBOX 源码中文分析

阅读 147

2022-11-25


function wav = tts(txt,voice,pace,fs)
%TTS text to speech.
%   TTS(TXT) synthesizes speech from the string TXT and speaks it through
%   the Microsoft Speech API (SAPI) voice object.
%
%   WAV = TTS(TXT) does not vocalize; the synthesized speech is rendered
%   into a memory stream and returned in WAV as a column vector of doubles
%   scaled to [-1, 1). The format is mono, 16 bit, 16000 Hz by default.
%
%   TTS(TXT,VOICE) uses the voice whose description equals VOICE (case
%   insensitive). If VOICE is empty or matches no voice, the voice object
%   is left on whatever voice it starts with. Use TTS('','List') to
%   display the descriptions of the available voices. Note that in list
%   mode TXT is still passed on to the synthesizer afterwards, so pass an
%   empty string if you only want the list.
%
%   TTS(...,PACE) sets the pace of speech to PACE. PACE ranges from
%   -10 (slowest) to 10 (fastest); values outside that range are clamped.
%   An empty PACE means 0.
%
%   TTS(...,FS) sets the sampling rate of the returned WAV to FS Hz. FS
%   must be one of the following: 8000, 11025, 12000, 16000, 22050, 24000,
%   32000, 44100, 48000. Any other value (including []) falls back to the
%   default of 16000. FS is only used when an output argument is
%   requested; it has no effect when the text is spoken directly.
%
%   This function requires the Microsoft Win32 Speech API (SAPI).
%
%   Examples:
%       % Speak the text;
%       tts('I can speak.');
%       % List available voices;
%       tts('','List');
%       % Do not speak out, store the speech in a variable;
%       w = tts('I can speak.',[],-4,44100);
%       sound(w,44100);
%
%   See also SOUND, AUDIOPLAYER, ACTXSERVER.

% Written by Siyi Deng; 12-21-2007;

if ~ispc, error('Microsoft Win32 SAPI is required.'); end
if ~ischar(txt), error('First input must be string.'); end

% Validate the sampling rate before any COM object is created. The
% isnumeric/isscalar guards matter: "||" needs logical-scalar operands, so
% an empty or vector FS would otherwise raise an error here instead of
% falling back to the default.
supportedFs = [8000,11025,12000,16000,22050,24000,32000,44100,48000];
if nargin < 4 || ~isnumeric(fs) || ~isscalar(fs) || ~ismember(fs,supportedFs)
    fs = 16000;
end

SV = actxserver('SAPI.SpVoice');   % COM voice object that does the speaking
haveStream = false;                % true once the memory stream MS exists

try
    TK = invoke(SV,'GetVoices');   % collection of voice tokens

    if nargin > 1
        % Select voice, or display every voice description in list mode.
        listMode = strcmpi(voice,'list');
        for k = 0:TK.Count-1       % the token collection is indexed from 0
            if strcmpi(voice,TK.Item(k).GetDescription)
                SV.Voice = TK.Item(k);
                break;
            elseif listMode
                disp(TK.Item(k).GetDescription);
            end
        end

        % Set pace;
        if nargin > 2
            if isempty(pace), pace = 0; end
            % Clamp to the documented [-10, 10] range.
            if abs(pace) > 10, pace = sign(pace)*10; end
            SV.Rate = pace;
        end
    end

    if nargout > 0
        % An output was requested: redirect the voice into a memory stream
        % instead of the audio device.
        MS = actxserver('SAPI.SpMemoryStream');
        haveStream = true;
        % Format names look like 'SAFT16kHz16BitMono'; fix(fs/1000) turns
        % e.g. 11025 into 11 and 44100 into 44.
        MS.Format.Type = sprintf('SAFT%dkHz16BitMono',fix(fs/1000));
        SV.AudioOutputStream = MS;
    end

    invoke(SV,'Speak',txt);

    if nargout > 0
        % The stream data is treated as a byte sequence of 16-bit
        % little-endian samples: arrange it as one row per sample with
        % columns [low byte, high byte].
        bytes = reshape(double(invoke(MS,'GetData')),2,[])';
        % Combine the two bytes into an unsigned 16-bit value and scale by
        % 2^15, giving values in [0, 2).
        wav = (bytes(:,2)*256+bytes(:,1))/32768;
        % Two's complement: scaled values >= 1 have the sign bit set, so
        % they represent negatives; shift them down into [-1, 0).
        isNegative = wav >= 1;
        wav(isNegative) = wav(isNegative)-2;
    end
catch err
    % Release the COM objects before propagating the error so a failed
    % call does not leave them behind.
    if haveStream, delete(MS); end
    delete(SV);
    rethrow(err);
end

% Normal-path cleanup of the COM objects.
if haveStream
    delete(MS);
    clear MS;
end
delete(SV);
clear SV TK;
pause(0.2);

end % TTS;


精彩评论(0)

0 0 举报