Всем привет.
Сильно застрял.
Не могу реализовать поиск ключевых фраз в pocketsphinx.
Делаю так:
Код:
def pocketsphinx_decode(audio_data, language="en-US", keyword_entries=None, grammar=None, show_all=False):
    """Run PocketSphinx on ``audio_data``, optionally in keyword-spotting mode.

    Args:
        audio_data: Object exposing ``get_wav_data(convert_rate=..., convert_width=...)``
            that returns the audio bytes to decode.
        language: Either the name of a directory (located next to this file)
            that contains ``acoustic-model``, ``language-model.lm.bin`` and
            ``pronounciation-dictionary.dict``, or a 3-tuple
            ``(acoustic_model_dir, language_model_file, dictionary_file)``.
            NOTE(review): the tuple form mirrors the SpeechRecognition
            library this code appears to be derived from - confirm.
        keyword_entries: Optional iterable of ``(keyword, sensitivity)`` pairs.
            When given, the decoder searches only for these keywords; each
            sensitivity is mapped to a threshold of ``1e(100 * s - 110)``.
        grammar: Accepted for interface compatibility; not used here.
        show_all: When true, return the decoder itself instead of printing
            the best hypothesis.

    Returns:
        The ``pocketsphinx.Decoder`` if ``show_all`` is true, otherwise ``None``
        (the best hypothesis text, if any, is printed).
    """
    if isinstance(language, str):  # directory containing language data
        language_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), language)
        acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
        language_model_file = os.path.join(language_directory, "language-model.lm.bin")
        phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")
    else:
        # Without this branch a non-string ``language`` would leave the three
        # paths undefined and fail later with a NameError.
        acoustic_parameters_directory, language_model_file, phoneme_dictionary_file = language

    # create decoder object
    config = pocketsphinx.Decoder.default_config()
    config.set_string("-hmm", acoustic_parameters_directory)  # path of the hidden Markov model (HMM) parameter files
    config.set_string("-lm", language_model_file)
    config.set_string("-dict", phoneme_dictionary_file)
    config.set_string("-logfn", os.devnull)  # disable logging (logging causes unwanted output in terminal)
    # "-kws_threshold" is a floating-point option, so it must be set with
    # set_float rather than set_string.
    config.set_float("-kws_threshold", 1e-5)
    decoder = pocketsphinx.Decoder(config)

    # the included language models require audio to be 16-bit mono 16 kHz in little-endian format
    raw_data = audio_data.get_wav_data(convert_rate=16000, convert_width=2)

    if keyword_entries is not None:  # explicitly specified set of keywords
        keywords_path = "keyfile"
        # Write the keyword list as UTF-8 explicitly: with the platform default
        # encoding, non-ASCII keywords (e.g. Russian) may be stored in a locale
        # code page and then fail to match the entries of the model dictionary.
        # NOTE(review): assumes the dictionary file itself is UTF-8 - confirm.
        with open(keywords_path, mode="w", encoding="utf-8") as keywords_file:
            # Sphinx documentation recommends sensitivities between 1e-50 and 1e-5
            keywords_file.writelines(
                "{} /1e{}/\n".format(keyword, int(100 * sensitivity - 110))
                for keyword, sensitivity in keyword_entries
            )
        # The file is closed (and therefore flushed) before the decoder reads it.
        decoder.set_kws("keywords", keywords_path)
        decoder.set_search("keywords")

    decoder.start_utt()  # begin utterance processing
    # recognition enabled (no_search = False), as a full utterance (full_utt = True)
    decoder.process_raw(raw_data, False, True)
    decoder.end_utt()  # stop utterance processing

    if show_all:
        return decoder

    # hyp() yields None when nothing was recognized; avoid an AttributeError
    hypothesis = decoder.hyp()
    if hypothesis is not None:
        print(hypothesis.hypstr)
При простом распознавании все работает отлично.
Но когда я добавляю поиск по ключевым словам:
keyword_entries=[("два",1)]
Распознавание не происходит.
Причем с английской моделью все работает отлично.
Может, кто-то сталкивался с этим или может посоветовать дельную русскую модель для PocketSphinx?