Gereksinimlerim bu soruya benziyor; ancak o soru 3 yıl önce sorulduğu için, kendi durumumla ilgili bilgileri ekleyerek yeniden soruyorum. Bir .wav (veya başka bir standart ses dosyası formatı) dosyası alan ve onu metne dönüştüren bir uygulama oluşturmak istiyorum: Sphinx4 kullanarak bir dikte uygulaması.
Konuşma tanıma için Sphinx4 kullanmaya karar verdim ve Sphinx ile birlikte gelen Transcriber demosunu geliştirmeye çalışıyorum. Demo iyi çalışıyor, ancak yalnızca belirli bir dilbilgisi (.gram ve .gxml dosyalarında tanımlanmış) için çalışıyor.
DÜZENLEME: Bunu İngilizce ile birlikte nasıl kullanabilirim? VoxForge_en_0.4 ile yapılandırmaya çalışıyorum. Config.xml dosyam şöyle görünüyor:
<?xml version="1.0" encoding="UTF-8"?>
<!--
Sphinx-4 Configuration file
-->
<!-- ******************************************************** -->
<!-- biship configuration file -->
<!-- ******************************************************** -->
<config>
<!-- ======================================================== -->
<!-- Global properties                                        -->
<!-- Shared values; components in this file read most of      -->
<!-- them through ${name} substitution.                       -->
<!-- ======================================================== -->

<!-- Numeric tuning values -->
<property name="absoluteBeamWidth"           value="500"/>
<property name="relativeBeamWidth"           value="1E-80"/>
<property name="absoluteWordBeamWidth"       value="20"/>
<property name="relativeWordBeamWidth"       value="1E-60"/>
<property name="wordInsertionProbability"    value="1E-16"/>
<property name="languageWeight"              value="7.0"/>
<property name="silenceInsertionProbability" value=".1"/>

<!-- Names of the front end and recognizer components defined in this file -->
<property name="frontend"   value="epFrontEnd"/>
<property name="recognizer" value="recognizer"/>

<property name="showCreations" value="false"/>
<!-- ======================================================== -->
<!-- Recognizer                                               -->
<!-- Points at the "decoder" component and lists the monitor  -->
<!-- components to attach.                                    -->
<!-- ======================================================== -->
<component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
    <property name="decoder" value="decoder"/>
    <propertylist name="monitors">
        <item>accuracyTracker </item>
        <item>speedTracker </item>
        <item>memoryTracker </item>
        <item>recognizerMonitor </item>
    </propertylist>
</component>
<!-- ======================================================== -->
<!-- Decoder                                                  -->
<!-- Uses the word-pruning search manager defined below.      -->
<!-- ======================================================== -->
<component name="decoder"
           type="edu.cmu.sphinx.decoder.Decoder">
    <property name="searchManager"    value="wordPruningSearchManager"/>
    <property name="featureBlockSize" value="50"/>
</component>
<!-- ======================================================== -->
<!-- Search manager                                           -->
<!-- Wires together the linguist, pruner, scorer and active   -->
<!-- list manager; the relative beam width comes from the     -->
<!-- global property of the same name.                        -->
<!-- ======================================================== -->
<component name="wordPruningSearchManager" type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
    <!-- Collaborating components -->
    <property name="logMath"           value="logMath"/>
    <property name="linguist"          value="lexTreeLinguist"/>
    <property name="pruner"            value="trivialPruner"/>
    <property name="scorer"            value="threadedScorer"/>
    <property name="activeListManager" value="activeListManager"/>

    <!-- Search settings -->
    <property name="growSkipInterval"        value="0"/>
    <property name="checkStateOrder"         value="false"/>
    <property name="buildWordLattice"        value="true"/>
    <property name="acousticLookaheadFrames" value="1.7"/>
    <property name="relativeBeamWidth"       value="${relativeBeamWidth}"/>
</component>
<!-- ======================================================== -->
<!-- Active lists                                             -->
<!-- The manager lists six factories in order: one standard,  -->
<!-- two word, then three standard. The two factory           -->
<!-- components differ only in which global beam widths       -->
<!-- they read.                                               -->
<!-- ======================================================== -->
<component name="activeListManager" type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
    <propertylist name="activeListFactories">
        <item>standardActiveListFactory</item>
        <item>wordActiveListFactory</item>
        <item>wordActiveListFactory</item>
        <item>standardActiveListFactory</item>
        <item>standardActiveListFactory</item>
        <item>standardActiveListFactory</item>
    </propertylist>
</component>

<!-- Factory using absoluteBeamWidth / relativeBeamWidth -->
<component name="standardActiveListFactory" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
    <property name="logMath"           value="logMath"/>
    <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
    <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
</component>

<!-- Factory using absoluteWordBeamWidth / relativeWordBeamWidth -->
<component name="wordActiveListFactory" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
    <property name="logMath"           value="logMath"/>
    <property name="absoluteBeamWidth" value="${absoluteWordBeamWidth}"/>
    <property name="relativeBeamWidth" value="${relativeWordBeamWidth}"/>
</component>
<!-- ======================================================== -->
<!-- Pruner (no properties set here)                          -->
<!-- ======================================================== -->
<component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>
<!-- ======================================================== -->
<!-- Scorer                                                   -->
<!-- Reads from the front end named by the global             -->
<!-- "frontend" property.                                     -->
<!-- ======================================================== -->
<component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
    <property name="frontend" value="${frontend}"/>
</component>
<!-- ======================================================== -->
<!-- Linguist                                                 -->
<!-- Combines the acoustic model ("wsj"), the language model  -->
<!-- ("trigramModel") and the dictionary. Insertion           -->
<!-- probabilities and the language weight are taken from     -->
<!-- the global properties.                                   -->
<!-- ======================================================== -->
<component name="lexTreeLinguist" type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
    <!-- Collaborating components -->
    <property name="logMath"       value="logMath"/>
    <property name="acousticModel" value="wsj"/>
    <property name="languageModel" value="trigramModel"/>
    <property name="dictionary"    value="dictionary"/>

    <!-- Values fixed in this component -->
    <property name="addFillerWords"             value="false"/>
    <property name="fillerInsertionProbability" value="1E-10"/>
    <property name="generateUnitStates"         value="false"/>
    <property name="wantUnigramSmear"           value="true"/>
    <property name="unigramSmearWeight"         value="1"/>

    <!-- Values substituted from the global properties -->
    <property name="wordInsertionProbability"    value="${wordInsertionProbability}"/>
    <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
    <property name="languageWeight"              value="${languageWeight}"/>

    <property name="unitManager" value="unitManager"/>
</component>
<!-- ******************************************************** -->
<!-- The Dictionary configuration -->
<!-- Loads the word dictionary and the filler (noise) dictionary -->
<!-- from the voxforge-en-0.4 directory under src/. -->
<!-- ******************************************************** -->
<component name="dictionary"
type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
<property name="dictionaryPath"
value="file:src/voxforge-en-0.4/etc/cmudict.0.7a"/>
<property name="fillerPath"
value="file:src/voxforge-en-0.4/model_parameters/voxforge_en_sphinx.cd_cont_5000/noisedict"/>
<property name="addSilEndingPronunciation" value="false"/>
<!-- The value is the literal token <sil>. It is written with entity
     references because a raw '<' inside an attribute value is not
     well-formed XML and makes the parser reject the whole file. -->
<property name="wordReplacement" value="&lt;sil&gt;"/>
<property name="unitManager" value="unitManager"/>
</component>
<!-- ======================================================== -->
<!-- Language model                                           -->
<!-- Loaded from the wsj5k.DMP file; an alternative location  -->
<!-- is kept commented out at the end of the component.       -->
<!-- ======================================================== -->
<component name="trigramModel" type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
    <property name="unigramWeight" value=".5"/>
    <property name="maxDepth"      value="3"/>
    <property name="logMath"       value="logMath"/>
    <property name="dictionary"    value="dictionary"/>
    <property name="location"      value="file:src/voxforge-en-0.4/wsj5k.DMP"/>
    <!-- <property name="location" value="file:src/voxforge-Language/language_model.arpaformat.DMP"/>-->
</component>
<!-- ======================================================== -->
<!-- Acoustic model                                           -->
<!-- The component is still named "wsj", but its loader       -->
<!-- points at the VoxForge model directory; the hub4         -->
<!-- location is kept commented out.                          -->
<!-- ======================================================== -->
<component name="wsj" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
    <property name="loader"      value="wsjLoader"/>
    <property name="unitManager" value="unitManager"/>
</component>

<component name="wsjLoader"
           type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
    <property name="logMath"     value="logMath"/>
    <property name="unitManager" value="unitManager"/>
    <!-- <property name="location" value="file:src/hub4opensrc.cd_continuous_8gau"/>-->
    <property name="location"
              value="file:src/voxforge-en-0.4/model_parameters/voxforge_en_sphinx.cd_cont_5000" />
    <!-- dataLocation is deliberately set to the empty string -->
    <property name="dataLocation" value=""/>
</component>
<!-- ======================================================== -->
<!-- Unit manager (referenced by the linguist, dictionary     -->
<!-- and acoustic model components)                           -->
<!-- ======================================================== -->
<component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>
<!-- ======================================================== -->
<!-- Front end                                                -->
<!-- "epFrontEnd" lists its processing stages in order,       -->
<!-- starting from an audio file data source. Each stage      -->
<!-- named in the pipeline is defined as a component below.   -->
<!-- ======================================================== -->
<component name="epFrontEnd"
           type="edu.cmu.sphinx.frontend.FrontEnd">
    <propertylist name="pipeline">
        <item>audioFileDataSource </item>
        <item>dataBlocker </item>
        <item>speechClassifier </item>
        <item>speechMarker </item>
        <item>nonSpeechDataFilter </item>
        <item>preemphasizer </item>
        <item>windower </item>
        <item>fft </item>
        <item>melFilterBank </item>
        <item>dct </item>
        <item>liveCMN </item>
        <item>featureExtraction </item>
    </propertylist>
</component>

<!-- Input sources. Only audioFileDataSource appears in the pipeline above;
     the microphone component is defined but not listed there. -->
<component name="audioFileDataSource"
           type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>
<component name="microphone" type="edu.cmu.sphinx.frontend.util.Microphone">
    <property name="closeBetweenUtterances" value="false"/>
</component>

<!-- Blocking and endpointing stages -->
<component name="dataBlocker"
           type="edu.cmu.sphinx.frontend.DataBlocker"/>
<component name="speechClassifier" type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
    <property name="threshold" value="13"/>
</component>
<component name="nonSpeechDataFilter" type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>
<component name="speechMarker" type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker">
    <property name="speechTrailer" value="50"/>
</component>

<!-- Remaining pipeline stages, all with no properties set here -->
<component name="preemphasizer" type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>
<component name="windower" type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>
<component name="fft" type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>
<component name="melFilterBank" type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>
<component name="dct" type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>
<component name="liveCMN" type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>
<component name="featureExtraction" type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>
<!-- ======================================================= -->
<!-- Monitors                                                -->
<!-- Each tracker is bound to the recognizer named by the    -->
<!-- global "recognizer" property; every show* flag is set   -->
<!-- to false.                                               -->
<!-- ======================================================= -->
<component name="accuracyTracker" type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
    <property name="recognizer"         value="${recognizer}"/>
    <property name="showRawResults"     value="false"/>
    <property name="showAlignedResults" value="false"/>
</component>

<component name="memoryTracker" type="edu.cmu.sphinx.instrumentation.MemoryTracker">
    <property name="recognizer"  value="${recognizer}"/>
    <property name="showDetails" value="false"/>
    <property name="showSummary" value="false"/>
</component>

<!-- The speed tracker also takes the front end -->
<component name="speedTracker" type="edu.cmu.sphinx.instrumentation.SpeedTracker">
    <property name="recognizer"  value="${recognizer}"/>
    <property name="frontend"    value="${frontend}"/>
    <property name="showDetails" value="false"/>
</component>

<!-- recognizerMonitor lists configMonitor under allocatedMonitors -->
<component name="recognizerMonitor" type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
    <property name="recognizer" value="${recognizer}"/>
    <propertylist name="allocatedMonitors">
        <item>configMonitor </item>
    </propertylist>
</component>

<component name="configMonitor" type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
    <property name="showConfig" value="false"/>
</component>
<!-- ======================================================= -->
<!-- Miscellaneous: the logMath component referenced by the  -->
<!-- search, linguist, language model and loader components  -->
<!-- ======================================================= -->
<component name="logMath"
           type="edu.cmu.sphinx.util.LogMath">
    <property name="logBase"     value="1.0001"/>
    <property name="useAddTable" value="true"/>
</component>
</config>
Yapılandırmamda herhangi bir sorun var mı? Lütfen yardım edin.
Bu sorunun net bir odak noktası yoktur ve oldukça faydasızdır. –
Neyi anlayamıyorsunuz? Niyetim şu satırdan açıkça belli: ".wav (veya başka bir standart ses dosyası formatı) alan ve onu metne dönüştüren bir uygulama oluşturmak istiyorum." Sorunun geri kalanında ise neler yaptığımı ve hangi hatalarla karşılaştığımı açıkladım. – aProgrammer
"Bir .wav (veya başka bir standart ses dosyası formatı) alan ve onu metne dönüştüren bir uygulama oluşturmak istiyorum" ifadesi bir soru değil. Ayrıca, kullanmamanız gereken VoxForge modelini yanlışlıkla kullanıyorsunuz. Üstelik sorunun tam olarak ne olduğunu hiç anlatmıyorsunuz. Ben kullanabilir olandan –