本人小试牛刀，试验了一下用 C#（.NET 3.0）的 WPF 技术开发了一个语音识别程序。
Window1.xaml.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;
using System.Reflection;
using System.Windows.Threading;
using System.IO;
using System.Xml;
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.Speech.Recognition;
using System.Speech.Recognition.SrgsGrammar;
using System.Speech.Synthesis;
namespace speechReco
{
/// <summary>
/// Interaction logic for Window1.xaml
/// </summary>
/// <summary>
/// Demo window exercising the two recognizer families in System.Speech:
/// the system-wide shared <see cref="SpeechRecognizer"/> and the in-process
/// <see cref="SpeechRecognitionEngine"/>. Each button wires up a different
/// scenario (SRGS rules, GrammarBuilder semantics, dictation, wav input,
/// phoneme discovery, emulation).
/// </summary>
public partial class Window1 : System.Windows.Window
{
    // System-wide shared recognizer (drives the Windows microphone bar); created once.
    private SpeechRecognizer sharedRecognizer;

    // In-process recognizer; created and torn down on demand by individual demo buttons.
    private SpeechRecognitionEngine appRecognizer;

    // SRGS "cmnrules" rule library, loaded from an embedded resource.
    private SrgsDocument sdCmnrules;

    // Phoneme string accumulated by the recognition callbacks in the
    // text -> wav -> phoneme round trip.
    string recoPhonemes;

    // Text accumulated while the tap-to-dictate button is held down.
    string dictationResult;

    public Window1()
    {
        InitializeComponent();

        sharedRecognizer = new SpeechRecognizer();
        // Trace every recognizer event to the console (handlers in SHARED_RECOGNIZER_EVENTS).
        sharedRecognizer.AudioLevelUpdated += new EventHandler<AudioLevelUpdatedEventArgs>(sharedRecognizer_AudioLevelUpdated);
        sharedRecognizer.AudioSignalProblemOccurred += new EventHandler<AudioSignalProblemOccurredEventArgs>(sharedRecognizer_AudioSignalProblemOccurred);
        sharedRecognizer.AudioStateChanged += new EventHandler<AudioStateChangedEventArgs>(sharedRecognizer_AudioStateChanged);
        sharedRecognizer.EmulateRecognizeCompleted += new EventHandler<EmulateRecognizeCompletedEventArgs>(sharedRecognizer_EmulateRecognizeCompleted);
        sharedRecognizer.LoadGrammarCompleted += new EventHandler<LoadGrammarCompletedEventArgs>(sharedRecognizer_LoadGrammarCompleted);
        sharedRecognizer.RecognizerUpdateReached += new EventHandler<RecognizerUpdateReachedEventArgs>(sharedRecognizer_RecognizerUpdateReached);
        sharedRecognizer.SpeechDetected += new EventHandler<SpeechDetectedEventArgs>(sharedRecognizer_SpeechDetected);
        sharedRecognizer.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(sharedRecognizer_SpeechHypothesized);
        sharedRecognizer.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(sharedRecognizer_SpeechRecognitionRejected);
        sharedRecognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(sharedRecognizer_SpeechRecognized);
        sharedRecognizer.StateChanged += new EventHandler<System.Speech.Recognition.StateChangedEventArgs>(sharedRecognizer_StateChanged);

        // Load the SRGS library from the embedded cmnrules resource.
        // FIX: dispose the stream and reader once the document is built.
        byte[] ba = speechReco.Properties.Resources.cmnrules;
        using (MemoryStream ms = new MemoryStream(ba))
        using (XmlReader xr = XmlReader.Create(ms))
        {
            sdCmnrules = new SrgsDocument(xr);
        }

        // Offer every publicly scoped rule in the ComboBox.
        foreach (SrgsRule rule in sdCmnrules.Rules)
        {
            if (rule.Scope == SrgsRuleScope.Public)
            {
                cbRules.Items.Add(rule.Id);
            }
        }
        // Default to the integer rule.
        cbRules.SelectedValue = "integer";
        cbRules.SelectionChanged += new SelectionChangedEventHandler(cbRules_SelectionChanged);

        // Wire up the demo buttons.
        this.btnSharedColor.Click += new RoutedEventHandler(btnSharedColor_Click);
        this.btnInProcColor.Click += new RoutedEventHandler(btnInProcColor_Click);
        this.btnTapDictation.PreviewMouseLeftButtonDown += new MouseButtonEventHandler(btnTapDictation_PreviewMouseLeftButtonDown);
        this.btnTapDictation.PreviewMouseLeftButtonUp += new MouseButtonEventHandler(btnTapDictation_PreviewMouseLeftButtonUp);
        this.btnSrgs.Click += new RoutedEventHandler(btnSrgs_Click);
        this.btnAdvGrammarBuilder.Click += new RoutedEventHandler(btnAdvGrammarBuilder_Click);
        this.btnWavFile.Click += new RoutedEventHandler(btnWavFile_Click);
        this.btnSynthPhonemes.Click += new RoutedEventHandler(btnSynthPhonemes_Click);
        this.btnEnable.Click += new RoutedEventHandler(btnEnable_Click);
        this.btnDisable.Click += new RoutedEventHandler(btnDisable_Click);
        this.btnUnload.Click += new RoutedEventHandler(btnUnload_Click);
        this.btnEmulate.Click += new RoutedEventHandler(btnEmulate_Click);
    }

    /// <summary>Tears down any live in-process recognizer so a new one can be created.</summary>
    private void DisposeAppRecognizer()
    {
        if (appRecognizer != null)
        {
            appRecognizer.RecognizeAsyncCancel();
            appRecognizer.Dispose();
            appRecognizer = null;
        }
    }

    /// <summary>Feeds the text "green" to the shared recognizer as if it had been spoken.</summary>
    void btnEmulate_Click(object sender, RoutedEventArgs e)
    {
        //sharedRecognizer.EmulateRecognize("green");
        sharedRecognizer.EmulateRecognizeAsync("green");
        //sharedRecognizer.EmulateRecognize("stop listening");
    }

    void btnUnload_Click(object sender, RoutedEventArgs e)
    {
        sharedRecognizer.UnloadAllGrammars();
    }

    void btnDisable_Click(object sender, RoutedEventArgs e)
    {
        sharedRecognizer.Enabled = false;
    }

    void btnEnable_Click(object sender, RoutedEventArgs e)
    {
        sharedRecognizer.Enabled = true;
    }

    /// <summary>
    /// Round-trips text through the synthesizer and recognizer to discover the
    /// phonemes the engines use for it: text -> WAV (synthesis) -> phonemes (recognition).
    /// </summary>
    void btnSynthPhonemes_Click(object sender, RoutedEventArgs e)
    {
        string textToSpeak = this.txtSynthTxt.Text.Trim();

        // Text to wav.
        MemoryStream audioStream = new MemoryStream();
        using (SpeechSynthesizer synth = new SpeechSynthesizer())
        {
            synth.SetOutputToWaveStream(audioStream);
            PromptBuilder pb = new PromptBuilder();
            // 'e' was not recognized when this break was large or missing.
            pb.AppendBreak(PromptBreak.ExtraSmall);
            synth.Speak(pb);
            synth.Speak(textToSpeak);
            synth.SetOutputToNull();
        }
        audioStream.Position = 0;

        // Now wav back to text, capturing the recognized phonemes.
        recoPhonemes = String.Empty;
        GrammarBuilder gb = new GrammarBuilder(textToSpeak);
        Grammar g = new Grammar(gb); // NOTE: the hard letters to recognize are 'g' and 'e'
        using (SpeechRecognitionEngine reco = new SpeechRecognitionEngine())
        {
            reco.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(reco_SpeechHypothesized);
            reco.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(reco_SpeechRecognitionRejected);
            reco.UnloadAllGrammars(); // only use the single-phrase grammar
            reco.LoadGrammar(g);
            reco.SetInputToWaveStream(audioStream);
            RecognitionResult rr = reco.Recognize();
            reco.SetInputToNull();
            if (rr != null)
            {
                recoPhonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);
            }
        }
        txtRecoPho.Text = recoPhonemes;
    }

    // Rejected results still carry phonemes; keep whatever the engine heard.
    void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
    {
        recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
    }

    void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
    {
        recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
    }

    /// <summary>
    /// Runs dictation recognition over "spoken.wav" (expected in the working
    /// directory) and shows both the phoneme and text results.
    /// </summary>
    void btnWavFile_Click(object sender, RoutedEventArgs e)
    {
        sharedRecognizer.Enabled = false;
        DisposeAppRecognizer();
        appRecognizer = new SpeechRecognitionEngine();
        try
        {
            appRecognizer.SetInputToWaveFile("spoken.wav");
            appRecognizer.LoadGrammar(new DictationGrammar());
            RecognitionResult rr = appRecognizer.Recognize();
            appRecognizer.SetInputToNull();
            if (rr == null)
            {
                MessageBox.Show("null result?");
            }
            else
            {
                // NOTE: an in-process recognizer cannot send feedback to the microphone bar.
                //SpeechUI.SendTextFeedback(rr, rr.Text, true);
                // Show the phoneme result, then the text result.
                txtRecoPho.Text = StringFromWordArray(rr.Words, WordType.Pronunciation);
                MessageBox.Show(rr.Text);
            }
        }
        finally
        {
            // FIX: guarantee disposal even if recognition throws.
            DisposeAppRecognizer();
        }
    }

    /// <summary>Selects which form of a recognized word <see cref="StringFromWordArray"/> emits.</summary>
    public enum WordType
    {
        Text,
        Normalized = Text,
        Lexical,
        Pronunciation
    }

    /// <summary>
    /// Flattens a recognized word sequence into a single string, honoring each
    /// word's display attributes (leading/trailing space handling).
    /// </summary>
    /// <param name="words">Recognized words from a <see cref="RecognitionResult"/>.</param>
    /// <param name="type">Which textual form of each word to emit.</param>
    /// <exception cref="InvalidEnumArgumentException">Unknown <paramref name="type"/> value.</exception>
    public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)
    {
        StringBuilder text = new StringBuilder();
        foreach (RecognizedWordUnit word in words)
        {
            string wordText;
            if (type == WordType.Text || type == WordType.Normalized)
            {
                wordText = word.Text;
            }
            else if (type == WordType.Lexical)
            {
                wordText = word.LexicalForm;
            }
            else if (type == WordType.Pronunciation)
            {
                wordText = word.Pronunciation;
            }
            else
            {
                // FIX: the original format string was "[0}", which itself threw
                // a FormatException instead of reporting the bad enum value.
                throw new InvalidEnumArgumentException(String.Format("{0}: is not a valid input", type));
            }
            // Apply the engine's display attributes for natural spacing.
            if ((word.DisplayAttributes & DisplayAttributes.OneTrailingSpace) != 0)
            {
                wordText += " ";
            }
            if ((word.DisplayAttributes & DisplayAttributes.TwoTrailingSpaces) != 0)
            {
                wordText += "  ";
            }
            if ((word.DisplayAttributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)
            {
                wordText = wordText.TrimStart();
            }
            if ((word.DisplayAttributes & DisplayAttributes.ZeroTrailingSpaces) != 0)
            {
                wordText = wordText.TrimEnd();
            }
            text.Append(wordText);
        }
        return text.ToString();
    }

    /// <summary>
    /// Builds and loads a pizza-ordering grammar with semantic keys:
    /// [I'd like] a [&lt;size&gt;] [&lt;crust&gt;] [&lt;topping&gt;] pizza [please].
    /// Based on http://msdn.microsoft.com/msdnmag/issues/06/01/speechinWindowsVista/#S5
    /// </summary>
    void btnAdvGrammarBuilder_Click(object sender, RoutedEventArgs e)
    {
        sharedRecognizer.Enabled = true;
        sharedRecognizer.UnloadAllGrammars();

        // Core sets of choices, each tagged with a semantic key.
        Choices sizes = new Choices("small", "regular", "large");
        Choices crusts = new Choices("thin crust", "thick crust");
        Choices toppings = new Choices("vegetarian", "pepperoni", "cheese");
        SemanticResultKey srkSize = new SemanticResultKey("size", sizes.ToGrammarBuilder());
        SemanticResultKey srkCrust = new SemanticResultKey("crust", crusts.ToGrammarBuilder());
        SemanticResultKey srkTopping = new SemanticResultKey("topping", toppings.ToGrammarBuilder());

        // Permutation 1: all three are spoken.
        GrammarBuilder sizeCrustTopping = new GrammarBuilder();
        sizeCrustTopping.Append(srkSize);
        sizeCrustTopping.Append(srkCrust);
        sizeCrustTopping.Append(srkTopping);

        // Permutation 2: size and topping spoken; crust defaults to "thick crust".
        // TODO no way found to attach a default semantic value here, so the
        // recognition handler supplies the default instead.
        GrammarBuilder sizeAndTopping = new GrammarBuilder();
        sizeAndTopping.Append(srkSize);
        sizeAndTopping.Append(srkTopping);

        // Permutation 3: topping only; size and crust default in the handler.
        GrammarBuilder toppingOnly = new GrammarBuilder();
        toppingOnly.Append(srkTopping);

        // Assemble the permutations.
        Choices permutations = new Choices();
        permutations.Add(sizeCrustTopping);
        permutations.Add(sizeAndTopping);
        permutations.Add(toppingOnly);

        // Now build the complete pattern...
        GrammarBuilder pizzaRequest = new GrammarBuilder();
        // Pre-amble: "[I'd like] a"
        // FIX: the phrase contained a mis-encoded smart quote ("I‘d").
        pizzaRequest.Append(new Choices("I'd like a", "a"));
        // Permutations: "[<size>] [<crust>] [<topping>]"
        pizzaRequest.Append(permutations);
        // Post-amble: "pizza [please]"
        pizzaRequest.Append(new Choices("pizza", "pizza please"));
        // Trace the phrases the grammar accepts (handy while debugging).
        System.Console.WriteLine("DebugShowPhrases : " + pizzaRequest.DebugShowPhrases);

        // Create the pizza grammar, attach the handler, and load it.
        Grammar pizzaGrammar = new Grammar(pizzaRequest);
        pizzaGrammar.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(pizzaGrammar_SpeechRecognized);
        sharedRecognizer.LoadGrammar(pizzaGrammar);
    }

    void pizzaGrammar_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        // FIX: the sizeAndTopping/toppingOnly permutations never attach the
        // "size"/"crust" semantic keys, so indexing Semantics directly threw
        // KeyNotFoundException for those phrases. Fall back to the intended
        // defaults ("regular" size, "thick crust") instead.
        SemanticValue semantics = e.Result.Semantics;
        string size = semantics.ContainsKey("size") ? semantics["size"].Value.ToString() : "regular";
        string crust = semantics.ContainsKey("crust") ? semantics["crust"].Value.ToString() : "thick crust";
        string topping = semantics.ContainsKey("topping") ? semantics["topping"].Value.ToString() : "?";

        StringBuilder resultString = new StringBuilder();
        resultString.Append("Raw text result: ");
        resultString.AppendLine(e.Result.Text);
        resultString.Append("Size: ");
        resultString.AppendLine(size);
        resultString.Append("Crust: ");
        resultString.AppendLine(crust);
        resultString.Append("Topping: ");
        resultString.AppendLine(topping);
        MessageBox.Show(resultString.ToString());
    }

    void cbRules_SelectionChanged(object sender, SelectionChangedEventArgs e)
    {
        //TODO
    }

    /// <summary>Loads the SRGS rule currently selected in the ComboBox into the shared recognizer.</summary>
    void btnSrgs_Click(object sender, RoutedEventArgs e)
    {
        sharedRecognizer.Enabled = true;
        sharedRecognizer.UnloadAllGrammars();
        string ruleName = (string)cbRules.SelectedValue;
        Grammar grammarSrgs = new Grammar(sdCmnrules, ruleName);
        grammarSrgs.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarSrgs_SpeechRecognized);
        sharedRecognizer.LoadGrammar(grammarSrgs);
        MessageBox.Show("listening for user input based on the selected rule : " + ruleName);
    }

    void grammarSrgs_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        // Send the recognized text to the microphone bar.
        SpeechUI.SendTextFeedback(e.Result, e.Result.Text, true);
        // Send the actual semantic (e.g. numeric) value to the TextBox on the form.
        if (e.Result.Semantics.Value != null)
        {
            this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Semantics.Value.ToString());
        }
    }

    /// <summary>
    /// Push-to-talk dictation: starts an in-process dictation session while the
    /// button is held down (spelling mode if the checkbox is checked).
    /// </summary>
    void btnTapDictation_PreviewMouseLeftButtonDown(object sender, MouseButtonEventArgs e)
    {
        sharedRecognizer.Enabled = false;
        dictationResult = String.Empty;
        DisposeAppRecognizer(); // FIX: don't leak a previous engine
        appRecognizer = new SpeechRecognitionEngine();
        appRecognizer.SetInputToDefaultAudioDevice();
        appRecognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(appRecognizer_SpeechRecognized);
        DictationGrammar dg;
        if (cbSpelling.IsChecked == false)
        {
            dg = new DictationGrammar();
        }
        else
        {
            dg = new DictationGrammar("grammar:dictation#spelling");
        }
        appRecognizer.LoadGrammar(dg);
        appRecognizer.RecognizeAsync(RecognizeMode.Multiple);
    }

    void appRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        // Raised on the UI thread here (the engine was created on it), so the
        // TextBox can be updated directly. NOTE(review): confirm if the engine
        // is ever created off the UI thread.
        dictationResult += e.Result.Text;
        txtReco.Text = dictationResult;
    }

    void btnTapDictation_PreviewMouseLeftButtonUp(object sender, MouseButtonEventArgs e)
    {
        // FIX: guard against a stray mouse-up with no matching mouse-down
        // (previously a NullReferenceException).
        if (appRecognizer != null)
        {
            appRecognizer.RecognizeAsyncStop();
            appRecognizer.Dispose();
            appRecognizer = null;
        }
    }

    /// <summary>Listens for a color name on a fresh in-process recognizer.</summary>
    void btnInProcColor_Click(object sender, RoutedEventArgs e)
    {
        sharedRecognizer.Enabled = false;
        Choices cColor = GetColorChoices();
        GrammarBuilder gb = new GrammarBuilder(cColor);
        Grammar grammarColors = new Grammar(gb);
        grammarColors.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);
        DisposeAppRecognizer(); // FIX: don't leak a previous engine on repeated clicks
        appRecognizer = new SpeechRecognitionEngine();
        appRecognizer.SetInputToDefaultAudioDevice();
        appRecognizer.LoadGrammar(grammarColors);
        appRecognizer.LoadGrammar(new DictationGrammar());
        appRecognizer.RecognizeAsync(RecognizeMode.Multiple);
        MessageBox.Show("listening for you to say a color (e.g. Green)");
    }

    /// <summary>Builds a grammar choice list from the member names of <see cref="Colors"/>.</summary>
    private Choices GetColorChoices()
    {
        Choices cColor = new Choices();
        Type t = typeof(Colors);
        MemberInfo[] mia = t.GetMembers(BindingFlags.Public | BindingFlags.Static);
        foreach (MemberInfo mi in mia)
        {
            // Skip the compiler-generated property accessors; keep the color names.
            if (mi.Name.StartsWith("get_") == true)
                continue;
            cColor.Add(mi.Name);
        }
        return cColor;
    }

    /// <summary>Listens for a color name on the shared recognizer.</summary>
    void btnSharedColor_Click(object sender, RoutedEventArgs e)
    {
        sharedRecognizer.Enabled = true;
        sharedRecognizer.UnloadAllGrammars();
        Choices cColor = GetColorChoices();
        GrammarBuilder gb = new GrammarBuilder(cColor);
        Grammar grammarColors = new Grammar(gb);
        grammarColors.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(grammarColors_SpeechRecognized);
        sharedRecognizer.LoadGrammar(grammarColors);
        MessageBox.Show("listening for you to say a color (e.g. Green)");
    }

    void grammarColors_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        // May be raised off the UI thread, so marshal through the Dispatcher.
        // FIX: the original cast an anonymous method to
        // System.Windows.Forms.MethodInvoker, dragging a WinForms reference
        // into a WPF app; use the delegate declared below instead.
        this.Dispatcher.Invoke(DispatcherPriority.Render, new UpdateTxtRecoDelegate(UpdateTextReco), e.Result.Text);
    }

    // Delegate used to marshal recognized text back onto the UI thread.
    delegate void UpdateTxtRecoDelegate(string arg);

    public void UpdateTextReco(string arg)
    {
        txtReco.Text = arg;
    }

    #region SHARED_RECOGNIZER_EVENTS
    // Console tracing for every shared-recognizer event, to observe the
    // recognizer's life cycle while experimenting.

    void sharedRecognizer_StateChanged(object sender, System.Speech.Recognition.StateChangedEventArgs e)
    {
        System.Console.WriteLine("StateChanged : " + e.RecognizerState.ToString());
    }

    void sharedRecognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        // Raised on the UI thread for the shared recognizer.
        System.Console.WriteLine("SpeechRecognized : " + e.Result.Text);
        //txtReco.Text = e.Result.Text;
    }

    void sharedRecognizer_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
    {
        System.Console.WriteLine("SpeechRecognitionRejected : " + e.Result.Text);
    }

    void sharedRecognizer_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
    {
        System.Console.WriteLine("SpeechHypothesized : " + e.Result.Text);
    }

    void sharedRecognizer_SpeechDetected(object sender, SpeechDetectedEventArgs e)
    {
        System.Console.WriteLine("SpeechDetected : " + e.AudioPosition.TotalMilliseconds.ToString());
    }

    void sharedRecognizer_RecognizerUpdateReached(object sender, RecognizerUpdateReachedEventArgs e)
    {
        System.Console.WriteLine("RecognizerUpdateReached : " + e.AudioPosition.TotalMilliseconds.ToString());
    }

    void sharedRecognizer_LoadGrammarCompleted(object sender, LoadGrammarCompletedEventArgs e)
    {
        System.Console.WriteLine("LoadGrammarCompleted : " + e.Grammar.Name);
    }

    void sharedRecognizer_EmulateRecognizeCompleted(object sender, EmulateRecognizeCompletedEventArgs e)
    {
        if (e.Result != null)
        {
            System.Console.WriteLine("EmulateRecognizeCompleted : " + e.Result.Text);
        }
        else
        {
            System.Console.WriteLine("EmulateRecognizeCompleted : null result");
        }
    }

    void sharedRecognizer_AudioStateChanged(object sender, AudioStateChangedEventArgs e)
    {
        System.Console.WriteLine("AudioStateChanged : " + e.AudioState.ToString());
    }

    void sharedRecognizer_AudioSignalProblemOccurred(object sender, AudioSignalProblemOccurredEventArgs e)
    {
        System.Console.WriteLine("AudioSignalProblemOccurred : " + e.AudioSignalProblem.ToString());
    }

    void sharedRecognizer_AudioLevelUpdated(object sender, AudioLevelUpdatedEventArgs e)
    {
        // Too noisy to log; uncomment to watch the input level.
        //System.Console.WriteLine("AudioLevelUpdated : " + e.AudioLevel.ToString());
    }
    #endregion
}
}
需要源码的朋友请留下 Email，我会发给大家。
再分享一下我老师大神的人工智能教程吧。零基础!通俗易懂!风趣幽默!还带黄段子!希望你也加入到我们人工智能的队伍中来!https://blog.csdn.net/jiangjunshow
原文地址:https://www.cnblogs.com/wicnwicnwh/p/10308278.html