content/browser/speech/speech_recognition_manager_impl.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
   6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
   7
   8 #include <map>
   9 #include <string>
  10
  11 #include "base/basictypes.h"
  12 #include "base/callback.h"
  13 #include "base/compiler_specific.h"
  14 #include "base/memory/weak_ptr.h"
  15 #include "content/browser/renderer_host/media/media_stream_requester.h"
  16 #include "content/public/browser/speech_recognition_event_listener.h"
  17 #include "content/public/browser/speech_recognition_manager.h"
  18 #include "content/public/browser/speech_recognition_session_config.h"
  19 #include "content/public/browser/speech_recognition_session_context.h"
  20 #include "content/public/common/speech_recognition_error.h"
  21
  22 namespace media {
     // Forward declaration: this header only refers to AudioManager through a
     // raw pointer (constructor argument and |audio_manager_| member).
  23 class AudioManager;
  24 }
  25
  26 namespace content {
  27 class BrowserMainLoop;
     // The types below are forward-declared because this header only names them
     // in a friend declaration, as raw pointers, or as smart-pointer template
     // arguments.
  28 class MediaStreamManager;
  29 class MediaStreamUIProxy;
  30 class SpeechRecognitionManagerDelegate;
  31 class SpeechRecognizer;
  32
  33 // This is the manager for speech recognition. It is a single instance in
  34 // the browser process and can serve several requests. Each recognition request
  35 // corresponds to a session, initiated via |CreateSession|.
  36 //
  37 // At any moment, the manager has a single session known as the primary session,
  38 // |primary_session_id_|.
  39 // This is the session that is capturing audio, waiting for user permission,
  40 // etc. There may also be other, non-primary, sessions living in parallel that
  41 // are waiting for results but not recording audio.
  42 //
  43 // The SpeechRecognitionManager has the following responsibilities:
  44 //  - Handles requests received from various render views and makes sure only
  45 //    one of them accesses the audio device at any given time.
  46 //  - Handles the instantiation of SpeechRecognitionEngine objects when
  47 //    requested by SpeechRecognitionSessions.
  48 //  - Relays recognition results/status/error events of each session to the
  49 //    corresponding listener (demuxing on the basis of their session_id).
  50 //  - Also relays recognition results/status/error events of every session to
  51 //    the catch-all snoop listener (optionally) provided by the delegate.
  52 class CONTENT_EXPORT SpeechRecognitionManagerImpl :
  53     public NON_EXPORTED_BASE(SpeechRecognitionManager),
  54     public SpeechRecognitionEventListener {
  55  public:
  56   // Returns the current SpeechRecognitionManagerImpl, or NULL if the call is
  57   // issued before it has been created or after it has been destroyed (by
       // BrowserMainLoop).
  58   static SpeechRecognitionManagerImpl* GetInstance();
  59
  60   // SpeechRecognitionManager implementation.
  61   virtual int CreateSession(
  62       const SpeechRecognitionSessionConfig& config) OVERRIDE;
  63   virtual void StartSession(int session_id) OVERRIDE;
  64   virtual void AbortSession(int session_id) OVERRIDE;
  65   virtual void AbortAllSessionsForListener(
  66         SpeechRecognitionEventListener* listener) OVERRIDE;
  67   virtual void AbortAllSessionsForRenderView(int render_process_id,
  68                                              int render_view_id) OVERRIDE;
  69   virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
  70   virtual const SpeechRecognitionSessionConfig& GetSessionConfig(
  71       int session_id) const OVERRIDE;
  72   virtual SpeechRecognitionSessionContext GetSessionContext(
  73       int session_id) const OVERRIDE;
       // Note: returns an int; the private GetSession(int) overload further down
       // returns a Session* instead.
  74   virtual int GetSession(int render_process_id,
  75                          int render_view_id,
  76                          int request_id) const OVERRIDE;
  77   virtual bool HasAudioInputDevices() OVERRIDE;
  78   virtual base::string16 GetAudioInputDeviceModel() OVERRIDE;
  79   virtual void ShowAudioInputSettings() OVERRIDE;
  80
  81   // SpeechRecognitionEventListener methods.
  82   virtual void OnRecognitionStart(int session_id) OVERRIDE;
  83   virtual void OnAudioStart(int session_id) OVERRIDE;
  84   virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;
  85   virtual void OnSoundStart(int session_id) OVERRIDE;
  86   virtual void OnSoundEnd(int session_id) OVERRIDE;
  87   virtual void OnAudioEnd(int session_id) OVERRIDE;
  88   virtual void OnRecognitionEnd(int session_id) OVERRIDE;
  89   virtual void OnRecognitionResults(
  90       int session_id, const SpeechRecognitionResults& result) OVERRIDE;
  91   virtual void OnRecognitionError(
  92       int session_id, const SpeechRecognitionError& error) OVERRIDE;
  93   virtual void OnAudioLevelsChange(int session_id, float volume,
  94                                    float noise_volume) OVERRIDE;
  95
       // Returns the delegate as a raw pointer; |delegate_| keeps ownership.
  96   SpeechRecognitionManagerDelegate* delegate() const { return delegate_.get(); }
  97
  98  protected:
  99   // BrowserMainLoop is the only one allowed to instantiate and free us.
 100   friend class BrowserMainLoop;
 101   // Needed for dtor.
 102   friend struct base::DefaultDeleter<SpeechRecognitionManagerImpl>;
       // NOTE(review): the arguments are presumably stored in |audio_manager_| and
       // |media_stream_manager_| -- confirm in the .cc file.
 103   SpeechRecognitionManagerImpl(media::AudioManager* audio_manager,
 104                                MediaStreamManager* media_stream_manager);
 105   virtual ~SpeechRecognitionManagerImpl();
 106
 107  private:
 108   // Data types for the internal Finite State Machine (FSM).
       // States of a session. SESSION_STATE_MAX_VALUE is an alias of the last
       // state, not an additional state.
 109   enum FSMState {
 110     SESSION_STATE_IDLE = 0,
 111     SESSION_STATE_CAPTURING_AUDIO,
 112     SESSION_STATE_WAITING_FOR_RESULT,
 113     SESSION_STATE_MAX_VALUE = SESSION_STATE_WAITING_FOR_RESULT
 114   };
 115
       // Events fed to the FSM through DispatchEvent(). EVENT_MAX_VALUE is an alias
       // of the last event, not an additional event.
 116   enum FSMEvent {
 117     EVENT_ABORT = 0,
 118     EVENT_START,
 119     EVENT_STOP_CAPTURE,
 120     EVENT_AUDIO_ENDED,
 121     EVENT_RECOGNITION_ENDED,
 122     EVENT_MAX_VALUE = EVENT_RECOGNITION_ENDED
 123   };
 124
       // Per-session bookkeeping; instances are referenced by pointer from
       // |sessions_|.
 125   struct Session {
 126     Session();
 127     ~Session();
 128
 129     int id;
         // NOTE(review): exact semantics of the two flags below are defined in the
         // .cc file -- confirm there before relying on them.
 130     bool abort_requested;
 131     bool listener_is_active;
 132     SpeechRecognitionSessionConfig config;
 133     SpeechRecognitionSessionContext context;
         // Ref-counted handle to the recognizer used by this session.
 134     scoped_refptr<SpeechRecognizer> recognizer;
         // Owned by the session (scoped_ptr).
 135     scoped_ptr<MediaStreamUIProxy> ui;
 136   };
 137
 138   // Callback issued by the SpeechRecognitionManagerDelegate for reporting
 139   // asynchronously the result of the CheckRecognitionIsAllowed call.
 140   void RecognitionAllowedCallback(int session_id,
 141                                   bool ask_user,
 142                                   bool is_allowed);
 143
 144   // Callback to get back the result of a media request. |devices| is an array
 145   // of devices approved to be used for the request; it is empty if the user
 146   // denies the request. Ownership of |stream_ui| is passed to this method.
 147   void MediaRequestPermissionCallback(int session_id,
 148                                       const MediaStreamDevices& devices,
 149                                       scoped_ptr<MediaStreamUIProxy> stream_ui);
 150
 151   // Entry point for pushing any external event into the session handling FSM.
 152   void DispatchEvent(int session_id, FSMEvent event);
 153
 154   // Defines the behavior of the session handling FSM, selecting the appropriate
 155   // transition according to the session, its current state and the event.
       // NOTE(review): despite the name, this returns void rather than a state.
 156   void ExecuteTransitionAndGetNextState(Session* session,
 157                                         FSMState session_state,
 158                                         FSMEvent event);
 159
 160   // Retrieves the state of the session by querying the recognizer directly.
 161   FSMState GetSessionState(int session_id) const;
 162
 163   // The methods below handle transitions of the session handling FSM.
 164   void SessionStart(const Session& session);
 165   void SessionAbort(const Session& session);
 166   void SessionStopAudioCapture(const Session& session);
 167   void ResetCapturingSessionId(const Session& session);
 168   void SessionDelete(Session* session);
       // NOTE(review): presumably the handler for (state, event) pairs that have
       // no valid transition -- confirm in the .cc file.
 169   void NotFeasible(const Session& session, FSMEvent event);
 170
       // Session lookup helpers. GetSession(int) returns a Session*, unlike the
       // public three-argument GetSession() which returns an int.
 171   bool SessionExists(int session_id) const;
 172   Session* GetSession(int session_id) const;
 173   SpeechRecognitionEventListener* GetListener(int session_id) const;
 174   SpeechRecognitionEventListener* GetDelegateListener() const;
 175   int GetNextSessionID();
 176
       // NOTE(review): raw pointers, presumably not owned by this class -- confirm.
 177   media::AudioManager* audio_manager_;
 178   MediaStreamManager* media_stream_manager_;
       // NOTE(review): presumably keyed by session id. The Session* values are raw
       // pointers, so check the .cc file for where they are deleted.
 179   typedef std::map<int, Session*> SessionsTable;
 180   SessionsTable sessions_;
       // Id of the primary session (see the class-level comment).
 181   int primary_session_id_;
       // NOTE(review): presumably the counter behind GetNextSessionID() -- confirm.
 182   int last_session_id_;
       // NOTE(review): presumably set while DispatchEvent() is running -- confirm.
 183   bool is_dispatching_event_;
 184   scoped_ptr<SpeechRecognitionManagerDelegate> delegate_;
 185
 186   // Used for posting asynchronous tasks (on the IO thread) without worrying
 187   // about this class being destroyed in the meantime (due to browser shutdown)
 188   // since tasks pending on a destroyed WeakPtr are automatically discarded.
 189   base::WeakPtrFactory<SpeechRecognitionManagerImpl> weak_factory_;
 190 };
 191
 192 }  // namespace content
 193
 194 #endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_