working example (add cylinder, clone, delete... rest is trickier)

1 year ago · 5de00a7da6
parent a6588256fe
commit 5de00a7da6
2 changed files with 94 additions and 39 deletions
--- a/index.html
+++ b/index.html
@ -23,7 +23,7 @@
    <!-- still experimenting, see webdav.html -->
    <script src='dependencies/webdav.js'></script> 

-    <script type="application/javascript" src="https://cdn.jsdelivr.net/npm/vosk-browser@0.0.5/dist/vosk.js"></script> 
+    <script type="application/javascript" src="https://cdn.jsdelivr.net/npm/vosk-browser@0.0.8/dist/vosk.js"></script>

    <!-- replacing with local copies as CDNs are like unpkg tend to be slow
    <script type="module" src="https://unpkg.com/immers-client/dist/destination.bundle.js"></script>
@ -2165,9 +2165,10 @@ function addAllPrimitives(){
 }

 function startExperience(){
+	startVoiceRecognition() // async and not awaited: the returned promise has no rejection handler here, so a failure inside it is not handled by this function
        if (AFRAME.utils.device.checkHeadsetConnected())
 		AFRAME.scenes[0].enterVR();
-	document.querySelector("#snapping-sound").components.sound.playSound();
+	//document.querySelector("#snapping-sound").components.sound.playSound();
 	document.querySelector("#mainbutton").style.display = "none"
 }

@ -2318,42 +2319,58 @@ consider pick then apply, i.e changeColorLastId() but for next Id

 */
 
-async function init() { 
-    const model = await Vosk.createModel('https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/../content/vosk-model-small-en-us-0.15.zip'); 
-    //const model = await Vosk.createModel('model.tar.gz'); 
- 
-    const recognizer = new model.KaldiRecognizer(); 
-    recognizer.on("result", (message) => { 
-        console.log(`Result: ${message.result.text}`); 
-    }); 
-    recognizer.on("partialresult", (message) => { 
-        console.log(`Partial result: ${message.result.partial}`); 
-    }); 
-     
-    const mediaStream = await navigator.mediaDevices.getUserMedia({ 
-        video: false, 
-        audio: { 
-            echoCancellation: true, 
-            noiseSuppression: true, 
-            channelCount: 1, 
-            sampleRate: 16000 
-        }, 
-    }); 
-     
-    const audioContext = new AudioContext(); 
-    const recognizerNode = audioContext.createScriptProcessor(4096, 1, 1) 
-    recognizerNode.onaudioprocess = (event) => { 
-        try { 
-            recognizer.acceptWaveform(event.inputBuffer) 
-        } catch (error) { 
-            console.error('acceptWaveform failed', error) 
-        } 
-    } 
-    const source = audioContext.createMediaStreamSource(mediaStream); 
-    source.connect(recognizerNode); 
-} 
- 
-window.onload = init; 
+async function startVoiceRecognition() {
+/* Streams microphone audio through an AudioWorklet into a Vosk recognizer and logs what it recognizes.
+    Requires recognizer-processor.js (loaded below via audioWorklet.addModule) and the model archive
+    https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-model-small-en-us-0.15.tar.gz
+    Resolves with no value; none of the awaited calls below is wrapped in try/catch, so their failures reject the returned promise.
+    Adapted from https://github.com/ccoreilly/vosk-browser/tree/master/examples/modern-vanilla
+*/
+
+    const SpeechRecognition = window.webkitSpeechRecognition; // only the webkit-prefixed constructor is probed
+     (SpeechRecognition) ? console.log('should switch back to native WebSpeech API from speech branch') : console.log('polyfilling WebSpeech API') // informational only: both branches just log, the Vosk path below runs either way
+
+    const channel = new MessageChannel(); // port1 is registered with the model, port2 is transferred to the audio worklet further down
+    // const model = await Vosk.createModel('model.tar.gz');
+    const model = await Vosk.createModel('https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-model-small-en-us-0.15.tar.gz');
+    model.registerPort(channel.port1);
+
+    const sampleRate = 48000; // given to the recognizer and reused as the getUserMedia sampleRate constraint
+    
+    const recognizer = new model.KaldiRecognizer(sampleRate);
+    recognizer.setWords(true); // presumably requests per-word details in results; confirm against the vosk-browser docs
+
+    recognizer.on("result", (message) => {
+        const result = message.result;
+        if (result) console.log(JSON.stringify(result, null, 2)); // final results are only logged, pretty-printed
+        
+    });
+    recognizer.on("partialresult", (message) => {
+        const partial = message.result.partial;
+
+	if (partial) console.log(partial) // empty partials are skipped
+    });
+    
+    const mediaStream = await navigator.mediaDevices.getUserMedia({ // audio-only capture request
+        video: false,
+        audio: {
+            echoCancellation: true,
+            noiseSuppression: true,
+            channelCount: 1,
+            sampleRate
+        },
+    });
+    
+    const audioContext = new AudioContext(); // NOTE(review): created without a sampleRate option, so its rate may differ from the 48000 given to the recognizer; confirm
+    await audioContext.audioWorklet.addModule('recognizer-processor.js') // must finish before the 'recognizer-processor' node can be constructed
+
+    const recognizerProcessor = new AudioWorkletNode(audioContext, 'recognizer-processor', { channelCount: 1, numberOfInputs: 1, numberOfOutputs: 1 });
+    recognizerProcessor.port.postMessage({action: 'init', recognizerId: recognizer.id}, [ channel.port2 ]) // sends the recognizer id and transfers port2 to the processor
+    recognizerProcessor.connect(audioContext.destination); // NOTE(review): presumably connected to the destination only so the node keeps being processed; confirm
+    
+    const source = audioContext.createMediaStreamSource(mediaStream);
+    source.connect(recognizerProcessor); // microphone stream feeds the worklet node
+}

 </script>
 <div id="observablehq-key">
@ -2378,7 +2395,6 @@ window.onload = init;
      </a-assets>

      <a-entity id="rig">
-		<a-sound src="../content/street-crowd-ambience.mp3" autoplay=true loop=true volume=0.2></a-sound><!-- warning skipped on Quest, does autoplay there -->
 		<a-sound id="snapping-sound" src="url(../content/magnets_snap.mp3)"></a-sound>
 		<a-entity id="player" networked="template:#avatar-template;attachTemplateToLocal:false;" 
 			hud camera look-controls wasd-controls waistattach="target: .movebypinch" position="0 1.6 0"></a-entity>
--- a/recognizer-processor.js
+++ b/recognizer-processor.js
@ -0,0 +1,39 @@
+class RecognizerAudioProcessor extends AudioWorkletProcessor { // audio worklet processor that forwards its input samples to a recognizer over a MessagePort
+    constructor(options) {
+        super(options);
+        
+        this.port.onmessage = this._processMessage.bind(this); // messages arriving on this processor's port go to _processMessage
+    }
+    
+    _processMessage(event) { // only the "init" action is handled; any other message is ignored
+        // console.debug(`Received event ${JSON.stringify(event.data, null, 2)}`);
+        if (event.data.action === "init") {
+            this._recognizerId = event.data.recognizerId; // echoed back in every audioChunk message
+            this._recognizerPort = event.ports[0]; // first transferred port; audio chunks are posted to it
+        }
+    }
+    
+    process(inputs, outputs, parameters) { // outputs and parameters are not used by this implementation
+        const data = inputs[0][0]; // first channel of the first input
+        if (this._recognizerPort && data) { // nothing is sent before "init" has arrived or when there is no channel data
+            // AudioBuffer samples are represented as floating point numbers between -1.0 and 1.0 whilst
+            // Kaldi expects them to be between -32768 and 32767 (the range of a signed int16)
+            const audioArray = data.map((value) => value * 0x8000); // 0x8000 === 32768; map builds a new array, the input is left untouched
+        
+            this._recognizerPort.postMessage(
+                {
+                    action: "audioChunk",
+                    data: audioArray,
+                    recognizerId: this._recognizerId,
+                    sampleRate, // Part of AudioWorkletGlobalScope
+                },
+                {
+                    transfer: [audioArray.buffer], // the scaled copy's buffer is listed as a transferable
+                }
+            );
+        }
+        return true; // always true; in the AudioWorklet API this asks for the processor to be kept alive
+    }
+}
+
+registerProcessor('recognizer-processor', RecognizerAudioProcessor) // name must match the one passed to new AudioWorkletNode(...)