working example (add cylinder, clone, delete... rest is trickier)

stt
Fabien Benetou 6 months ago
parent a6588256fe
commit 5de00a7da6
  1. 94
      index.html
  2. 39
      recognizer-processor.js

@ -23,7 +23,7 @@
<!-- still experimenting, see webdav.html --> <!-- still experimenting, see webdav.html -->
<script src='dependencies/webdav.js'></script> <script src='dependencies/webdav.js'></script>
<script type="application/javascript" src="https://cdn.jsdelivr.net/npm/vosk-browser@0.0.5/dist/vosk.js"></script> <script type="application/javascript" src="https://cdn.jsdelivr.net/npm/vosk-browser@0.0.8/dist/vosk.js"></script>
<!-- replacing with local copies as CDNs are like unpkg tend to be slow <!-- replacing with local copies as CDNs are like unpkg tend to be slow
<script type="module" src="https://unpkg.com/immers-client/dist/destination.bundle.js"></script> <script type="module" src="https://unpkg.com/immers-client/dist/destination.bundle.js"></script>
@ -2165,9 +2165,10 @@ function addAllPrimitives(){
} }
function startExperience(){ function startExperience(){
startVoiceRecognition()
if (AFRAME.utils.device.checkHeadsetConnected()) if (AFRAME.utils.device.checkHeadsetConnected())
AFRAME.scenes[0].enterVR(); AFRAME.scenes[0].enterVR();
document.querySelector("#snapping-sound").components.sound.playSound(); //document.querySelector("#snapping-sound").components.sound.playSound();
document.querySelector("#mainbutton").style.display = "none" document.querySelector("#mainbutton").style.display = "none"
} }
@ -2318,42 +2319,58 @@ consider pick then apply, i.e changeColorLastId() but for next Id
*/ */
async function init() { async function startVoiceRecognition() {
const model = await Vosk.createModel('https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/../content/vosk-model-small-en-us-0.15.zip'); /* requires
//const model = await Vosk.createModel('model.tar.gz'); recognizer-processor.js
https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-model-small-en-us-0.15.tar.gz
const recognizer = new model.KaldiRecognizer();
recognizer.on("result", (message) => { from https://github.com/ccoreilly/vosk-browser/tree/master/examples/modern-vanilla
console.log(`Result: ${message.result.text}`); */
});
recognizer.on("partialresult", (message) => { const SpeechRecognition = window.webkitSpeechRecognition;
console.log(`Partial result: ${message.result.partial}`); (SpeechRecognition) ? console.log('should switch back to native WebSpeech API from speech branch') : console.log('polyfilling WebSpeech API')
});
const channel = new MessageChannel();
const mediaStream = await navigator.mediaDevices.getUserMedia({ // const model = await Vosk.createModel('model.tar.gz');
video: false, const model = await Vosk.createModel('https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-model-small-en-us-0.15.tar.gz');
audio: { model.registerPort(channel.port1);
echoCancellation: true,
noiseSuppression: true, const sampleRate = 48000;
channelCount: 1,
sampleRate: 16000 const recognizer = new model.KaldiRecognizer(sampleRate);
}, recognizer.setWords(true);
});
recognizer.on("result", (message) => {
const audioContext = new AudioContext(); const result = message.result;
const recognizerNode = audioContext.createScriptProcessor(4096, 1, 1) if (result) console.log(JSON.stringify(result, null, 2));
recognizerNode.onaudioprocess = (event) => {
try { });
recognizer.acceptWaveform(event.inputBuffer) recognizer.on("partialresult", (message) => {
} catch (error) { const partial = message.result.partial;
console.error('acceptWaveform failed', error)
} if (partial) console.log(partial)
} });
const source = audioContext.createMediaStreamSource(mediaStream);
source.connect(recognizerNode); const mediaStream = await navigator.mediaDevices.getUserMedia({
} video: false,
audio: {
window.onload = init; echoCancellation: true,
noiseSuppression: true,
channelCount: 1,
sampleRate
},
});
const audioContext = new AudioContext();
await audioContext.audioWorklet.addModule('recognizer-processor.js')
const recognizerProcessor = new AudioWorkletNode(audioContext, 'recognizer-processor', { channelCount: 1, numberOfInputs: 1, numberOfOutputs: 1 });
recognizerProcessor.port.postMessage({action: 'init', recognizerId: recognizer.id}, [ channel.port2 ])
recognizerProcessor.connect(audioContext.destination);
const source = audioContext.createMediaStreamSource(mediaStream);
source.connect(recognizerProcessor);
}
</script> </script>
<div id="observablehq-key"> <div id="observablehq-key">
@ -2378,7 +2395,6 @@ window.onload = init;
</a-assets> </a-assets>
<a-entity id="rig"> <a-entity id="rig">
<a-sound src="../content/street-crowd-ambience.mp3" autoplay=true loop=true volume=0.2></a-sound><!-- warning skipped on Quest, does autoplay there -->
<a-sound id="snapping-sound" src="url(../content/magnets_snap.mp3)"></a-sound> <a-sound id="snapping-sound" src="url(../content/magnets_snap.mp3)"></a-sound>
<a-entity id="player" networked="template:#avatar-template;attachTemplateToLocal:false;" <a-entity id="player" networked="template:#avatar-template;attachTemplateToLocal:false;"
hud camera look-controls wasd-controls waistattach="target: .movebypinch" position="0 1.6 0"></a-entity> hud camera look-controls wasd-controls waistattach="target: .movebypinch" position="0 1.6 0"></a-entity>

@ -0,0 +1,39 @@
/**
 * Audio worklet processor that relays incoming audio samples, chunk by chunk,
 * to a MessagePort supplied by the main thread.
 *
 * Protocol (as implemented below):
 *  - The owner posts `{ action: "init", recognizerId }` on the node's port and
 *    transfers one MessagePort along with it.
 *  - Once that port is known, every `process()` call posts an
 *    `{ action: "audioChunk", data, recognizerId, sampleRate }` message to it.
 */
class RecognizerAudioProcessor extends AudioWorkletProcessor {
  /**
   * @param {object} options - Forwarded untouched to AudioWorkletProcessor.
   */
  constructor(options) {
    super(options);
    this.port.onmessage = (event) => this._processMessage(event);
  }

  /**
   * Handles control messages from the node's owner. Only "init" is
   * recognised; any other action is ignored.
   *
   * @param {MessageEvent} event - Message whose `data` carries `action` and
   *   `recognizerId`, and whose first transferred port receives audio chunks.
   */
  _processMessage(event) {
    // console.debug(`Received event ${JSON.stringify(event.data, null, 2)}`);
    const { action, recognizerId } = event.data;
    if (action !== "init") {
      return;
    }
    this._recognizerId = recognizerId;
    this._recognizerPort = event.ports[0];
  }

  /**
   * Forwards the first channel of the first input to the recognizer port.
   * Nothing is sent until "init" has been received, or when the channel is
   * missing.
   *
   * @param {Float32Array[][]} inputs - Audio inputs; only `inputs[0][0]` is read.
   * @param {Float32Array[][]} outputs - Unused.
   * @param {object} parameters - Unused.
   * @returns {boolean} Always `true`.
   */
  process(inputs, outputs, parameters) {
    const samples = inputs[0][0];
    if (!this._recognizerPort || !samples) {
      return true;
    }

    // Incoming samples are floats in [-1.0, 1.0], whereas Kaldi expects the
    // signed int16 range (-32768 to 32767), hence the scaling by 0x8000.
    const scaled = new Float32Array(samples.length);
    for (let i = 0; i < samples.length; i += 1) {
      scaled[i] = samples[i] * 0x8000;
    }

    const chunk = {
      action: "audioChunk",
      data: scaled,
      recognizerId: this._recognizerId,
      sampleRate, // Part of AudioWorkletGlobalScope
    };
    // The backing buffer is transferred rather than copied.
    this._recognizerPort.postMessage(chunk, { transfer: [scaled.buffer] });
    return true;
  }
}
// Register the class under the name 'recognizer-processor'; the page refers to
// this same name when it constructs its AudioWorkletNode.
registerProcessor('recognizer-processor', RecognizerAudioProcessor)
Loading…
Cancel
Save