Compare commits


No commits in common. '89ed593d2025a10b085a948b045c11f49e4089a9' and '81a028b706002f205fb7be8a3362e237ec424dd6' have entirely different histories.

Files changed:
  1. index.html (120 changed lines)
  2. recognizer-processor.js (39 changed lines)

index.html
@@ -9,7 +9,6 @@
<script src='dependencies/webdav.js'></script>
<script src='jxr-core.js?1234'></script>
<script src='jxr-postitnote.js?13235'></script>
<script type="application/javascript" src="https://cdn.jsdelivr.net/npm/vosk-browser@0.0.8/dist/vosk.js"></script>
</head>
<body>
@@ -41,8 +40,6 @@ function getPoly(hashid){
//________________________________________________________________
// used for keywords like LAST / PREVIOUS / ALL
let addedContent = []
let speechCommands = []
let deletedContent = []
function getAllPrimitives(){
const other_primitives = ["camera", "cursor", "sky", "light", "sound", "videosphere"]
@@ -56,88 +53,25 @@ function getAllPrimitives(){
.map( (i,j) => i )
} // adapted from https://git.benetou.fr/utopiah/text-code-xr-engine/src/commit/0e1f297ec0cd17b0356811dfa0ab55f1e2629e7c/index.html#L2101
// should test first
const SpeechRecognition = window.webkitSpeechRecognition;
//(SpeechRecognition) ? console.log('should switch back to native WebSpeech API from speech branch') : console.log('polyfilling WebSpeech API')
(SpeechRecognition) ? nativeSpeechRecognition( parseSpeech ) : startVoiceRecognition( parseSpeech )
function nativeSpeechRecognition(callbackOnComplete){
recognizer = new SpeechRecognition();
recognizer.interimResults = true;
recognizer.continuous = true;
// does not work: recognizer.lang = 'fr-FR';
recognizer.lang = 'en-US';
recognizer.onresult = (event) => {
let result = event.results[event.resultIndex]
if (result.isFinal) {
console.log('You said: ' + result[0].transcript )
let speechContent = result[0].transcript
callbackOnComplete( speechContent )
}
}
}
// recognizer.start();
async function startVoiceRecognition( callbackOnComplete ) {
/* requires
recognizer-processor.js
https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-model-small-en-us-0.15.tar.gz
from https://github.com/ccoreilly/vosk-browser/tree/master/examples/modern-vanilla
*/
const channel = new MessageChannel();
// const model = await Vosk.createModel('model.tar.gz');
const model = await Vosk.createModel('https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-model-small-en-us-0.15.tar.gz');
model.registerPort(channel.port1);
const sampleRate = 48000;
const recognizer = new model.KaldiRecognizer(sampleRate);
recognizer.setWords(true);
recognizer.on("result", (message) => {
const result = message.result;
if (result) console.log(JSON.stringify(result, null, 2));
callbackOnComplete( result.text )
});
recognizer.on("partialresult", (message) => {
const partial = message.result.partial;
if (partial) console.log(partial)
});
const mediaStream = await navigator.mediaDevices.getUserMedia({
video: false,
audio: {
echoCancellation: true,
noiseSuppression: true,
channelCount: 1,
sampleRate
},
});
const audioContext = new AudioContext();
await audioContext.audioWorklet.addModule('recognizer-processor.js')
const recognizerProcessor = new AudioWorkletNode(audioContext, 'recognizer-processor', { channelCount: 1, numberOfInputs: 1, numberOfOutputs: 1 });
recognizerProcessor.port.postMessage({action: 'init', recognizerId: recognizer.id}, [ channel.port2 ])
recognizerProcessor.connect(audioContext.destination);
const source = audioContext.createMediaStreamSource(mediaStream);
source.connect(recognizerProcessor);
}
recognizer = new SpeechRecognition();
recognizer.interimResults = true;
recognizer.continuous = true;
// does not work: recognizer.lang = 'fr-FR';
recognizer.lang = 'en-US';
const aframeprimitives = getAllPrimitives()
const speechactions = [ "add", "apply", "delete", "clone", "model", "undo" ]
const speechcustomcomponents = [ "target", "teleporter" ]
function parseSpeech( speechContent ) {
let speechCommands = []
recognizer.onresult = (event) => {
let result = event.results[event.resultIndex]
if (result.isFinal) {
console.log('You said: ' + result[0].transcript )
let speechContent = result[0].transcript
let latest = addedContent.at(-1)
let cmd_words = speechContent.split(" ").map( i => i.toLowerCase() )
let el
@@ -157,19 +91,21 @@ function parseSpeech( speechContent ) {
case speechactions[1] :
console.log("recognized apply command")
latest.setAttribute( cmd_words[1], cmd_words[2]) // assuming fixed order for now
// should preserve attribute before modification for undoing
speechCommands.push( speechContent )
break;
case speechactions[2] :
latest.flushToDOM(true)
deletedContent.push( latest.cloneNode(true) )
deleteTarget( latest )
speechCommands.push( speechContent )
addedContent.pop()
break;
case speechactions[3] :
latest.flushToDOM(true)
el = latest.cloneNode(true) // seems to preserve most components but somehow not rotation
el = latest.cloneNode(true) // does not work properly; loses some attributes, scale in particular can be problematic
//["scale", "position", "rotation", "wireframe", "target", "material"].map( prop => el.setAttribute(prop, latest.getAttribute(prop) ) )
//["scale", "position", "rotation", "target" ].map( prop => el.setAttribute(prop, latest.getAttribute(prop) ) )
el.setAttribute("scale", latest.getAttribute("scale") )
el.setAttribute("position", latest.getAttribute("position") )
el.setAttribute("rotation", latest.getAttribute("rotation") )
el.setAttribute("target", latest.getAttribute("target") )
// untested
if (cmd_words[1]) console.log('could clone',cmd_words[1],'n times instead')
// could optionally add a number of times
@@ -187,33 +123,23 @@ function parseSpeech( speechContent ) {
let prev_cmd_words = speechCommands.at(-1).split(" ").map( i => i.toLowerCase() )
switch(prev_cmd_words[0]) {
case speechactions[0]:
case speechactions[3] :
case speechactions[4] :
console.log( "undoing", speechCommands.at(-1) )
deleteTarget( latest )
addedContent.pop()
break;
case speechactions[2] :
console.log( "undoing", speechCommands.at(-1) )
addedContent.push( deletedContent.at(-1) )
AFRAME.scenes[0].appendChild( deletedContent.at(-1) )
deletedContent.pop()
break;
default:
console.log( "can't undo", speechCommands.at(-1) )
// note that not all commands might be undo-able
}
}
// speechCommands.pop() not needed as, for now, undo is not part of the command stack
// to consider for redo
break;
default:
addedContent.push( addNewNoteAsPostItNote(speechContent, "0 1.2 -.5") )
// could become jxr code proper later, also allowing to re-execute a command again
}
}
}
//------------------------------------------------------------------------------------------------------------------------------
recognizer.start();
var forceXaxis
// setInterval( _ => console.log(forceXaxis), 1000)

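For reference, a minimal standalone sketch of the feature-detection pattern the index.html side relies on. This is not from either commit: the unprefixed window.SpeechRecognition check is an added assumption (the diff only tests webkitSpeechRecognition), and identifiers are illustrative.

// Sketch only: pick the native WebSpeech API when present, otherwise fall
// back to a WASM recognizer such as vosk-browser, as the code above does.
const Recognition = window.SpeechRecognition || window.webkitSpeechRecognition;
if (Recognition) {
  const recognizer = new Recognition();
  recognizer.continuous = true;     // keep listening across utterances
  recognizer.interimResults = true; // also emit partial hypotheses
  recognizer.lang = 'en-US';
  recognizer.onresult = (event) => {
    const result = event.results[event.resultIndex];
    if (result.isFinal) console.log('You said:', result[0].transcript);
  };
  recognizer.start();
} else {
  console.log('no native WebSpeech API, polyfilling with vosk-browser');
}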
recognizer-processor.js
@@ -1,39 +0,0 @@
class RecognizerAudioProcessor extends AudioWorkletProcessor {
constructor(options) {
super(options);
this.port.onmessage = this._processMessage.bind(this);
}
_processMessage(event) {
// console.debug(`Received event ${JSON.stringify(event.data, null, 2)}`);
if (event.data.action === "init") {
this._recognizerId = event.data.recognizerId;
this._recognizerPort = event.ports[0];
}
}
process(inputs, outputs, parameters) {
const data = inputs[0][0];
if (this._recognizerPort && data) {
// AudioBuffer samples are represented as floating point numbers between -1.0 and 1.0 whilst
// Kaldi expects them to be between -32768 and 32767 (the range of a signed int16)
const audioArray = data.map((value) => value * 0x8000);
this._recognizerPort.postMessage(
{
action: "audioChunk",
data: audioArray,
recognizerId: this._recognizerId,
sampleRate, // Part of AudioWorkletGlobalScope
},
{
transfer: [audioArray.buffer],
}
);
}
return true;
}
}
registerProcessor('recognizer-processor', RecognizerAudioProcessor)
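A note on the conversion done in process() above: Web Audio samples are Float32 values in [-1.0, 1.0], and multiplying by 0x8000 (32768) rescales them to the signed 16-bit range Kaldi expects. A small sketch, not from the diff, illustrating the mapping; note the original code does not clamp, so an input of exactly +1.0 maps to 32768, one above the int16 maximum of 32767.

// Sketch only: the float -> int16 rescaling used by RecognizerAudioProcessor.
const floatSamples = new Float32Array([0, 0.5, -1.0, 1.0]);
const scaled = floatSamples.map((v) => v * 0x8000);
console.log(Array.from(scaled)); // [0, 16384, -32768, 32768]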