pseudorealms-as-scaffoldings
Fabien Benetou 6 months ago
parent 825388b59e
commit 39886e9b57
  1. index.html (494 changed lines)
  2. recognizer-processor.js (39 changed lines)

@@ -9,498 +9,15 @@
<script src='dependencies/webdav.js'></script>
<script src='jxr-core.js?1234'></script>
<script src='jxr-postitnote.js?13235'></script>
<script type="application/javascript" src="https://cdn.jsdelivr.net/npm/vosk-browser@0.0.8/dist/vosk.js"></script>
</head>
<body>
<script type="module">
import { Wllama } from 'https://github.ngxson.com/wllama/esm/index.js';
(async () => {
const ROOT_PATH = 'https://github.ngxson.com/wllama/examples/basic/'
const CONFIG_PATHS = {
'single-thread/wllama.js' : ROOT_PATH+'../../esm/single-thread/wllama.js',
'single-thread/wllama.wasm' : ROOT_PATH+'../../esm/single-thread/wllama.wasm',
'multi-thread/wllama.js' : ROOT_PATH+'../../esm/multi-thread/wllama.js',
'multi-thread/wllama.wasm' : ROOT_PATH+'../../esm/multi-thread/wllama.wasm',
'multi-thread/wllama.worker.mjs': ROOT_PATH+'../../esm/multi-thread/wllama.worker.mjs',
};
// Automatically switch between single-thread and multi-thread version based on browser support
// If you want to enforce single-thread, add { "n_threads": 1 } to LoadModelConfig
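// e.g., a minimal sketch of enforcing single-thread per the note above (call shape assumed from that note, untested here):
// await wllama.loadModelFromUrl(modelUrl, { "n_threads": 1 })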
window.wllama = new Wllama(CONFIG_PATHS);
await wllama.loadModelFromUrl(
//"https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf"
'https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories15M-q4_0.gguf'
);
})();
</script>
<script>
function noteFromLastSpeech(){
let text = addedContent.at(-1)?.getAttribute("value")
if (text) noteFromPrompt( text )
}
function noteFromPrompt(prompt){
let y = 1.5 + Math.random() - .5
wllama.createCompletion(prompt, {
nPredict: 10,
sampling: { temp: 0.5, top_k: 40, top_p: 0.9, },
onNewToken: (token, piece, currentText) => {
console.log( currentText );
},
}).then( finalText => addNewNoteAsPostItNote(finalText, '0.2 '+y+' -.5') )
}
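// e.g. noteFromPrompt('hi said the penguin'), as wired to a troika-text shortcut in the scene below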
//________________________________________________________________
const endpointDomainOrIP = '192.168.0.129' // if the certificate is not trusted, open the endpoint in a browser tab first to accept it on the device
// e.g https://hmd.link/?https://192.168.0.129:8443/
const endpoint = 'https://'+endpointDomainOrIP+':8443/' // unfortunately not public yet due to API usage on poly.pizza
function getModelFromKeyword(keyword){
fetch(endpoint+"search?keyword="+keyword)
.then(r=>r.json())
.then(r=>getPoly(r.results[0].Download.replace('.glb','').replace(/.*\//,'')))
}
function getPoly(hashid){
fetch(endpoint+'getpoly?id='+hashid)
// wait for the proxy to finish fetching the asset; fixed delay, assuming the request went well
.then( _ => setTimeout( _ => {
let el = document.createElement("a-entity")
el.setAttribute('gltf-model', endpoint+'static/'+hashid+'.glb')
el.setAttribute('position','0 1 -.5')
el.setAttribute('scale','.001 .001 .001')
el.setAttribute('target','true')
AFRAME.scenes[0].appendChild(el)
}, 1000) )
}
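// e.g. getModelFromKeyword('penguin') searches the proxy then loads the first matching .glb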
//________________________________________________________________
// used for keywords like LAST / PREVIOUS / ALL
let addedContent = []
let speechCommands = []
let deletedContent = []
function getAllPrimitives(){
const other_primitives = ["camera", "cursor", "sky", "light", "sound", "videosphere"]
const other_primitives_with_param_needed = ["text", "gltf-model", "obj-model", "troika-text"]
return Object.getOwnPropertyNames(AFRAME.primitives.primitives)
// thanks to https://github.com/Utopiah/aframe-inVR-blocks-based-editor/blob/master/aframe-invr-inspect.js
.map( i => i.replace("a-",""))
.filter( i => other_primitives.indexOf(i) < 0 )
.filter( i => other_primitives_with_param_needed.indexOf(i) < 0 ) // temporarily disabled
// .map( (i,j) => addPrimitive( i, ""+ j/7 + " 1.4 -0.5" ) )
.map( (i,j) => i )
} // adapted from https://git.benetou.fr/utopiah/text-code-xr-engine/src/commit/0e1f297ec0cd17b0356811dfa0ab55f1e2629e7c/index.html#L2101
// should test first
const SpeechRecognition = window.webkitSpeechRecognition;
//(SpeechRecognition) ? console.log('should switch back to native WebSpeech API from speech branch') : console.log('polyfilling WebSpeech API')
if (SpeechRecognition) nativeSpeechRecognition( parseSpeech ); else startVoiceRecognition( parseSpeech )
function nativeSpeechRecognition(callbackOnComplete){
const recognizer = new SpeechRecognition();
recognizer.interimResults = true;
recognizer.continuous = true;
// does not work recognizer.lang = 'fr-FR';
recognizer.lang = 'en-US';
recognizer.onresult = (event) => {
let result = event.results[event.resultIndex]
if (result.isFinal) {
console.log('You said: ' + result[0].transcript )
let speechContent = result[0].transcript
callbackOnComplete( speechContent )
}
}
recognizer.start();
}
async function startVoiceRecognition( callbackOnComplete ) {
/* requires
recognizer-processor.js
https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-model-small-en-us-0.15.tar.gz
from https://github.com/ccoreilly/vosk-browser/tree/master/examples/modern-vanilla
*/
const channel = new MessageChannel();
// const model = await Vosk.createModel('model.tar.gz');
const model = await Vosk.createModel('https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-model-small-en-us-0.15.tar.gz');
// more models https://alphacephei.com/vosk/models including French
// e.g here const model = await Vosk.createModel('https://fabien.benetou.fr/pub/home/future_of_text_demo/engine/vosk-browser/vosk-browser/vosk-model-small-fr-pguyot-0.3.zip');
model.registerPort(channel.port1);
const sampleRate = 48000;
const recognizer = new model.KaldiRecognizer(sampleRate);
recognizer.setWords(true);
recognizer.on("result", (message) => {
const result = message.result;
if (result) console.log(JSON.stringify(result, null, 2));
callbackOnComplete( result.text )
});
recognizer.on("partialresult", (message) => {
const partial = message.result.partial;
if (partial) console.log(partial)
});
const mediaStream = await navigator.mediaDevices.getUserMedia({
video: false,
audio: {
echoCancellation: true,
noiseSuppression: true,
channelCount: 1,
sampleRate
},
});
const audioContext = new AudioContext();
await audioContext.audioWorklet.addModule('recognizer-processor.js')
const recognizerProcessor = new AudioWorkletNode(audioContext, 'recognizer-processor', { channelCount: 1, numberOfInputs: 1, numberOfOutputs: 1 });
recognizerProcessor.port.postMessage({action: 'init', recognizerId: recognizer.id}, [ channel.port2 ])
recognizerProcessor.connect(audioContext.destination);
const source = audioContext.createMediaStreamSource(mediaStream);
source.connect(recognizerProcessor);
}
const aframeprimitives = getAllPrimitives()
const speechactions = [ "add", "apply", "delete", "clone", "model", "undo" ]
const speechcustomcomponents = [ "target", "teleporter" ]
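// example utterances, one per action: "add box", "apply color red", "delete", "clone", "model penguin", "undo"
// anything else becomes a post-it note carrying the transcript (see the default case below)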
function parseSpeech( speechContent ) {
let latest = addedContent.at(-1)
let cmd_words = speechContent.split(" ").map( i => i.toLowerCase() )
let el
switch(cmd_words[0]) {
case speechactions[0]:
console.log("recognized proper command")
let primitive = cmd_words[1] // assuming fixed order for now
el = document.createElement("a-"+primitive)
el.setAttribute("target", "")
el.setAttribute("scale", ".1 .1 .1")
el.setAttribute("position", "0 1.5 -0.4")
addedContent.push(el)
AFRAME.scenes[0].appendChild( el )
speechCommands.push( speechContent )
break;
case speechactions[1] :
console.log("recognized apply command")
latest.setAttribute( cmd_words[1], cmd_words[2]) // assuming fixed order for now
// should preserve attribute before modification for undoing
speechCommands.push( speechContent )
break;
case speechactions[2] :
latest.flushToDOM(true)
deletedContent.push( latest.cloneNode(true) )
deleteTarget( latest )
speechCommands.push( speechContent )
addedContent.pop()
break;
case speechactions[3] :
latest.flushToDOM(true)
el = latest.cloneNode(true) // seems to preserve most component but somehow not rotation
// untested
if (cmd_words[1]) console.log('could clone',cmd_words[1],'n times instead')
// could optionally add a number of times
addedContent.push(el)
AFRAME.scenes[0].appendChild( el )
el.object3D.translateX(10) // due to scaling
speechCommands.push( speechContent )
break;
case speechactions[4] :
getModelFromKeyword( cmd_words[1] ) // requires the backend (proxy, LAN only for now, waiting for API clarification on 403)
speechCommands.push( speechContent )
break;
case speechactions[5] :
if ( speechCommands.at(-1) ){
let prev_cmd_words = speechCommands.at(-1).split(" ").map( i => i.toLowerCase() )
switch(prev_cmd_words[0]) {
case speechactions[0]:
case speechactions[3] :
case speechactions[4] :
console.log( "undoing", speechCommands.at(-1) )
deleteTarget( latest )
addedContent.pop()
break;
case speechactions[2] :
console.log( "undoing", speechCommands.at(-1) )
addedContent.push( deletedContent.at(-1) )
AFRAME.scenes[0].appendChild( deletedContent.at(-1) )
deletedContent.pop()
break;
default:
console.log( "can't undo", speechCommands.at(-1) )
// note that not all commands might be undo-able
}
}
// speechCommands.pop() not needed as, for now, undo is not part of the command stack
// to consider for redo
break;
default:
if (speechContent) addedContent.push( addNewNoteAsPostItNote(speechContent, "0 1.2 -.5") )
// could become jxr code proper later, also allowing to re-execute a command again
}
}
//------------------------------------------------------------------------------------------------------------------------------
var forceXaxis
// setInterval( _ => console.log(forceXaxis), 1000)
var translatingTargets = false
var clearRot
function toggleTranslateTargets(){
translatingTargets = !translatingTargets
let scene = AFRAME.scenes[0].object3D
if (translatingTargets){
let anchor = new THREE.Object3D()
let latest = selectedElements[selectedElements.length-1].element
latest.object3D.add( anchor )
// also inherits rotation, could try cancel it as the opposite of latest rotation
// might be easier to copy the position only every few ms instead
anchor.position.sub( latest.object3D.position )
//targets.map( t => anchor.attach(t.object3D) )
// should attach all BUT the current moving entity!
Array.from(document.querySelectorAll('.mab')).map( t => anchor.attach(t.object3D) )
// they don't move... despite
} else {
clearInterval( clearRot )
Array.from(document.querySelectorAll('.mab')).map( t => scene.attach(t.object3D) )
//targets.map( t => scene.attach(t.object3D) )
// could delete anchor, cleaner
}
}
var attachToPlayer = false
function toggleAttachToSelf(){
attachToPlayer = !attachToPlayer
// avoid assigning to the global `parent` (i.e. window.parent), which can fail silently
let newParent = attachToPlayer ? document.querySelector("#player") : AFRAME.scenes[0]
targets.map( t => newParent.object3D.attach(t.object3D) )
}
function checkIntersection(latest, nearby){
//let latest = selectedElements[selectedElements.length-1].element
//let nearby = getClosestTargetElements( latest.getAttribute('position') )
// https://threejs.org/docs/?q=box#api/en/math/Box3.containsBox
// https://threejs.org/docs/?q=box#api/en/math/Box3.expandByObject
let a = new THREE.Box3().expandByObject( latest.object3D ) // consider mesh.geometry.computeBoundingBox() first
let b = new THREE.Box3().expandByObject( nearby.object3D )
console.log(a,b, a.containsBox(b))
// testable as checkIntersection( document.querySelector("[color='yellow']"), document.querySelector("[color='purple']") )
// <a-box scale=".1 .1 .1" position=".5 .8 -.3" color="purple" ></a-box>
// <a-box scale=".2 .2 .2" position=".5 .8 -.3" color="yellow" ></a-box>
}
setTimeout( _ => {
let newPostIt = addNewNoteAsPostItNote("jxr console.log(222);", "0 1.2 -.5")
.setAttribute("onreleased", "grammarBasedSnap()")
let otherPostIt = addNewNoteAsPostItNote("jxr console.log(111);", "0 1.4 -.5")
.setAttribute("onreleased", "grammarBasedSnap()")
let postIt = addNewNoteAsPostItNote("hi this is a post-it note.", "0 1.6 -.5")
.setAttribute("onreleased", "runClosestJXR(); grammarBasedSnap()") // dunno how to share the event context back here...
// .setAttribute("onreleased", "snapNext()") // does NOT support multiple instances for now
// see https://aframe.io/docs/1.5.0/core/component.html#multiple
// maybe bind could help
//let cloneMe = addNewNote('jxr clone me from corner', '0 0 .1', '1 1 1', 'cmd')
// should rebind parent...
//setTimeout( _ => { _ => cloneMe.object3D.parent = postIt.object3D }, 1000 )
// should try object3D.attach() instead
//.addEventListener('loaded',
// entityIndexes( document.querySelector("[color='blue']").object3D.children[0] )
}, 1000 )
// e.g document.querySelector("[color='blue']").object3D.children[0]
function entityIndexes(mesh){ // needs a mesh with a geometry, not a group
// could also traverse
let gp = mesh.geometry.attributes.position;
let wPos = [];
for(let i = 0;i < gp.count; i++){
let p = new THREE.Vector3().fromBufferAttribute(gp, i); // set p from `position`
mesh.localToWorld(p); // p now has world coords
wPos.push(p);
}
// many are duplicates, e.g. a cube returns 24 vertices (4 per face, 6 faces), not 8
//let l = [...new Set(wPos)].length; console.log( l )
[...new Set(wPos)].map( p => addNewNote("x", p))
console.log( [...new Set(wPos)].length )
// Set compares object references, so distinct Vector3 instances with equal coordinates are not de-duplicated; the duplicates show up again
// try to "de-dup" via .distanceTo() below a threshold instead
}
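// a minimal sketch of the .distanceTo() de-dup suggested above; the helper name and
// epsilon threshold are assumptions, untested here
function dedupPositions(points, epsilon=1e-4){
const uniq = []
points.forEach( p => { if (!uniq.some( q => q.distanceTo(p) < epsilon )) uniq.push(p) } )
return uniq
}
// e.g. dedupPositions(wPos).map( p => addNewNote("x", p) ) should place 8 notes for a cube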
function snapToGrid(gridSize=1){ // default as 1 decimeter
let latest = selectedElements[selectedElements.length-1].element
latest.setAttribute("rotation", "0 0 0")
let pos = latest.getAttribute("position")
pos.multiplyScalar(gridSize*10).round().divideScalar(gridSize*10)
latest.setAttribute("position", pos )
}
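// e.g. with gridSize=1, (0.13, 1.46, -0.52) snaps to (0.1, 1.5, -0.5)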
// deeper question, making the rules themselves manipulable? JXR?
// So the result of the grammar becomes manipulable, but could you make the rules of the grammar itself visual? Even manipulable?
// could start by visualizing examples first e.g https://writer.com/wp-content/uploads/2024/03/grammar-1.webp
function snapMAB(){
// multibase arithmetic blocks aka MAB cf https://en.wikipedia.org/wiki/Base_ten_block
let latest = selectedElements[selectedElements.length-1].element
let nearby = getClosestTargetElements( latest.getAttribute('position') )
let linked = []
if (nearby.length>0){
latest.setAttribute("rotation", AFRAME.utils.coordinates.stringify( nearby[0].el.getAttribute("rotation") ) )
latest.setAttribute("position", AFRAME.utils.coordinates.stringify( nearby[0].el.getAttribute("position") ) )
latest.object3D.translateX( 1/10 )
linked.push( latest )
linked.push( nearby[0].el )
let overlap = Array.from( document.querySelectorAll(".mab") ).filter( e => e.object3D.position.distanceTo( latest.object3D.position ) < 0.01 && e!=latest )
while (overlap.length > 0 ){
latest.object3D.translateX( 1/10 )
linked.push( overlap[0] )
overlap = Array.from( document.querySelectorAll(".mab") ).filter( e => e.object3D.position.distanceTo( latest.object3D.position ) < 0.01 && e!=latest )
}
// do something special if it becomes 10, e.g become a single line, removing the "ridges"
if (linked.length > 3)
linked.map( e => Array.from( e.querySelectorAll("a-box") ).map( b => b.setAttribute("color", "orange") ) )
// also need to go backward too to see if it's the latest added
}
}
function snapRightOf(){
let latest = selectedElements[selectedElements.length-1].element
let nearby = getClosestTargetElements( latest.getAttribute('position') )
if (nearby.length>0){
latest.setAttribute("rotation", AFRAME.utils.coordinates.stringify( nearby[0].el.getAttribute("rotation") ) )
latest.setAttribute("position", AFRAME.utils.coordinates.stringify( nearby[0].el.getAttribute("position") ) )
latest.object3D.translateX( 1/10 )
// somehow... works only the 2nd time, not the 1st?!
}
}
function grammarBasedSnap(){
// verify if snappable, e.g of same type (or not)
// e.g check if both have .getAttribute('value').match(prefix) or not
let latest = selectedElements[selectedElements.length-1].element
let nearby = getClosestTargetElements( latest.getAttribute('position') )
if (nearby.length>0){
let closest = nearby[0].el
let latestTypeJXR = latest.getAttribute('value').match(prefix)
let closestTypeJXR = closest.getAttribute('value').match(prefix)
latest.setAttribute("rotation", AFRAME.utils.coordinates.stringify( closest.getAttribute("rotation") ) )
latest.setAttribute("position", AFRAME.utils.coordinates.stringify( closest.getAttribute("position") ) )
if ( latestTypeJXR && closestTypeJXR )
latest.object3D.translateX( 1/10 ) // same JXR type, snap close
else
latest.object3D.translateX( 2/10 ) // different types, snap away
// somehow... works only the 2nd time, not the 1st?!
}
}
function cloneTarget(target){
let el = target.cloneNode(true)
if (!el.id)
el.id = "clone_" + crypto.randomUUID()
else
el.id += "_clone_" + crypto.randomUUID()
AFRAME.scenes[0].appendChild(el)
}
function deleteTarget(target){
targets = targets.filter( e => e != target)
target.remove()
}
function runClosestJXR(){
// ideally this would come from event details
let latest = selectedElements[selectedElements.length-1].element
let nearby = getClosestTargetElements( latest.getAttribute('position') )
// if (nearby.length>0){ interpretJXR( nearby[0].el.getAttribute("value") ) }
nearby.map( n => interpretJXR( n.el.getAttribute("value") ) )
}
function notesFromArray(data, generatorName="", field="title", offset=1, step=1/10, depth=-.5 ){
data.slice(0,maxItemsFromSources).map( (n,i) => {
addNewNote( n[field], "0 "+(offset+i*step)+" "+depth, ".1 .1 .1", null, generatorName )
.setAttribute("onreleased","spreadItemsFromCollection('getcsljson', 1.5)")
})
}
function spreadItemsFromCollection( generatorName, offset=1, step=1/10, depth=-.5 ){
getArrayFromClass(generatorName).sort((a,b)=>a.getAttribute('position').y-b.getAttribute('position').y).map( (n,i) => {
n.setAttribute('position', "0 "+(offset+i*step)+" "+depth)
n.setAttribute('rotation', "0 0 0") // could also be based on the average of all items, the first item, last one, etc
// see also snap-on-pinchended component
})
let items = getArrayFromClass(generatorName).sort((b,a)=>a.getAttribute('position').y-b.getAttribute('position').y).map( n => n.getAttribute('value') )
shareLiveEvent('modified list', items)
}
AFRAME.registerComponent('onemptypinch', { // changed from ondrop to be coherent with event name
init: function(){
AFRAME.scenes[0].addEventListener('enter-vr', e => {
console.log('entered vr')
document.querySelector("[cursor]").setAttribute("visible", "true")
document.querySelector("[camera]").setAttribute("cursor", "")
})
},
// could support multi
events: {
emptypinch: function (e) {
// works with AFRAME.scenes[0].emit('emptypinch', {position:"0 0 0"})
let code = this.el.getAttribute('onemptypinch')
// if multi, should also look for onreleased__ not just onreleased
try {
eval( code ) // should be jxr too e.g if (txt.match(prefix)) interpretJXR(txt)
} catch (error) {
console.error(`Evaluation failed with ${error}`);
}
}
}
})
AFRAME.registerComponent('realm', {
// beyond the visual and self-editable aspect of https://git.benetou.fr/utopiah/text-code-xr-engine/src/branch/scaffolding generalizing it and making it, hopefully, safe.
})
function onHoveredTeleport(){
// iterate over targets
// see instead of teleportable https://aframe.io/docs/1.5.0/components/cursor.html#configuring-the-cursor-through-the-raycaster-component
Array.from( document.querySelectorAll("[teleporter]") ).map( target => {
if ( target.states.includes( "cursor-hovered" ) ){
target.setAttribute("material", "color", "magenta") // visited
document.getElementById('rig').setAttribute('position', target.getAttribute("position") )
}
})
}
AFRAME.registerComponent('teleporter', {
init: function(){
this.el.setAttribute("opacity", .5)
if (window.location.hash && document.querySelector(window.location.hash+"[teleporter]"))
document.getElementById('rig').setAttribute('position', document.querySelector(window.location.hash+"[teleporter]").getAttribute("position") )
},
events: {
mouseenter: function (e) { this.el.setAttribute("opacity", .8) },
mouseleave: function (e) { this.el.setAttribute("opacity", .5) },
click: function (e) {
let posTarget = new THREE.Vector3()
this.el.object3D.getWorldPosition( posTarget )
console.log( posTarget)
document.getElementById('rig').setAttribute('position', posTarget)
// seems to work, though maybe interference with other teleporters activated unknowingly, e.g. in succession
}
// this.el.getAttribute("position") ) }
// does not get proper world position
// makes it compatible with mouse on desktop ... but also somehow enables the wrist shortcut?!
}
});
AFRAME.registerComponent('scaffolding', {
init: function(){
console.log(this.el.innerHTML)
@@ -568,9 +85,10 @@ setTimeout( _ => {
<a-troika-text anchor=left value="jxr location.reload()" target position=" -0.3 1.30 0" rotation="0 40 0" scale="0.1 0.1 0.1"></a-troika-text>
<a-troika-text anchor=left value="jxr makeAnchorsVisibleOnTargets()" target position=" -0.3 1.20 0" rotation="0 40 0" scale="0.1 0.1 0.1"></a-troika-text>
<a-troika-text anchor=left value="jxr noteFromPrompt( 'hi said the penguin' )" target position=" -0.3 1.45 0" rotation="0 40 0" scale="0.1 0.1 0.1"></a-troika-text>
<a-troika-text anchor=left value="jxr noteFromLastSpeech()" target position=" -0.3 1.55 0" rotation="0 40 0" scale="0.1 0.1 0.1"></a-troika-text>
<a-box realm width=10 height=6 depth=10 wireframe="true" position="10 3 1"><a-troika-text scale="10 10 10" value="realm West"></a-troika-text></a-box>
<a-box realm width=10 height=6 depth=10 wireframe="true" position="-10 3 1"><a-troika-text scale="10 10 10" value="realm East"></a-troika-text></a-box>
<a-box realm width=10 height=6 depth=10 wireframe="true" position="0 3 11"><a-troika-text scale="10 10 10" value="realm North"></a-troika-text></a-box>
<a-box realm width=10 height=6 depth=10 wireframe="true" position="0 3 -11"><a-troika-text scale="10 10 10" value="realm South"></a-troika-text></a-box>
</a-scene>
</body>

@@ -1,39 +0,0 @@
class RecognizerAudioProcessor extends AudioWorkletProcessor {
constructor(options) {
super(options);
this.port.onmessage = this._processMessage.bind(this);
}
_processMessage(event) {
// console.debug(`Received event ${JSON.stringify(event.data, null, 2)}`);
if (event.data.action === "init") {
this._recognizerId = event.data.recognizerId;
this._recognizerPort = event.ports[0];
}
}
process(inputs, outputs, parameters) {
const data = inputs[0][0];
if (this._recognizerPort && data) {
// AudioBuffer samples are represented as floating point numbers between -1.0 and 1.0 whilst
// Kaldi expects them to be between -32768 and 32767 (the range of a signed int16)
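// e.g. 0.5 becomes 16384 and -1.0 becomes -32768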
const audioArray = data.map((value) => value * 0x8000);
this._recognizerPort.postMessage(
{
action: "audioChunk",
data: audioArray,
recognizerId: this._recognizerId,
sampleRate, // Part of AudioWorkletGlobalScope
},
{
transfer: [audioArray.buffer],
}
);
}
return true;
}
}
registerProcessor('recognizer-processor', RecognizerAudioProcessor)