Audio Recording Action Button 🎤

After many attempts, I managed to insert an audio button action to transcribe voice to text. I share it here.

What it does is use Google Chrome’s free speech recognition API to transcribe your voice to text: it shows the live transcription in the text box and then sends it automatically by simulating an Enter key press. Here is the embed code — inject it into the footer section:

<script>
(function () {
  'use strict';

  const MIC_CLASS = 'mic-button';
  // Prefer the standard constructor, falling back to the WebKit-prefixed one
  // (Chrome, Edge and Safari expose webkitSpeechRecognition).
  const SpeechRec = window.SpeechRecognition || window.webkitSpeechRecognition;

  // Creates and injects a microphone button into every matching chat toolbar.
  function injectMic() {
    document.querySelectorAll('div.flex.items-center.gap-x-2').forEach(container => {
      // Skip containers that already have the button (avoid duplicates).
      if (container.querySelector(`button.${MIC_CLASS}`)) return;

      const btn = document.createElement('button');
      // Explicit type: without it, a <button> inside a <form> submits the form on click.
      btn.type = 'button';
      btn.className = `${MIC_CLASS} outline-none w-8 h-8 flex items-center justify-center rounded-full duration-200 transition-colors ease-in-out`;
      btn.style.backgroundColor = 'rgba(0, 0, 0, 0.063)';
      btn.style.marginLeft = '4px';
      btn.innerHTML = `
        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24"
             class="h-6 w-6 shrink-0 duration-200 transition-colors ease-in-out"
             style="color: rgba(0, 0, 0, 0.5);">
          <path fill="currentColor"
                d="M12 14a3 3 0 0 0 3-3V6a3 3 0 0 0-6 0v5a3 3 0 0 0 3 3Zm5-3a5 5 0 0 1-10 0H5a7 7 0 0 0 14 0h-2ZM11 21h2v-2h-2v2Z"/>
        </svg>
      `;

      // Guard against overlapping sessions from rapid repeated clicks.
      let recognizing = false;

      btn.addEventListener('click', () => {
        const ta = document.querySelector('textarea.resize-none');
        if (!ta || !SpeechRec) {
          return alert('Tu navegador no soporta SpeechRecognition');
        }
        if (recognizing) return; // a recognition session is already running

        let finalTranscript = '';
        const rec = new SpeechRec();
        rec.lang = 'es-ES';
        rec.interimResults = true;
        rec.maxAlternatives = 1;

        rec.onresult = e => {
          let interim = '';
          for (let i = e.resultIndex; i < e.results.length; i++) {
            const r = e.results[i];
            if (r.isFinal) finalTranscript += r[0].transcript + ' ';
            else interim += r[0].transcript;
          }
          const text = (finalTranscript + interim).trimEnd();
          // Write through the native setter so React-style frameworks see the change.
          const setter = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value').set;
          setter.call(ta, text);
          ta.dispatchEvent(new Event('input', { bubbles: true }));
        };

        rec.onerror = err => {
          recognizing = false;
          console.error('SpeechRecognition Error:', err.error);
        };

        rec.onend = () => {
          recognizing = false;
          // Only simulate Enter when something was actually transcribed,
          // so we never submit an empty message.
          if (finalTranscript.trim() === '') return;
          ta.dispatchEvent(new KeyboardEvent('keydown', {
            bubbles: true, cancelable: true,
            key: 'Enter', code: 'Enter', which: 13, keyCode: 13
          }));
        };

        // Assign handlers before starting so no early result can be missed.
        recognizing = true;
        rec.start();
      });

      container.appendChild(btn);
    });
  }

  // Initial injection.
  injectMic();

  // Observe the chat root (or the whole body as a fallback) to re-inject
  // the button whenever the DOM is re-rendered.
  const chatRoot = document.querySelector('div.fixed.flex.w-full.flex-col');
  const target = chatRoot || document.body;
  new MutationObserver(injectMic)
    .observe(target, { childList: true, subtree: true });
})();
</script>

Try it and tell me how it is, @nathaniel @admin_mike Is there any possibility of adding it natively?

~ all glory belongs to God

13 Likes

Wow, this is really cool – I’m assuming this would only work if the end user were using Chrome as their browser?

1 Like

Hello, very good observation. In that case, you can replace the Chrome API with Whisper or Deepgram, and it will work in any browser.

1 Like

@bruno12345 this is pretty cool. Thank you so much for sharing this.

Great work Bruno. I use a slightly different approach but yours is simpler… it does make my PAs more functional.

It looks super awesome! Yet, I’ve tried to paste the code in the Footer Section under the Settings tab but it doesn’t seem to work. I’m not an IT guy :slight_smile: Maybe I pasted it in the wrong area? Where should this code go?

I’m using my pickaxes embedded in a coaching platform. Maybe this script is only for studios? :thinking:

Spot-on… This script area is only for Studios.

Bruno, Thank you so much! This works like a charm. I did convert mine to English. :slight_smile:

For anyone wanting to use this you can follow these steps:

  1. Click on the studio where you want to apply the voice functionality.
  2. Click on Settings and scroll down to Footer
  3. Copy in the code (you can use this one for English)
<script>
(function () {
  'use strict';

  const MIC_CLASS = 'mic-button';
  // Prefer the standard constructor, falling back to the WebKit-prefixed one
  // (Chrome, Edge and Safari expose webkitSpeechRecognition).
  const SpeechRec = window.SpeechRecognition || window.webkitSpeechRecognition;

  // Function that creates and injects the microphone button into each toolbar.
  function injectMic() {
    document.querySelectorAll('div.flex.items-center.gap-x-2').forEach(container => {
      // Avoid duplicating if the button already exists
      if (container.querySelector(`button.${MIC_CLASS}`)) return;

      const btn = document.createElement('button');
      // Explicit type: without it, a <button> inside a <form> submits the form on click.
      btn.type = 'button';
      btn.className = `${MIC_CLASS} outline-none w-8 h-8 flex items-center justify-center rounded-full duration-200 transition-colors ease-in-out`;
      btn.style.backgroundColor = 'rgba(0, 0, 0, 0.063)';
      btn.style.marginLeft = '4px';
      btn.innerHTML = `
        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24"
             class="h-6 w-6 shrink-0 duration-200 transition-colors ease-in-out"
             style="color: rgba(0, 0, 0, 0.5);">
          <path fill="currentColor"
                d="M12 14a3 3 0 0 0 3-3V6a3 3 0 0 0-6 0v5a3 3 0 0 0 3 3Zm5-3a5 5 0 0 1-10 0H5a7 7 0 0 0 14 0h-2ZM11 21h2v-2h-2v2Z"/>
        </svg>
      `;

      // Guard against overlapping sessions from rapid repeated clicks.
      let recognizing = false;

      btn.addEventListener('click', () => {
        const ta = document.querySelector('textarea.resize-none');
        if (!ta || !SpeechRec) {
          return alert('Your browser does not support SpeechRecognition.');
        }
        if (recognizing) return; // a recognition session is already running

        let finalTranscript = '';
        const rec = new SpeechRec();
        rec.lang = 'en-US';            // main change: recognize English
        rec.interimResults = true;
        rec.maxAlternatives = 1;

        rec.onresult = e => {
          let interim = '';
          for (let i = e.resultIndex; i < e.results.length; i++) {
            const r = e.results[i];
            if (r.isFinal) finalTranscript += r[0].transcript + ' ';
            else interim += r[0].transcript;
          }
          const text = (finalTranscript + interim).trimEnd();

          // Live-update the textarea through the native setter so
          // React-style frameworks see the change.
          const setter = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value').set;
          setter.call(ta, text);
          ta.dispatchEvent(new Event('input', { bubbles: true }));
        };

        rec.onerror = err => {
          recognizing = false;
          console.error('SpeechRecognition error:', err.error);
        };

        rec.onend = () => {
          recognizing = false;
          // Only simulate Enter when something was actually transcribed,
          // so we never submit an empty message.
          if (finalTranscript.trim() === '') return;
          ta.dispatchEvent(new KeyboardEvent('keydown', {
            bubbles: true,
            cancelable: true,
            key: 'Enter',
            code: 'Enter',
            which: 13,
            keyCode: 13
          }));
        };

        // Assign handlers before starting so no early result can be missed.
        recognizing = true;
        rec.start();
      });

      container.appendChild(btn);
    });
  }

  // Initial injection
  injectMic();

  // Observe the chat root so we can re-inject if the DOM changes
  const chatRoot = document.querySelector('div.fixed.flex.w-full.flex-col');
  const target = chatRoot || document.body;
  new MutationObserver(injectMic).observe(target, { childList: true, subtree: true });
})();
</script>
  1. It autosaves and is ready to use.
  2. Open one of the pickaxes in that studio and click the mic to start. You will need to grant permission the first time you use it.
3 Likes