getUserMedia and the Web Audio API are coming fast, so it's time to play around and combine a few tutorials into something (not entirely) new. You can synthesize sounds in very few lines of code and play a sequence of notes:

var song = "x-y-z--";

var AudioContext = window.AudioContext ||
  window.webkitAudioContext ||
  window.mozAudioContext;
var audio = new AudioContext();
var position = 0;
var scale = {
  x: 440,
  y: 880,
  z: 1500
};

setInterval(play, 200);

// play a short tone at the given frequency and stop it again after `duration` ms
function createOscillator(freq) {
  var duration = 200; // ms
  var osc = audio.createOscillator();

  osc.frequency.value = freq;
  osc.type = "square"; // "sine" works best but sounds worst
  osc.connect(audio.destination);
  osc.start(0);

  setTimeout(function() {
    osc.stop(0);
    osc.disconnect(audio.destination);
  }, duration);
}

// step through the song, one character per tick, looping back to the start
function play() {
  var note = song.charAt(position++);
  var freq = scale[note];
  if (position >= song.length) {
    position = 0;
  }
  if (freq) {
    createOscillator(freq);
  }
}

These are not actual musical notes (I think) but simply frequencies that work well on speakers while being spaced nicely apart. You could choose something else. For a detailed introduction to oscillators and all this audio generation stuff, make sure to check out Keith Peters’ awesome tutorial “Audio Synthesis in JavaScript”.
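If you would rather use real musical notes, you could swap in standard equal-tempered pitches, for example an A minor triad (purely illustrative; the bin indices in the recognition code further down would have to be adjusted to match):

// hypothetical alternative scale using actual pitches (A minor triad)
var scale = {
  x: 440,    // A4
  y: 523.25, // C5
  z: 659.25  // E5
};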

The next step is to recognize exactly which note is playing. This is a bit more complicated:

var AudioContext = window.AudioContext ||
  window.webkitAudioContext ||
  window.mozAudioContext;
var audioContext = new AudioContext();
var analyser = audioContext.createAnalyser();
var status = document.getElementById('status');
var notes = {
  x: 20,
  y: 41,
  z: 70
};

// restrict the analysis to the frequency bins around our three notes
var begin = 18;
var end = 72;

function analyze() {
  requestAnimationFrame(analyze);

  // get the current frequency data
  var buffer = new Float32Array(analyser.frequencyBinCount);
  analyser.getFloatFrequencyData(buffer);

  // find the loudest bin and its position
  var max = 0;
  var maxi = 0;
  var sum = 0;
  for (var i = begin; i < end; i++) {
    // shift the dB values (roughly -100 to -30) into a positive range
    var normalized = buffer[i] + 128;
    if (normalized > max) {
      max = normalized;
      maxi = i;
    }
    sum += normalized;
  }

  // compare the loudest bin against the average level of the cropped range
  var average = sum / (end - begin);
  var threshold = 2; // allowed deviation in bins around the expected position
  for (var note in notes) {
    if (max > average * 1.3 &&
        maxi < notes[note] + threshold &&
        maxi > notes[note] - threshold) {
      // we found a note!
      status.innerHTML = note;
      break;
    }
  }
}

var microphoneError = function(e) {
  // alert() ignores additional arguments, so log the error object separately
  console.error(e);
  alert('Microphone error!');
};

// try to access the microphone
var getUserMedia = navigator.getUserMedia ||
  navigator.webkitGetUserMedia ||
  navigator.mozGetUserMedia;
if (getUserMedia) {
  getUserMedia.call(navigator, {audio: true}, function(stream) {
    var source = audioContext.createMediaStreamSource(stream);
    source.connect(analyser);
    analyze();
  }, microphoneError);
} else {
  alert('Your browser does not support getUserMedia.');
}

I honestly have no idea where the numbers at the top come from; presumably they are the indices of the FFT frequency bins that the three pitches fall into. I simply logged the position of the maximum value for each played note and used those values, which worked fine. Putting it all together, you can play any pattern of the three notes on one side and display the recognized notes on the other side. With this concept you could transfer data, e.g. from your desktop browser to your mobile browser (at least when using Chrome Mobile Beta on Android and without much ambient noise). Just encode zeros and ones using these three notes and you can transfer binary data. Well, the bit rate might be a bit low.
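In fact, those values line up with the FFT bin math: assuming the analyser's default fftSize of 2048 and a 44.1 kHz sample rate, every bin covers roughly 21.5 Hz, so the three frequencies from above land in bins 20, 41 and 70. A minimal sketch (the binIndex helper is just for illustration):

// rough sanity check: which FFT bin should a given frequency fall into?
// assumes the default fftSize of 2048 and a 44.1 kHz AudioContext
function binIndex(freq) {
  var binWidth = audioContext.sampleRate / analyser.fftSize; // ~21.5 Hz
  return Math.round(freq / binWidth);
}

console.log(binIndex(440), binIndex(880), binIndex(1500)); // 20, 41, 70

And a naive encoder for the binary transfer idea could look roughly like this (purely a sketch: 'x' and 'y' stand for the two bit values, 'z' separates bytes, and the dashes insert pauses so repeated bits stay distinguishable):

// hypothetical encoder: turn a string into an 'x'/'y'/'z' song for the synthesizer above
function encode(text) {
  var song = '';
  for (var i = 0; i < text.length; i++) {
    var bits = text.charCodeAt(i).toString(2);
    while (bits.length < 8) {
      bits = '0' + bits; // pad to 8 bits
    }
    for (var j = 0; j < bits.length; j++) {
      song += (bits.charAt(j) === '0' ? 'x' : 'y') + '-';
    }
    song += 'z-'; // byte separator
  }
  return song;
}

var song = encode('hi'); // play this with the code from the first snippet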

I have prepared a working example for recognizing the notes (with a small spectrum analyzer) and for synthesizing them. It has only been tested in the current release of Chrome (v30) and Chrome Mobile for Android.

A way better-sounding native iPhone app already uses this concept.