From 5029b9b36e9357fcffc247bbed394ae805bc120b Mon Sep 17 00:00:00 2001 From: Roberto Date: Wed, 5 Oct 2016 01:33:11 +0300 Subject: [PATCH 1/7] remodel audio stream Completely changed the file. Made the audio to be a callback. Took only the first bin as a stream of scalars to feed into an OPF model. This opposed to taking the whole FFT vector and feeding directly into a temporal pooler. Added anomaly and likelihood computations, and plotted them. --- audiostream/audiostream_tp.py | 437 ++++++++++++++-------------------- audiostream/model_params.py | 224 +++++++++++++++++ audiostream/model_params.pyc | Bin 0 -> 1807 bytes 3 files changed, 403 insertions(+), 258 deletions(-) create mode 100644 audiostream/model_params.py create mode 100644 audiostream/model_params.pyc diff --git a/audiostream/audiostream_tp.py b/audiostream/audiostream_tp.py index 6d8a73f..72989af 100755 --- a/audiostream/audiostream_tp.py +++ b/audiostream/audiostream_tp.py @@ -32,266 +32,187 @@ """ import numpy import pyaudio -import matplotlib.pyplot as plt -from nupic.encoders.sparse_pass_through_encoder import SparsePassThroughEncoder -from nupic.research.TP10X2 import TP10X2 as TP - - - -class Visualizations: - - def calcAnomaly(self, actual, predicted): - """ - Calculates the anomaly of two SDRs - - Uses the equation presented on the wiki: - https://github.com/numenta/nupic/wiki/Anomaly-Score-Memo - - To put this in terms of the temporal pooler: - A is the actual input array at a given timestep - P is the predicted array that was produced from the previous timestep(s) - [A - (A && P)] / [A] - Rephrasing as questions: - What bits are on in A that are not on in P? - How does that compare to total on bits in A? - - Outputs 0 is there's no difference between P and A. - Outputs 1 if P and A are totally distinct. - - Not a perfect metric - it doesn't credit proximity - Next step: combine with a metric for a spatial pooler - """ - combined = numpy.logical_and(actual, predicted) - delta = numpy.logical_xor(actual,combined) - delta_score = sum(delta) - actual_score = float(sum(actual)) - return delta_score / actual_score - - - def compareArray(self, actual, predicted): - """ - Produce an array that compares the actual & predicted - - 'A' - actual - 'P' - predicted - 'E' - expected (both actual & predicted - ' ' - neither an input nor predicted - """ - compare = [] - for i in range(actual.size): - if actual[i] and predicted[i]: - compare.append('E') - elif actual[i]: - compare.append('A') - elif predicted[i]: - compare.append('P') - else: - compare.append(' ') - return compare - - - def hashtagAnomaly(self, anomaly): - """ - Basic printout method to visualize the anomaly score (scale: 1 - 50 #'s) - """ - hashcount = '#' - for i in range(int(anomaly / 0.02)): - hashcount += '#' - for j in range(int((1 - anomaly) / 0.02)): - hashcount += '.' - return hashcount +import matplotlib +matplotlib.use("TkAgg") +import matplotlib.pyplot as plt +from collections import deque +from nupic.data.inference_shifter import InferenceShifter +from nupic.frameworks.opf.modelfactory import ModelFactory +from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood +import model_params + +WINDOW = 60 + +class AudioPrediction: + + def __init__(self): + + """ + Setup the plot, interactive mode on, title, etc. 
+ Rescale the y-axis + """ + plt.ion() + fig = plt.figure() + plt.title('Audio Stream example') + plt.xlabel('Time') + plt.ylabel('Frequency Level [dB]') + yLimit = 200 + plt.ylim(0, yLimit) + + + """ + Create model, set predicted field, and likelihood + """ + model = ModelFactory.create(model_params.MODEL_PARAMS) + model.enableInference({'predictedField' : 'binAmplitude'}) + + likelihoods = AnomalyLikelihood() + + shifter = InferenceShifter() + + actHistory = deque([0.0] * WINDOW, maxlen = 60) + predHistory = deque([0.0] * WINDOW, maxlen = 60) + anomHistory = deque([0.0] * WINDOW, maxlen = 60) + likeHistory = deque([0.0] * WINDOW, maxlen = 60) + + actline, = plt.plot(range(WINDOW), actHistory) + predline, = plt.plot(range(WINDOW), predHistory) + anomline, = plt.plot(range(WINDOW), anomHistory) + likeline, = plt.plot(range(WINDOW), likeHistory) + + """ + Instance of the class to stream audio + """ + audio = AudioStream() + while audio.start==False:1 + + + while True: + + + inputLevel = audio.audioFFT[1] + + # Clip input + maxLevel = model_params.MODEL_PARAMS['modelParams']['sensorParams']['encoders']['binAmplitude']['maxval'] + if inputLevel > maxLevel: + inputLevel = maxLevel + + modelInput = {'binAmplitude' : inputLevel} + result = shifter.shift(model.run(modelInput)) + + inference = result.inferences['multiStepBestPredictions'][5] + anomaly = result.inferences['anomalyScore'] + likelihood = likelihoods.anomalyProbability(inputLevel, anomaly) + + if anomaly is not None: + actHistory .append(result.rawInput['binAmplitude']) + predHistory.append(inference) + anomHistory.append(anomaly * yLimit/2) + likeHistory.append(likelihood * yLimit/2) + + + actline .set_ydata(actHistory) + predline.set_ydata(predHistory) + anomline.set_ydata(anomHistory) + likeline.set_ydata(likeHistory) + + plt.draw() + plt.legend(('actual','predicted', 'anomaly', 'likelihood')) class AudioStream: - def __init__(self): - """ - Instantiate temporal pooler, encoder, audio sampler, filter, & freq plot - """ - self.vis = Visualizations() - - """ - The number of columns in the input and therefore the TP - 2**9 = 512 - Trial and error pulled that out - numCols should be tested during benchmarking - """ - self.numCols = 2**9 - sparsity = 0.10 - self.numInput = int(self.numCols * sparsity) - - """ - Create a bit map encoder - - From the encoder's __init__ method: - 1st arg: the total bits in input - 2nd arg: the number of bits used to encode each input bit - """ - self.e = SparsePassThroughEncoder(self.numCols, 1) - - """ - Sampling details - rate: The sampling rate in Hz of my soundcard - buffersize: The size of the array to which we will save audio segments (2^12 = 4096 is very good) - secToRecord: The length of each sampling - buffersToRecord: how many multiples of buffers are we recording? 
- """ - rate=44100 - secToRecord=.1 - self.buffersize=2**12 - self.buffersToRecord=int(rate*secToRecord/self.buffersize) - if not self.buffersToRecord: - self.buffersToRecord=1 - - """ - Filters in Hertz - highHertz: lower limit of the bandpass filter, in Hertz - lowHertz: upper limit of the bandpass filter, in Hertz - max lowHertz = (buffersize / 2 - 1) * rate / buffersize - """ - highHertz = 500 - lowHertz = 10000 - - """ - Convert filters from Hertz to bins - highpass: convert the highHertz into a bin for the FFT - lowpass: convert the lowHertz into a bin for the FFt - NOTES: - highpass is at least the 1st bin since most mics only pick up >=20Hz - lowpass is no higher than buffersize/2 - 1 (highest array index) - passband needs to be wider than size of numInput - not checking for that - """ - self.highpass = max(int(highHertz * self.buffersize / rate),1) - self.lowpass = min(int(lowHertz * self.buffersize / rate), self.buffersize/2 - 1) - - """ - The call to create the temporal pooler region - """ - self.tp = TP(numberOfCols=self.numCols, cellsPerColumn=4, - initialPerm=0.5, connectedPerm=0.5, - minThreshold=10, newSynapseCount=10, - permanenceInc=0.1, permanenceDec=0.07, - activationThreshold=8, - globalDecay=0.02, burnIn=2, - checkSynapseConsistency=False, - pamLength=100) - - """ - Creating the audio stream from our mic - """ - p = pyaudio.PyAudio() - self.inStream = p.open(format=pyaudio.paInt32,channels=1,rate=rate,input=True,frames_per_buffer=self.buffersize) - - """ - Setting up the array that will handle the timeseries of audio data from our input - """ - self.audio = numpy.empty((self.buffersToRecord*self.buffersize),dtype="uint32") - - """ - Print out the inputs - """ - print "Number of columns:\t" + str(self.numCols) - print "Max size of input:\t" + str(self.numInput) - print "Sampling rate (Hz):\t" + str(rate) - print "Passband filter (Hz):\t" + str(highHertz) + " - " + str(lowHertz) - print "Passband filter (bin):\t" + str(self.highpass) + " - " + str(self.lowpass) - print "Bin difference:\t\t" + str(self.lowpass - self.highpass) - print "Buffersize:\t\t" + str(self.buffersize) - - """ - Setup the plot - Use the bandpass filter frequency range as the x-axis - Rescale the y-axis - """ - plt.ion() - bin = range(self.highpass,self.lowpass) - xs = numpy.arange(len(bin))*rate/self.buffersize + highHertz - self.freqPlot = plt.plot(xs,xs)[0] - plt.ylim(0, 10**12) - - while True: - self.processAudio() - - - def processAudio (self): - """ - Sample audio, encode, send it to the TP - - Pulls the audio from the mic - Conditions that audio as an SDR - Computes a prediction via the TP - Update the visualizations - """ - - """ - Cycle through the multiples of the buffers we're sampling - Sample audio to store for each frame in buffersize - Mic voltage-level timeseries is saved as 32-bit binary - Convert that 32-bit binary into integers, and save to array for the FFT - """ - for i in range(self.buffersToRecord): - try: - audioString = self.inStream.read(self.buffersize) - except IOError: - print "Overflow error from 'audiostring = inStream.read(buffersize)'. Try decreasing buffersize." 
- quit() - self.audio[i*self.buffersize:(i + 1)*self.buffersize] = numpy.fromstring(audioString,dtype = "uint32") - - """ - Get int array of strength for each bin of frequencies via fast fourier transform - Get the indices of the strongest frequencies (the top 'numInput') - Scale the indices so that the frequencies fit to within numCols - Pick out the unique indices (we've reduced the mapping, so we likely have multiples) - Encode those indices into an SDR via the SparsePassThroughEncoder - Cast the SDR as a float for the TP - """ - ys = self.fft(self.audio, self.highpass, self.lowpass) - fs = numpy.sort(ys.argsort()[-self.numInput:]) - rfs = fs.astype(numpy.float32) / (self.lowpass - self.highpass) * self.numCols - ufs = numpy.unique(rfs) - actualInt = self.e.encode(ufs) - actual = actualInt.astype(numpy.float32) - - """ - Pass the SDR to the TP - Collect the prediction SDR from the TP - Pass the prediction & actual SDRS to the anomaly calculator & array comparer - Update the frequency plot - """ - self.tp.compute(actual, enableLearn = True, computeInfOutput = True) - predictedInt = self.tp.getPredictedState().max(axis=1) - compare = self.vis.compareArray(actualInt, predictedInt) - anomaly = self.vis.calcAnomaly(actualInt, predictedInt) - print "." . join(compare) - print self.vis.hashtagAnomaly(anomaly) - self.freqPlot.set_ydata(ys) - plt.show(block = False) - plt.draw() - - - def fft(self, audio, highpass, lowpass): - """ - Fast fourier transform conditioning - - Output: - 'output' contains the strength of each frequency in the audio signal - frequencies are marked by its position in 'output': - frequency = index * rate / buffesize - output.size = buffersize/2 - Method: - Use numpy's FFT (numpy.fft.fft) - Find the magnitude of the complex numbers returned (abs value) - Split the FFT array in half, because we have mirror frequencies - (they're the complex conjugates) - Use just the first half to apply the bandpass filter - - Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result - """ - left,right = numpy.split(numpy.abs(numpy.fft.fft(audio)),2) - output = left[highpass:lowpass] - return output - - - -audiostream = AudioStream() + def __init__(self): + + """ + Sampling details + rate: The sampling rate in Hz of my soundcard + buffersize: The size of the array to which we will save audio segments (2^12 = 4096 is very good) + secToRecord: The length of each sampling + buffersToRecord: how many multiples of buffers are we recording? + """ + rate =44100 + self.bufferSize =2**12 + bitResolution = 16 + binSize = int(rate/self.bufferSize) + self.start = False + + + """ + Setting up the array that will handle the timeseries of audio data from our input + """ + if bitResolution == 8: + width = 1 + self.audioIn = numpy.empty((self.bufferSize), dtype = "int8") + print "Using 8 bits" + if bitResolution == 16: + width = 2 + self.audioIn = numpy.empty((self.bufferSize), dtype = "int16") + print "Using 16 bits" + if bitResolution == 32: + width = 4 + self.audioIn = numpy.empty((self.bufferSize), dtype = "int32") + print "Using 32 bits" + + + """ + Creating the audio stream from our mic. This includes callback function for + non blocking mode. This means the callback executes everytime whenever it needs + new audio data (to play) and/or when there is new (recorded) audio data available. + Note that PyAudio calls the callback function in a separate thread. 
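+
+    As a sketch of the callback contract only (illustrative, not this
+    script's exact code): the callback receives the raw buffer and must
+    return a (data, flag) tuple; for an input-only stream the data element
+    is ignored, so it can simply be None:
+
+      def callback(in_data, frame_count, time_info, status):
+        samples = numpy.fromstring(in_data, dtype=numpy.int16)
+        return (None, pyaudio.paContinue)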
+    """
+    p = pyaudio.PyAudio()
+
+    def callback(in_data, frame_count, time_info, status):
+      """
+      Replaces processAudio()
+      """
+      self.audioIn = numpy.fromstring(in_data, dtype = numpy.int16)
+      self.audioFFT = self.fft(self.audioIn)
+      # Get the frequency levels in dBs
+      self.audioFFT = 20*numpy.log10(self.audioFFT)
+      self.start = True
+      return (self.audioFFT, pyaudio.paContinue)
+
+    self.inStream = p.open(format =p.get_format_from_width(width, unsigned = False),
+                           channels =1,
+                           rate =rate,
+                           input =True,
+                           frames_per_buffer= self.bufferSize,
+                           stream_callback = callback)
+
+
+    """
+    Print out the inputs
+    """
+    print "Sampling rate (Hz):\t" + str(rate)
+    print "Bit Depth:\t\t" + str(bitResolution)
+    print "Buffersize:\t\t" + str(self.bufferSize)
+
+
+
+
+
+  def fft(self, audio):
+
+    """
+    Fast Fourier Transform -
+
+    Output:
+    'output' - the transform of the audio input into frequency domain.
+               Contains the strength of each frequency in the audio signal
+             frequencies are marked by its position in 'output':
+             frequency = index * rate / buffesize
+             output.size = buffersize/2
+    Use only first half of vector since the second is repeated due to
+    symmetry.
+
+    Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result
+    """
+    output = numpy.abs(numpy.fft.fft(audio))
+    return output [0:int(self.bufferSize/2)]
+
+
+
+audiostream = AudioPrediction()
diff --git a/audiostream/model_params.py b/audiostream/model_params.py
new file mode 100644
index 0000000..a56b219
--- /dev/null
+++ b/audiostream/model_params.py
@@ -0,0 +1,224 @@
+# ----------------------------------------------------------------------
+# Numenta Platform for Intelligent Computing (NuPIC)
+# Copyright (C) 2013, Numenta, Inc.  Unless you have an agreement
+# with Numenta, Inc., for a separate license for this software code, the
+# following terms and conditions apply:
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero Public License version 3 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero Public License for more details.
+#
+# You should have received a copy of the GNU Affero Public License
+# along with this program.  If not, see http://www.gnu.org/licenses.
+#
+# http://numenta.org/licenses/
+# ----------------------------------------------------------------------
+
+MODEL_PARAMS = {
+    # Type of model that the rest of these parameters apply to.
+    'model': "CLA",
+
+    # Version that specifies the format of the config.
+    'version': 1,
+
+    'predictAheadTime': None,
+
+    # Model parameter dictionary.
+    'modelParams': {
+        # The type of inference that this model will perform
+        'inferenceType': 'TemporalAnomaly',
+
+        'sensorParams': {
+            # Sensor diagnostic output verbosity control;
+            # if > 0: sensor region will print out on screen what it's sensing
+            # at each step 0: silent; >=1: some info; >=2: more info;
+            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
+            'verbosity' : 0,
+
+            # Encoder for the dB amplitude of the selected FFT bin.
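+            # With a non-periodic ScalarEncoder the bucket resolution works
+            # out to (maxval - minval) / (n - w) = 200 / 179, roughly 1.1 dB
+            # per bucket.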
+ 'encoders': { + 'binAmplitude': { + 'fieldname': u'binAmplitude', + 'n': 200, + 'name': u'binAmplitude', + 'type': 'ScalarEncoder', + 'minval': 0.0, + 'maxval': 200.0, + 'w': 21 + } + }, + + # A dictionary specifying the period for automatically-generated + # resets from a RecordSensor; + # + # None = disable automatically-generated resets (also disabled if + # all of the specified values evaluate to 0). + # Valid keys is the desired combination of the following: + # days, hours, minutes, seconds, milliseconds, microseconds, weeks + # + # Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12), + 'sensorAutoReset' : None, + }, + + 'spEnable': True, + + 'spParams': { + # SP diagnostic output verbosity control; + # 0: silent; >=1: some info; >=2: more info; + 'spVerbosity' : 0, + + 'globalInhibition': 1, + + # Number of cell columns in the cortical region (same number for + # SP and TP) + # (see also tpNCellsPerCol) + 'columnCount': 2048, + + 'inputWidth': 0, + + # SP inhibition control (absolute value); + # Maximum number of active columns in the SP region's output (when + # there are more, the weaker ones are suppressed) + 'numActiveColumnsPerInhArea': 40, + + 'seed': 1956, + + # potentialPct + # What percent of the columns's receptive field is available + # for potential synapses. At initialization time, we will + # choose potentialPct * (2*potentialRadius+1)^2 + 'potentialPct': 0.5, + + # The default connected threshold. Any synapse whose + # permanence value is above the connected threshold is + # a "connected synapse", meaning it can contribute to the + # cell's firing. Typical value is 0.10. Cells whose activity + # level before inhibition falls below minDutyCycleBeforeInh + # will have their own internal synPermConnectedCell + # threshold set below this default value. + # (This concept applies to both SP and TP and so 'cells' + # is correct here as opposed to 'columns') + 'synPermConnected': 0.1, + + 'synPermActiveInc': 0.1, + + 'synPermInactiveDec': 0.01, + }, + + # Controls whether TP is enabled or disabled; + # TP is necessary for making temporal predictions, such as predicting + # the next inputs. Without TP, the model is only capable of + # reconstructing missing sensor inputs (via SP). + 'tpEnable' : True, + + 'tpParams': { + # TP diagnostic output verbosity control; + # 0: silent; [1..6]: increasing levels of verbosity + # (see verbosity in nupic/trunk/py/nupic/research/TP.py and TP10X*.py) + 'verbosity': 0, + + # Number of cell columns in the cortical region (same number for + # SP and TP) + # (see also tpNCellsPerCol) + 'columnCount': 2048, + + # The number of cells (i.e., states), allocated per column. + 'cellsPerColumn': 32, + + 'inputWidth': 2048, + + 'seed': 1960, + + # Temporal Pooler implementation selector (see _getTPClass in + # CLARegion.py). + 'temporalImp': 'cpp', + + # New Synapse formation count + # NOTE: If None, use spNumActivePerInhArea + # + # TODO: need better explanation + 'newSynapseCount': 20, + + # Maximum number of synapses per segment + # > 0 for fixed-size CLA + # -1 for non-fixed-size CLA + # + # TODO: for Ron: once the appropriate value is placed in TP + # constructor, see if we should eliminate this parameter from + # description.py. + 'maxSynapsesPerSegment': 32, + + # Maximum number of segments per cell + # > 0 for fixed-size CLA + # -1 for non-fixed-size CLA + # + # TODO: for Ron: once the appropriate value is placed in TP + # constructor, see if we should eliminate this parameter from + # description.py. 
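+            # (Together with maxSynapsesPerSegment = 32 above, this caps each
+            # cell at 128 * 32 = 4096 synapses in the fixed-size CLA.)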
+ 'maxSegmentsPerCell': 128, + + # Initial Permanence + # TODO: need better explanation + 'initialPerm': 0.21, + + # Permanence Increment + 'permanenceInc': 0.1, + + # Permanence Decrement + # If set to None, will automatically default to tpPermanenceInc + # value. + 'permanenceDec' : 0.1, + + 'globalDecay': 0.0, + + 'maxAge': 0, + + # Minimum number of active synapses for a segment to be considered + # during search for the best-matching segments. + # None=use default + # Replaces: tpMinThreshold + 'minThreshold': 12, + + # Segment activation threshold. + # A segment is active if it has >= tpSegmentActivationThreshold + # connected synapses that are active due to infActiveState + # None=use default + # Replaces: tpActivationThreshold + 'activationThreshold': 16, + + 'outputType': 'normal', + + # "Pay Attention Mode" length. This tells the TP how many new + # elements to append to the end of a learned sequence at a time. + # Smaller values are better for datasets with short sequences, + # higher values are better for datasets with long sequences. + 'pamLength': 1, + }, + + 'clParams': { + # Classifier implementation selection. + 'implementation': 'cpp', + + 'regionName' : 'SDRClassifierRegion', + + # Classifier diagnostic output verbosity control; + # 0: silent; [1..6]: increasing levels of verbosity + 'verbosity' : 0, + + # This controls how fast the classifier learns/forgets. Higher values + # make it adapt faster and forget older patterns faster. + 'alpha': 0.0001, + + # This is set after the call to updateConfigFromSubConfig and is + # computed from the aggregationInfo and predictAheadTime. + 'steps': '5', + }, + + 'trainSPNetOnlyIfRequested': False, + }, +} diff --git a/audiostream/model_params.pyc b/audiostream/model_params.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3e3748f7949a16f9076b4c33ae1471cdb39a5a4 GIT binary patch literal 1807 zcmZWpOLrSJ6uy2WPU5C1ZCYq3P~L%(npe%M)S238F(i(UZRj~{dgPH}>!6W_ktU7H zh6TTX9jsZfV$Xs-hhNdMWeGokPa3Dq0nd>>-K+c7y{`Pdy7JqVf9}LI|J8uoSoZr8 zk*pFY;w5VGGVuyvpLms;a*6l?@kN{}#B0Qta4M0+X_@#6M0T-~I#De9g02iq#O2ju{a3z?28caC91bBw{vjBK`4seM4}hZ&3iuF!%sv9Z^mV|;#BTsv#J2$v@tXkT&<1o61WE@FJD_e6zYUm&^a=4U zs85N12G~W^P};+>N6jx$=zSV)_VG5Zf~~z9xdmWixKJoQ*#7MfMP{YZ?hMXpCb*1U zG!=}GWFp)em?M#-CSxknCShvkR>70{Kx9J4Vl+#I1cTf%HrN^)E8Q$V1!^pHl%z_! 
zoQrUMAcf+ZB|`r0HGBx#Rs9ZLuse*IV%d(45n~pv92`s_5`JzGkVt*V)SM@`3m!WH zPoBqhFniDaFMuW7A!2XXhP- z6J^Fsb@fz^r9%xRK_y7WM&*fan_N4$0!^vY+})AfP30y?w}DOPNfbMIDB3~T_C*F$ zQ6`xD2@eypw?goFrVm;n!qhmSon)#XyUC9~_}{Os5RIK_m`K`2YY{uaPcjQfT^&!p zS^wqs!~5U0+*zMcq0-eX1k!o5j(=4?}*H^_qRl<(uJa29&CW%k&Td(4y^VbD0c!Sr;!}~t`KU%?$ z)TlZNfuuw8q&P6`yAp~1hvQT8Pgpz|C^kE8oMtNkgV^~%4F2gHZ~ literal 0 HcmV?d00001 From d1b4bbc6e874dc41efc97d718836bc8c94b6d9ca Mon Sep 17 00:00:00 2001 From: Roberto Date: Wed, 5 Oct 2016 02:31:40 +0300 Subject: [PATCH 2/7] changed names of variables changed style --- audiostream/audiostream_tp.py | 53 +++++++++++++---------------------- 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/audiostream/audiostream_tp.py b/audiostream/audiostream_tp.py index 72989af..5a056a7 100755 --- a/audiostream/audiostream_tp.py +++ b/audiostream/audiostream_tp.py @@ -42,8 +42,6 @@ import model_params -WINDOW = 60 - class AudioPrediction: def __init__(self): @@ -58,6 +56,7 @@ def __init__(self): plt.xlabel('Time') plt.ylabel('Frequency Level [dB]') yLimit = 200 + xLimit = 60 plt.ylim(0, yLimit) @@ -71,15 +70,15 @@ def __init__(self): shifter = InferenceShifter() - actHistory = deque([0.0] * WINDOW, maxlen = 60) - predHistory = deque([0.0] * WINDOW, maxlen = 60) - anomHistory = deque([0.0] * WINDOW, maxlen = 60) - likeHistory = deque([0.0] * WINDOW, maxlen = 60) + actHistory = deque([0.0] * xLimit, maxlen = 60) + predHistory = deque([0.0] * xLimit, maxlen = 60) + anomHistory = deque([0.0] * xLimit, maxlen = 60) + likeHistory = deque([0.0] * xLimit, maxlen = 60) - actline, = plt.plot(range(WINDOW), actHistory) - predline, = plt.plot(range(WINDOW), predHistory) - anomline, = plt.plot(range(WINDOW), anomHistory) - likeline, = plt.plot(range(WINDOW), likeHistory) + actline, = plt.plot(range(xLimit), actHistory) + predline, = plt.plot(range(xLimit), predHistory) + anomline, = plt.plot(range(xLimit), anomHistory) + likeline, = plt.plot(range(xLimit), likeHistory) """ Instance of the class to stream audio @@ -105,12 +104,11 @@ def __init__(self): anomaly = result.inferences['anomalyScore'] likelihood = likelihoods.anomalyProbability(inputLevel, anomaly) - if anomaly is not None: - actHistory .append(result.rawInput['binAmplitude']) - predHistory.append(inference) - anomHistory.append(anomaly * yLimit/2) - likeHistory.append(likelihood * yLimit/2) - + #if anomaly is not None: + actHistory .append(result.rawInput['binAmplitude']) + predHistory.append(inference) + anomHistory.append(anomaly * yLimit/2) + likeHistory.append(likelihood * yLimit/2) actline .set_ydata(actHistory) predline.set_ydata(predHistory) @@ -129,13 +127,11 @@ def __init__(self): Sampling details rate: The sampling rate in Hz of my soundcard buffersize: The size of the array to which we will save audio segments (2^12 = 4096 is very good) - secToRecord: The length of each sampling - buffersToRecord: how many multiples of buffers are we recording? 
+ bitResolution: Bit depth of every sample """ rate =44100 self.bufferSize =2**12 bitResolution = 16 - binSize = int(rate/self.bufferSize) self.start = False @@ -165,9 +161,7 @@ def __init__(self): p = pyaudio.PyAudio() def callback(in_data, frame_count, time_info, status): - """ - Replaces processAudio() - """ + self.audioIn = numpy.fromstring(in_data, dtype = numpy.int16) self.audioFFT = self.fft(self.audioIn) # Get the frequency levels in dBs @@ -175,6 +169,7 @@ def callback(in_data, frame_count, time_info, status): self.start = True return (self.audioFFT, pyaudio.paContinue) + self.inStream = p.open(format =p.get_format_from_width(width, unsigned = False), channels =1, rate =rate, @@ -191,28 +186,20 @@ def callback(in_data, frame_count, time_info, status): print "Buffersize:\t\t" + str(self.bufferSize) - - - def fft(self, audio): - """ Fast Fourier Transform - - - Output: - 'output' - the transform of the audio input into frequency domain. - Contains the strength of each frequency in the audio signal + Output: the transform of the audio input to frequency domain. + Contains the amplitude of each frequency in the audio signal frequencies are marked by its position in 'output': frequency = index * rate / buffesize - output.size = buffersize/2 + output.size = bufferSize/2 Use only first half of vector since the second is repeated due to symmetry. - Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result """ output = numpy.abs(numpy.fft.fft(audio)) return output [0:int(self.bufferSize/2)] - audiostream = AudioPrediction() From ab56812aaa9e0e0440ae6e84a0e4924c924f9946 Mon Sep 17 00:00:00 2001 From: Roberto Date: Wed, 5 Oct 2016 13:41:56 +0300 Subject: [PATCH 3/7] Changed name To reflect that it no longer uses the tp distinctive --- .../{audiostream_tp.py => audiostream.py} | 61 +++++++++++-------- 1 file changed, 37 insertions(+), 24 deletions(-) rename audiostream/{audiostream_tp.py => audiostream.py} (79%) diff --git a/audiostream/audiostream_tp.py b/audiostream/audiostream.py similarity index 79% rename from audiostream/audiostream_tp.py rename to audiostream/audiostream.py index 5a056a7..42b40b5 100755 --- a/audiostream/audiostream_tp.py +++ b/audiostream/audiostream.py @@ -24,11 +24,7 @@ """ """ -numpy - the language of pyaudio (& everything else) -pyaudio - access to the mic via the soundcard -pyplot - to plot the sound frequencies -bitmaparray - encodes an array of indices into an SDR -TP10X2 - the C++ optimized temporal pooler (TP) +Example of audio stream to compute predictions, anomaly and likelihood """ import numpy import pyaudio @@ -36,6 +32,7 @@ matplotlib.use("TkAgg") import matplotlib.pyplot as plt from collections import deque + from nupic.data.inference_shifter import InferenceShifter from nupic.frameworks.opf.modelfactory import ModelFactory from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood @@ -47,8 +44,7 @@ class AudioPrediction: def __init__(self): """ - Setup the plot, interactive mode on, title, etc. 
- Rescale the y-axis + Setup the plot, interactive mode on, title, and ylimits """ plt.ion() fig = plt.figure() @@ -61,35 +57,42 @@ def __init__(self): """ - Create model, set predicted field, and likelihood + Create model, set predicted field, likelihoods and shifter """ model = ModelFactory.create(model_params.MODEL_PARAMS) model.enableInference({'predictedField' : 'binAmplitude'}) - likelihoods = AnomalyLikelihood() - shifter = InferenceShifter() + """ + Create vectors to hold data + """ actHistory = deque([0.0] * xLimit, maxlen = 60) predHistory = deque([0.0] * xLimit, maxlen = 60) anomHistory = deque([0.0] * xLimit, maxlen = 60) likeHistory = deque([0.0] * xLimit, maxlen = 60) + """ + 4 Lines to plot the Actual input, Predicted input, Anomaly and Likelihood + """ actline, = plt.plot(range(xLimit), actHistory) predline, = plt.plot(range(xLimit), predHistory) anomline, = plt.plot(range(xLimit), anomHistory) likeline, = plt.plot(range(xLimit), likeHistory) """ - Instance of the class to stream audio + Start the execution of audio stream """ audio = AudioStream() - while audio.start==False:1 - while True: - + """ + The input is the second bin ([1]), which represents the amplitude of + frequencies ranging from (n*sr / bufferSize) to ((n+1)*sr / bufferSize) + where n is the bin number selected as input. + In this case n = 1 and the range is from 10.67Hz to 21.53Hz + """ inputLevel = audio.audioFFT[1] # Clip input @@ -97,19 +100,23 @@ def __init__(self): if inputLevel > maxLevel: inputLevel = maxLevel + # Run the input through the model and shift the resulting prediction. modelInput = {'binAmplitude' : inputLevel} result = shifter.shift(model.run(modelInput)) + # Get inference, anomaly and likelihood from the model inference = result.inferences['multiStepBestPredictions'][5] anomaly = result.inferences['anomalyScore'] likelihood = likelihoods.anomalyProbability(inputLevel, anomaly) - #if anomaly is not None: + # Add values to the end of corresponding vector to plot them + # Scale anomaly and likelihood to be visible in the plot actHistory .append(result.rawInput['binAmplitude']) predHistory.append(inference) anomHistory.append(anomaly * yLimit/2) likeHistory.append(likelihood * yLimit/2) + # Update plot and draw actline .set_ydata(actHistory) predline.set_ydata(predHistory) anomline.set_ydata(anomHistory) @@ -118,22 +125,19 @@ def __init__(self): plt.draw() plt.legend(('actual','predicted', 'anomaly', 'likelihood')) - class AudioStream: def __init__(self): """ Sampling details - rate: The sampling rate in Hz of my soundcard - buffersize: The size of the array to which we will save audio segments (2^12 = 4096 is very good) + rate: The sampling rate in Hz of the audio interface being used. + bufferSize: The size of the array to which we will save audio segments (2^12 = 4096 is very good) bitResolution: Bit depth of every sample """ - rate =44100 - self.bufferSize =2**12 + rate = 44100 + self.bufferSize = 2**12 bitResolution = 16 - self.start = False - """ Setting up the array that will handle the timeseries of audio data from our input @@ -154,7 +158,7 @@ def __init__(self): """ Creating the audio stream from our mic. This includes callback function for - non blocking mode. This means the callback executes everytime whenever it needs + non blocking mode. This means the callback executes whenever it needs new audio data (to play) and/or when there is new (recorded) audio data available. Note that PyAudio calls the callback function in a separate thread. 
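 
     As a sketch of the callback contract only (illustrative, not this
     script's exact code): the callback receives the raw buffer and must
     return a (data, flag) tuple; for an input-only stream the data element
     is ignored, so it can simply be None:
 
       def callback(in_data, frame_count, time_info, status):
         samples = numpy.fromstring(in_data, dtype=numpy.int16)
         return (None, pyaudio.paContinue)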
""" @@ -177,9 +181,18 @@ def callback(in_data, frame_count, time_info, status): frames_per_buffer= self.bufferSize, stream_callback = callback) + # Wait for the FFT vector to be created in the first callback execution + while 1: + try: + self.audioFFT + except AttributeError: + pass + else: + print "Audiostream started" + break """ - Print out the inputs + Print out the audio streamd details """ print "Sampling rate (Hz):\t" + str(rate) print "Bit Depth:\t\t" + str(bitResolution) From 8f4cffde5615eb193f932d70067c713793e8d70c Mon Sep 17 00:00:00 2001 From: Roberto Date: Wed, 5 Oct 2016 14:02:38 +0300 Subject: [PATCH 4/7] Changed Readme to reflect change to opf The main file has changed and now uses an OPF model. It also changed the way of receiving audio, it now uses a callback non blocking method with pyaudio. The name of the main file changed be fit these changes. --- audiostream/README.md | 55 ++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/audiostream/README.md b/audiostream/README.md index 1b9a358..5a90d56 100644 --- a/audiostream/README.md +++ b/audiostream/README.md @@ -1,9 +1,15 @@ # Audio Stream Example -A simple example that streams your mic input into the temporal pooler (TP), -and outputs an anomaly score, based on how familiar the TP has become to that -particular mic input sequence. Think of it as being able to recognize a song, -or become more familiar with your speech pattern. +A simple example that streams your mic input into the Online Prediction Framework (OPF), +and outputs a prediction, an anomaly score and a likelihood score, based on how familiar t +he model has become to that particular mic input sequence. Think of it as being able to +recognize a sound, or become more familiar with your speech pattern, and its ability to +predict what level is next. + +The audio is transformed into the frequency domain using a Fast Fourier Transform (FFT), +and only one frequency bin is taken as input to the model. Meaning that the amplitud of the +selected bin (frequency) is streamed into the model and then it starts analyzing that +particular frequency for anomalies and predictions. ## Requirements @@ -13,36 +19,31 @@ or become more familiar with your speech pattern. ## Usage - python audiostream_tp.py + python audiostream.py This script will run automatically & forever. To stop it, use KeyboardInterrupt (CRTL+C). +The model also uses a model_params.py file that includes the +parameters to use in the analysis. + ## General algorithm: 1. Mic input is received (voltages in the time domain) -2. Mic input is transformed into the frequency domain, using fast fourier transform -3. The few strongest frequencies (in Hz) are identified -4. Those frequencies are encoded into an SDR -5. That SDR is passed to the temporal pooler -6. The temporal pooler provides a prediction -7. An anomaly score is calculated off that prediction against the next input - A low anomaly score means that the temporal pooler is properly predicting - the next frequency pattern. - -## Print outs include: - -1. An array comparing the actual and predicted TP inputs - A - actual - P - predicted - E - expected (both A & P) -2. A hashbar representing the anomaly score -3. Plot of the frequency domain in real-time +2. Mic input is transformed into the frequency domain, using fast fourier transform (FFT) +3. A frequency range (bin) is selected +4. That changing bin value is fed into the opf model in every iteration of the script +5. 
The model computes prediction, anomaly and likelihood values. +6. All 4 values (input, prediction, anomaly and likelihood) are plotted. + +## Plot includes: +4 time changing lines corresponding to: + +1. Raw input +2. Predicted value +3. Anomaly score +4. Likelihood ## Next steps: -1. Benchmark different parameters (especially TP parameters) - Use annoying_test and Online Tone Generator http://onlinetonegenerator.com/ -2. Implement anomaly smoothing -3. Implement spatial pooler -4. Look into better algorithms to pick out the frequency peaks (sound fingerprinting) +1. Look into better algorithms to pick out the frequency peaks (sound fingerprinting). This could be application specific, and user can determine how to select frequency bins. From 981b6d65bcf61d1f172ee32db5da03d3583036d6 Mon Sep 17 00:00:00 2001 From: Roberto Date: Wed, 5 Oct 2016 14:04:15 +0300 Subject: [PATCH 5/7] Deleted compiled file to upload to github --- audiostream/model_params.pyc | Bin 1807 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 audiostream/model_params.pyc diff --git a/audiostream/model_params.pyc b/audiostream/model_params.pyc deleted file mode 100644 index d3e3748f7949a16f9076b4c33ae1471cdb39a5a4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1807 zcmZWpOLrSJ6uy2WPU5C1ZCYq3P~L%(npe%M)S238F(i(UZRj~{dgPH}>!6W_ktU7H zh6TTX9jsZfV$Xs-hhNdMWeGokPa3Dq0nd>>-K+c7y{`Pdy7JqVf9}LI|J8uoSoZr8 zk*pFY;w5VGGVuyvpLms;a*6l?@kN{}#B0Qta4M0+X_@#6M0T-~I#De9g02iq#O2ju{a3z?28caC91bBw{vjBK`4seM4}hZ&3iuF!%sv9Z^mV|;#BTsv#J2$v@tXkT&<1o61WE@FJD_e6zYUm&^a=4U zs85N12G~W^P};+>N6jx$=zSV)_VG5Zf~~z9xdmWixKJoQ*#7MfMP{YZ?hMXpCb*1U zG!=}GWFp)em?M#-CSxknCShvkR>70{Kx9J4Vl+#I1cTf%HrN^)E8Q$V1!^pHl%z_! zoQrUMAcf+ZB|`r0HGBx#Rs9ZLuse*IV%d(45n~pv92`s_5`JzGkVt*V)SM@`3m!WH zPoBqhFniDaFMuW7A!2XXhP- z6J^Fsb@fz^r9%xRK_y7WM&*fan_N4$0!^vY+})AfP30y?w}DOPNfbMIDB3~T_C*F$ zQ6`xD2@eypw?goFrVm;n!qhmSon)#XyUC9~_}{Os5RIK_m`K`2YY{uaPcjQfT^&!p zS^wqs!~5U0+*zMcq0-eX1k!o5j(=4?}*H^_qRl<(uJa29&CW%k&Td(4y^VbD0c!Sr;!}~t`KU%?$ z)TlZNfuuw8q&P6`yAp~1hvQT8Pgpz|C^kE8oMtNkgV^~%4F2gHZ~ From 0bf6287dbcce2dadd12f525d9667e7f9cb59fd69 Mon Sep 17 00:00:00 2001 From: Roberto Date: Wed, 5 Oct 2016 14:08:16 +0300 Subject: [PATCH 6/7] deleted blank lines --- audiostream/audiostream_tp.py | 215 ++++++++++++++++++++++++++++++++++ audiostream/model_params.pyc | Bin 0 -> 1807 bytes 2 files changed, 215 insertions(+) create mode 100644 audiostream/audiostream_tp.py create mode 100644 audiostream/model_params.pyc diff --git a/audiostream/audiostream_tp.py b/audiostream/audiostream_tp.py new file mode 100644 index 0000000..fb4e13c --- /dev/null +++ b/audiostream/audiostream_tp.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +# ---------------------------------------------------------------------- +# Numenta Platform for Intelligent Computing (NuPIC) +# Copyright (C) 2013, Numenta, Inc. Unless you have an agreement +# with Numenta, Inc., for a separate license for this software code, the +# following terms and conditions apply: +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero Public License version 3 as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero Public License for more details. 
+# +# You should have received a copy of the GNU Affero Public License +# along with this program. If not, see http://www.gnu.org/licenses. +# +# http://numenta.org/licenses/ +# ---------------------------------------------------------------------- +""" +See README.md for details. +""" + +""" +Example of audio stream to compute predictions, anomaly and likelihood +""" +import numpy +import pyaudio +import matplotlib +matplotlib.use("TkAgg") +import matplotlib.pyplot as plt +from collections import deque + +from nupic.data.inference_shifter import InferenceShifter +from nupic.frameworks.opf.modelfactory import ModelFactory +from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood + +import model_params + +class AudioPrediction: + + def __init__(self): + + """ + Setup the plot, interactive mode on, title, and ylimits + """ + plt.ion() + fig = plt.figure() + plt.title('Audio Stream example') + plt.xlabel('Time') + plt.ylabel('Frequency Level [dB]') + yLimit = 200 + xLimit = 60 + plt.ylim(0, yLimit) + + """ + Create model, set predicted field, likelihoods and shifter + """ + model = ModelFactory.create(model_params.MODEL_PARAMS) + model.enableInference({'predictedField' : 'binAmplitude'}) + likelihoods = AnomalyLikelihood() + shifter = InferenceShifter() + + """ + Create vectors to hold data + """ + actHistory = deque([0.0] * xLimit, maxlen = 60) + predHistory = deque([0.0] * xLimit, maxlen = 60) + anomHistory = deque([0.0] * xLimit, maxlen = 60) + likeHistory = deque([0.0] * xLimit, maxlen = 60) + + """ + 4 Lines to plot the Actual input, Predicted input, Anomaly and Likelihood + """ + actline, = plt.plot(range(xLimit), actHistory) + predline, = plt.plot(range(xLimit), predHistory) + anomline, = plt.plot(range(xLimit), anomHistory) + likeline, = plt.plot(range(xLimit), likeHistory) + + """ + Start the execution of audio stream + """ + audio = AudioStream() + + while True: + + """ + The input is the second bin ([1]), which represents the amplitude of + frequencies ranging from (n*sr / bufferSize) to ((n+1)*sr / bufferSize) + where n is the bin number selected as input. + In this case n = 1 and the range is from 10.67Hz to 21.53Hz + """ + inputLevel = audio.audioFFT[1] + + # Clip input + maxLevel = model_params.MODEL_PARAMS['modelParams']['sensorParams']['encoders']['binAmplitude']['maxval'] + if inputLevel > maxLevel: + inputLevel = maxLevel + + # Run the input through the model and shift the resulting prediction. + modelInput = {'binAmplitude' : inputLevel} + result = shifter.shift(model.run(modelInput)) + + # Get inference, anomaly and likelihood from the model + inference = result.inferences['multiStepBestPredictions'][5] + anomaly = result.inferences['anomalyScore'] + likelihood = likelihoods.anomalyProbability(inputLevel, anomaly) + + # Add values to the end of corresponding vector to plot them + # Scale anomaly and likelihood to be visible in the plot + actHistory .append(result.rawInput['binAmplitude']) + predHistory.append(inference) + anomHistory.append(anomaly * yLimit/2) + likeHistory.append(likelihood * yLimit/2) + + # Update plot and draw + actline .set_ydata(actHistory) + predline.set_ydata(predHistory) + anomline.set_ydata(anomHistory) + likeline.set_ydata(likeHistory) + + plt.draw() + plt.legend(('actual','predicted', 'anomaly', 'likelihood')) + +class AudioStream: + + def __init__(self): + + """ + Sampling details + rate: The sampling rate in Hz of the audio interface being used. 
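+            (at 44100 Hz the 4096-sample buffer below spans about 93 ms of audio)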
+ bufferSize: The size of the array to which we will save audio segments (2^12 = 4096 is very good) + bitResolution: Bit depth of every sample + """ + rate = 44100 + self.bufferSize = 2**12 + bitResolution = 16 + + """ + Setting up the array that will handle the timeseries of audio data from our input + """ + if bitResolution == 8: + width = 1 + self.audioIn = numpy.empty((self.bufferSize), dtype = "int8") + print "Using 8 bits" + if bitResolution == 16: + width = 2 + self.audioIn = numpy.empty((self.bufferSize), dtype = "int16") + print "Using 16 bits" + if bitResolution == 32: + width = 4 + self.audioIn = numpy.empty((self.bufferSize), dtype = "int32") + print "Using 32 bits" + + """ + Creating the audio stream from our mic. This includes callback function for + non blocking mode. This means the callback executes whenever it needs + new audio data (to play) and/or when there is new (recorded) audio data available. + Note that PyAudio calls the callback function in a separate thread. + """ + p = pyaudio.PyAudio() + + def callback(in_data, frame_count, time_info, status): + + self.audioIn = numpy.fromstring(in_data, dtype = numpy.int16) + self.audioFFT = self.fft(self.audioIn) + # Get the frequency levels in dBs + self.audioFFT = 20*numpy.log10(self.audioFFT) + self.start = True + return (self.audioFFT, pyaudio.paContinue) + + self.inStream = p.open(format =p.get_format_from_width(width, unsigned = False), + channels =1, + rate =rate, + input =True, + frames_per_buffer= self.bufferSize, + stream_callback = callback) + + # Wait for the FFT vector to be created in the first callback execution + while 1: + try: + self.audioFFT + except AttributeError: + pass + else: + print "Audiostream started" + break + + """ + Print out the audio streamd details + """ + print "Sampling rate (Hz):\t" + str(rate) + print "Bit Depth:\t\t" + str(bitResolution) + print "Buffersize:\t\t" + str(self.bufferSize) + + + def fft(self, audio): + """ + Fast Fourier Transform - + Output: the transform of the audio input to frequency domain. + Contains the amplitude of each frequency in the audio signal + frequencies are marked by its position in 'output': + frequency = index * rate / buffesize + output.size = bufferSize/2 + Use only first half of vector since the second is repeated due to + symmetry. + Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result + """ + output = numpy.abs(numpy.fft.fft(audio)) + return output [0:int(self.bufferSize/2)] + + +audiostream = AudioPrediction() diff --git a/audiostream/model_params.pyc b/audiostream/model_params.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3e3748f7949a16f9076b4c33ae1471cdb39a5a4 GIT binary patch literal 1807 zcmZWpOLrSJ6uy2WPU5C1ZCYq3P~L%(npe%M)S238F(i(UZRj~{dgPH}>!6W_ktU7H zh6TTX9jsZfV$Xs-hhNdMWeGokPa3Dq0nd>>-K+c7y{`Pdy7JqVf9}LI|J8uoSoZr8 zk*pFY;w5VGGVuyvpLms;a*6l?@kN{}#B0Qta4M0+X_@#6M0T-~I#De9g02iq#O2ju{a3z?28caC91bBw{vjBK`4seM4}hZ&3iuF!%sv9Z^mV|;#BTsv#J2$v@tXkT&<1o61WE@FJD_e6zYUm&^a=4U zs85N12G~W^P};+>N6jx$=zSV)_VG5Zf~~z9xdmWixKJoQ*#7MfMP{YZ?hMXpCb*1U zG!=}GWFp)em?M#-CSxknCShvkR>70{Kx9J4Vl+#I1cTf%HrN^)E8Q$V1!^pHl%z_! 
zoQrUMAcf+ZB|`r0HGBx#Rs9ZLuse*IV%d(45n~pv92`s_5`JzGkVt*V)SM@`3m!WH
zPoBqhFniDaFMuW7A!2XXhP-
z6J^Fsb@fz^r9%xRK_y7WM&*fan_N4$0!^vY+})AfP30y?w}DOPNfbMIDB3~T_C*F$
zQ6`xD2@eypw?goFrVm;n!qhmSon)#XyUC9~_}{Os5RIK_m`K`2YY{uaPcjQfT^&!p
zS^wqs!~5U0+*zMcq0-eX1k!o5j(=4?}*H^_qRl<(uJa29&CW%k&Td(4y^VbD0c!Sr;!}~t`KU%?$
z)TlZNfuuw8q&P6`yAp~1hvQT8Pgpz|C^kE8oMtNkgV^~%4F2gHZ~

literal 0
HcmV?d00001

From 5dff420c1566222535eaa7d3faa0bdf145aebdb7 Mon Sep 17 00:00:00 2001
From: Roberto
Date: Wed, 5 Oct 2016 15:12:34 +0300
Subject: [PATCH 7/7] Deleted leftovers

---
 audiostream/audiostream_tp.py | 215 ----------------------------------
 audiostream/model_params.pyc  | Bin 1807 -> 0 bytes
 2 files changed, 215 deletions(-)
 delete mode 100644 audiostream/audiostream_tp.py
 delete mode 100644 audiostream/model_params.pyc

diff --git a/audiostream/audiostream_tp.py b/audiostream/audiostream_tp.py
deleted file mode 100644
index fb4e13c..0000000
--- a/audiostream/audiostream_tp.py
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/usr/bin/env python
-# ----------------------------------------------------------------------
-# Numenta Platform for Intelligent Computing (NuPIC)
-# Copyright (C) 2013, Numenta, Inc.  Unless you have an agreement
-# with Numenta, Inc., for a separate license for this software code, the
-# following terms and conditions apply:
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero Public License version 3 as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero Public License for more details.
-#
-# You should have received a copy of the GNU Affero Public License
-# along with this program.  If not, see http://www.gnu.org/licenses.
-#
-# http://numenta.org/licenses/
-# ----------------------------------------------------------------------
-"""
-See README.md for details.
-""" - -""" -Example of audio stream to compute predictions, anomaly and likelihood -""" -import numpy -import pyaudio -import matplotlib -matplotlib.use("TkAgg") -import matplotlib.pyplot as plt -from collections import deque - -from nupic.data.inference_shifter import InferenceShifter -from nupic.frameworks.opf.modelfactory import ModelFactory -from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood - -import model_params - -class AudioPrediction: - - def __init__(self): - - """ - Setup the plot, interactive mode on, title, and ylimits - """ - plt.ion() - fig = plt.figure() - plt.title('Audio Stream example') - plt.xlabel('Time') - plt.ylabel('Frequency Level [dB]') - yLimit = 200 - xLimit = 60 - plt.ylim(0, yLimit) - - """ - Create model, set predicted field, likelihoods and shifter - """ - model = ModelFactory.create(model_params.MODEL_PARAMS) - model.enableInference({'predictedField' : 'binAmplitude'}) - likelihoods = AnomalyLikelihood() - shifter = InferenceShifter() - - """ - Create vectors to hold data - """ - actHistory = deque([0.0] * xLimit, maxlen = 60) - predHistory = deque([0.0] * xLimit, maxlen = 60) - anomHistory = deque([0.0] * xLimit, maxlen = 60) - likeHistory = deque([0.0] * xLimit, maxlen = 60) - - """ - 4 Lines to plot the Actual input, Predicted input, Anomaly and Likelihood - """ - actline, = plt.plot(range(xLimit), actHistory) - predline, = plt.plot(range(xLimit), predHistory) - anomline, = plt.plot(range(xLimit), anomHistory) - likeline, = plt.plot(range(xLimit), likeHistory) - - """ - Start the execution of audio stream - """ - audio = AudioStream() - - while True: - - """ - The input is the second bin ([1]), which represents the amplitude of - frequencies ranging from (n*sr / bufferSize) to ((n+1)*sr / bufferSize) - where n is the bin number selected as input. - In this case n = 1 and the range is from 10.67Hz to 21.53Hz - """ - inputLevel = audio.audioFFT[1] - - # Clip input - maxLevel = model_params.MODEL_PARAMS['modelParams']['sensorParams']['encoders']['binAmplitude']['maxval'] - if inputLevel > maxLevel: - inputLevel = maxLevel - - # Run the input through the model and shift the resulting prediction. - modelInput = {'binAmplitude' : inputLevel} - result = shifter.shift(model.run(modelInput)) - - # Get inference, anomaly and likelihood from the model - inference = result.inferences['multiStepBestPredictions'][5] - anomaly = result.inferences['anomalyScore'] - likelihood = likelihoods.anomalyProbability(inputLevel, anomaly) - - # Add values to the end of corresponding vector to plot them - # Scale anomaly and likelihood to be visible in the plot - actHistory .append(result.rawInput['binAmplitude']) - predHistory.append(inference) - anomHistory.append(anomaly * yLimit/2) - likeHistory.append(likelihood * yLimit/2) - - # Update plot and draw - actline .set_ydata(actHistory) - predline.set_ydata(predHistory) - anomline.set_ydata(anomHistory) - likeline.set_ydata(likeHistory) - - plt.draw() - plt.legend(('actual','predicted', 'anomaly', 'likelihood')) - -class AudioStream: - - def __init__(self): - - """ - Sampling details - rate: The sampling rate in Hz of the audio interface being used. 
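-            (at 44100 Hz the 4096-sample buffer below spans about 93 ms of audio)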
- bufferSize: The size of the array to which we will save audio segments (2^12 = 4096 is very good) - bitResolution: Bit depth of every sample - """ - rate = 44100 - self.bufferSize = 2**12 - bitResolution = 16 - - """ - Setting up the array that will handle the timeseries of audio data from our input - """ - if bitResolution == 8: - width = 1 - self.audioIn = numpy.empty((self.bufferSize), dtype = "int8") - print "Using 8 bits" - if bitResolution == 16: - width = 2 - self.audioIn = numpy.empty((self.bufferSize), dtype = "int16") - print "Using 16 bits" - if bitResolution == 32: - width = 4 - self.audioIn = numpy.empty((self.bufferSize), dtype = "int32") - print "Using 32 bits" - - """ - Creating the audio stream from our mic. This includes callback function for - non blocking mode. This means the callback executes whenever it needs - new audio data (to play) and/or when there is new (recorded) audio data available. - Note that PyAudio calls the callback function in a separate thread. - """ - p = pyaudio.PyAudio() - - def callback(in_data, frame_count, time_info, status): - - self.audioIn = numpy.fromstring(in_data, dtype = numpy.int16) - self.audioFFT = self.fft(self.audioIn) - # Get the frequency levels in dBs - self.audioFFT = 20*numpy.log10(self.audioFFT) - self.start = True - return (self.audioFFT, pyaudio.paContinue) - - self.inStream = p.open(format =p.get_format_from_width(width, unsigned = False), - channels =1, - rate =rate, - input =True, - frames_per_buffer= self.bufferSize, - stream_callback = callback) - - # Wait for the FFT vector to be created in the first callback execution - while 1: - try: - self.audioFFT - except AttributeError: - pass - else: - print "Audiostream started" - break - - """ - Print out the audio streamd details - """ - print "Sampling rate (Hz):\t" + str(rate) - print "Bit Depth:\t\t" + str(bitResolution) - print "Buffersize:\t\t" + str(self.bufferSize) - - - def fft(self, audio): - """ - Fast Fourier Transform - - Output: the transform of the audio input to frequency domain. - Contains the amplitude of each frequency in the audio signal - frequencies are marked by its position in 'output': - frequency = index * rate / buffesize - output.size = bufferSize/2 - Use only first half of vector since the second is repeated due to - symmetry. - Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result - """ - output = numpy.abs(numpy.fft.fft(audio)) - return output [0:int(self.bufferSize/2)] - - -audiostream = AudioPrediction() diff --git a/audiostream/model_params.pyc b/audiostream/model_params.pyc deleted file mode 100644 index d3e3748f7949a16f9076b4c33ae1471cdb39a5a4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1807 zcmZWpOLrSJ6uy2WPU5C1ZCYq3P~L%(npe%M)S238F(i(UZRj~{dgPH}>!6W_ktU7H zh6TTX9jsZfV$Xs-hhNdMWeGokPa3Dq0nd>>-K+c7y{`Pdy7JqVf9}LI|J8uoSoZr8 zk*pFY;w5VGGVuyvpLms;a*6l?@kN{}#B0Qta4M0+X_@#6M0T-~I#De9g02iq#O2ju{a3z?28caC91bBw{vjBK`4seM4}hZ&3iuF!%sv9Z^mV|;#BTsv#J2$v@tXkT&<1o61WE@FJD_e6zYUm&^a=4U zs85N12G~W^P};+>N6jx$=zSV)_VG5Zf~~z9xdmWixKJoQ*#7MfMP{YZ?hMXpCb*1U zG!=}GWFp)em?M#-CSxknCShvkR>70{Kx9J4Vl+#I1cTf%HrN^)E8Q$V1!^pHl%z_! zoQrUMAcf+ZB|`r0HGBx#Rs9ZLuse*IV%d(45n~pv92`s_5`JzGkVt*V)SM@`3m!WH zPoBqhFniDaFMuW7A!2XXhP- z6J^Fsb@fz^r9%xRK_y7WM&*fan_N4$0!^vY+})AfP30y?w}DOPNfbMIDB3~T_C*F$ zQ6`xD2@eypw?goFrVm;n!qhmSon)#XyUC9~_}{Os5RIK_m`K`2YY{uaPcjQfT^&!p zS^wqs!~5U0+*zMcq0-eX1k!o5j(=4?}*H^_qRl<(uJa29&CW%k&Td(4y^VbD0c!Sr;!}~t`KU%?$ z)TlZNfuuw8q&P6`yAp~1hvQT8Pgpz|C^kE8oMtNkgV^~%4F2gHZ~
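
For readers adapting the example: a minimal standalone sketch (not part of
the patches above; the names are illustrative) of the bin arithmetic that the
README and the docstrings describe, assuming the same 44100 Hz rate and
4096-sample buffer:

    import numpy

    RATE = 44100           # sampling rate used throughout the patches
    BUFFER_SIZE = 2 ** 12  # 4096 samples per buffer

    def bin_range_hz(n):
        """Frequency span of FFT bin n: n*rate/size to (n+1)*rate/size."""
        width = float(RATE) / BUFFER_SIZE   # ~10.77 Hz per bin
        return (n * width, (n + 1) * width)

    def bin_level_db(audio, n):
        """dB level of bin n, mirroring the callback's 20*log10 step."""
        spectrum = numpy.abs(numpy.fft.fft(audio))[:BUFFER_SIZE // 2]
        return 20 * numpy.log10(spectrum[n])

    print bin_range_hz(1)   # bin 1, the model's input: about (10.77, 21.53)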