Generating data with numpy

import numpy as np

Generating Data

# Evenly spaced integers: arange(stop), arange(start, stop) and
# arange(start, stop, step); the stop value itself is excluded.
a = np.arange(5)
b = np.arange(5, 10)
c = np.arange(10, 100, 10)
# Single-argument print(...) calls give the same output on Python 2 and 3.
print("a, b, and c are {} {} {}".format(a, b, c))

# linspace(start, stop, num) returns num evenly spaced points with both
# endpoints included; num defaults to 50.
a = np.linspace(0, 5)
print("a is {}".format(a))
b = np.linspace(-5, 5, 11)  # endpoints can be negative or positive; values are floats
print("b is {}".format(b))

baseline = np.zeros(5)  # 5 zeros
ones = np.ones(5)  # 5 ones

def f(x, y):
    """Return the product of x and y (an arbitrary example shape)."""
    product = x * y
    return product

# Example series: a constant offset of -2 plus f(i, i) for i = 0..4.
# Adding the plain list to the array is done element-wise by numpy.
slope = -ones*2 + [f(i, i) for i in range(5)]

# Single-argument print(...) calls give the same output on Python 2 and 3.
print(slope)
print(baseline)
print("average change from baseline {}".format((baseline - slope).mean()))

Random numbers

a = np.random.rand(5)  # 5 random numbers in [0, 1)
b = np.random.rand(2, 3)  # 6 random numbers in a 2x3 matrix
# Single-argument print(...) calls give the same output on Python 2 and 3.
print("a and b are {} {}".format(a, b))

mean = 5
sigma = 2
gauss = np.random.normal(mean, sigma, 5)  # 5 draws from a gaussian
print(gauss)
uniform = np.random.uniform(10, 15, 5)  # 5 draws from a uniform distribution on [10, 15)
print(uniform)

gauss = np.random.normal(mean, sigma, 1000)  # lots of data
sample = np.random.choice(gauss, 5)  # 5 random points from the data (drawn with replacement)
print(sample)

Statistics and summarising data

data = np.random.normal(0, 2, 100)  # 100 gaussian draws (mean 0, sigma 2)
highest = data.max()
lowest = data.min()
# Single-argument print(...) calls give the same output on Python 2 and 3.
print("{} {}".format(highest, lowest))
# Mean, median, variance and standard deviation of the sample.
print("{} {} {} {}".format(data.mean(), np.median(data), data.var(), data.std()))

or histogramming

# Histogram the data using 10 evenly spaced bin edges (i.e. 9 bins) that
# span the full range of the sample.
hist, bins = np.histogram(data, bins=np.linspace(data.min(), data.max(), 10))
print(hist)  # print(x) with one argument behaves the same on Python 2 and 3

There are some things numpy can’t do so easily…

from scipy.stats import norm

# Fit a normal distribution to the sample; norm.fit returns the estimated
# location (mean) and scale (standard deviation).
# Note: this rebinds the module-level name `sigma`.
mu, sigma = norm.fit(data)
# Single-argument print(...) gives the same output on Python 2 and 3.
print("{} {}".format(mu, sigma))

back to edition