import matplotlib.pyplot as plt
import numpy as np
ndum=1000
x = 10*np.random.random(ndum)
y = x + np.random.normal(0,3,ndum)
# and lets put in some bad data values at y=-10
derp=np.random.random(ndum)
y=np.array([-10 if a<0.1 else b for a,b in zip(derp,y)])
# plot our dataset
plt.scatter(x,y,s=5)
plt.xlabel('x')
plt.ylabel('y')
Text(0, 0.5, 'y')
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.binned_statistic.html
from scipy.stats import binned_statistic
# first identify good data
good = (y!=-10)
# set up bins in x going from 0 to 10 in steps of 1
binvals = np.arange(0,11,1)
# work out how many points are in each bin (bin_num) as well as
# the average and standard deviation of the y values in the bins (bin_avg, bin_std)
bin_num, bin_edges, bin_idx = binned_statistic(x[good], y[good], statistic='count', bins=binvals)
bin_avg, bin_edges, bin_idx = binned_statistic(x[good], y[good], statistic='mean', bins=binvals)
bin_std, bin_edges, bin_idx = binned_statistic(x[good], y[good], statistic='std', bins=binvals)
# this bit of python magic works out the center of each bin
bin_cent = 0.5*(bin_edges[1:]+bin_edges[:-1])
# plot the data
plt.scatter(x,y,s=5)
plt.xlabel('x')
plt.ylabel('y')
# plot the mean and standard deviation as red points and errorbars
plt.errorbar(bin_cent, bin_avg, yerr=bin_std, fmt='o', color='red')
<ErrorbarContainer object of 3 artists>
# make a linear fit using np.polyfit, and calculate uncertainty and scatter
coeff,cov=np.polyfit(x[good],y[good],1,cov=True)
coeff_err = np.sqrt(np.diag(cov))
print(' slope = {:.3f} +/- {:.3f}'.format(coeff[0],coeff_err[0]))
print('intercept = {:.3f} +/- {:.3f}'.format(coeff[1],coeff_err[1]))
polynomial=np.poly1d(coeff)
print(' scatter = {:.3f}'.format(np.std(y[good]-polynomial(x[good]))))
# plot the data
plt.scatter(x,y,s=5)
plt.xlabel('x')
plt.ylabel('y')
# overplot your fit
xfit=np.linspace(0,10,100)
plt.plot(xfit,polynomial(xfit),color='green',lw=3)
slope = 1.001 +/- 0.035 intercept = -0.095 +/- 0.200 scatter = 3.021
[<matplotlib.lines.Line2D at 0x7f816ae42f70>]