#Here I import the relevant function libraries #This can be done in many ways #To import an entire library (e.g. scipy) so that functions accessed by typing "lib_name.func_name" import matplotlib #To import a subsection of a library import scipy.optimize #To import the whole library under a different name, so you can type "diff_name.func_name" import numpy as np import matplotlib.pyplot as plt #To import just a particular function from time import time from math import sin #This may be confusing but I'm grabbing a function usually denoted with "time.time"
#First of all we need to read the example data file into a list #I will create an empty list and append the data as I read the file #Note appending is a VERY inefficient opperation, however for less than a ~million repeats it's fine #Make an empty list: datalist =  #Open the file. Here the 'r' means 'read' f = open('ex_data.dat','r') #Now read all the lines of the file into a list called lines #For very large files this isn't always a good plan as you may not want to store the whole file in the memory #Such files should be read line by line lines = f.readlines() #Now loop over ever line and record the value as a floating point number in datalist #Notice that python's loop functionality is very flexible for line in lines: datalist.append(float(line))
#Now we want to we want to create a plot #Create a figure called 'fig' and a set of axes called 'ax' fig,ax = plt.subplots(figsize=(8,6)) #Create a dummy list of numbers to plot along the x-axis #We will create a numpy array using the 'numpy.arange()' function #The syntax for this is (start_value,non_inclusive_end_value,step_size) #Here I also use the 'len()' function which returns the length of a list/array x_vals = np.arange(0,len(datalist),1) #Now plot the data on the axes #Here I choose to plot the data as blue dots ax.plot(x_vals,datalist,'b.') #Set the limits, and label the axes ax.set_xlim(0,100) ax.set_ylim(-5,50) ax.set_xlabel('x') ax.set_ylabel('val') #Force the plot to be displayed plt.show() #Save the plot fig.savefig('ex_fig.jpg',dpi=300)
#Now make a function of a gaussian distribution def gaussian(x,mean,sigma,A): #Set the normalisation norm = A/(sigma*np.sqrt(2.*np.pi)) return norm*np.exp( -((x-mean)**2.) / (2.*sigma**2.) )
#First we will fit this the bad, slow way #Start with an impossibly large value of least_sq least_sq = 1000000000. #Loop over each parameter and over each value of x and calculate where least_sq is minimised #Make some value approximate guesses of where the min/max values of the parameters are for mean in np.arange(50.,60.,1.): for sigma in np.arange(5.,15.,1.): for A in np.arange(500.,1500.,10.): least_sq_temp = 0. for i in range(0,len(x_vals),1): #Add the current deviations to the temp value of least_sq least_sq_temp += (datalist[i] - gaussian(x_vals[i],mean,sigma,A))**2. #If least_sq_temp is a better fit than your current stored value then update the fit if least_sq_temp < least_sq: least_sq = least_sq_temp A_fit = A mean_fit = mean sigma_fit = sigma print 'A = ',A_fit print 'Mean = ',mean_fit print 'Sigma = ',sigma_fit
A = 1010.0 Mean = 56.0 Sigma = 9.0
#Reproduce the plot, but with the fit fig,ax = plt.subplots(figsize=(8,6)) x_vals = np.arange(0,len(datalist),1) #Here I select the plot the fit as a green, dotted, 2pt line ax.plot(x_vals,gaussian(x_vals,mean_fit,sigma_fit,A_fit),lw=2,ls='dotted',color='green') ax.plot(x_vals,datalist,'b.') ax.set_xlim(0,100) ax.set_ylim(-5,50) ax.set_xlabel('x') ax.set_ylabel('val') plt.show() #Save the plot fig.savefig('ex_fit1_fig.jpg',dpi=300)
#Now do the fit the smart way, by using a built in curve fitting function #The function returns the fit parameters and the covariances #The syntax is (function_to_fit, x_values, data_values, p0 = list_of_initial_guesses) fit,cov = scipy.optimize.curve_fit(gaussian, x_vals, datalist, p0 = [55.,10.,800.]) print 'Mean = ',fit print 'Sigma = ',fit print 'A = ',fit
Mean = 56.0803931594 Sigma = 8.99317585744 A = 1008.72516834
#Reproduce the plot, but with the new fit fig,ax = plt.subplots(figsize=(8,6)) x_vals = np.arange(0,len(datalist),1) ax.plot(x_vals,gaussian(x_vals,fit,fit,fit),lw=2,ls='dashed',color='red') ax.plot(x_vals,datalist,'b.') ax.set_xlim(0,100) ax.set_ylim(-5,50) ax.set_xlabel('x') ax.set_ylabel('val') plt.show() #Save the plot fig.savefig('ex_fit2_fig.jpg',dpi=300)
#This is the end of the example, but below are some useful things to note
#Array Arithmetic #When dealing with large lists (the norm in astronomy) array arithmetic is nearly always namy times faster #You should write as much of your code using numpy arrays as you possibly can #The only real drawback of arrays is that you cannot append values to them, they have fixed length #Here I will take the sine of a million element array using a loop and using array arithmetic #Look at the difference in duration #Generate an array of 10 million numbers between 0 and pi val_arr = np.random.uniform(0.,np.pi,1E6) #Recast these arrays as lists val_list = list(val_arr) start_time = time() #Note I've used the shorthand version of range here (the default start is 0 and default step is 1) for i in range(1000000): #This will overwrite the current value with it's sine val_list[i] = sin(val_list[i]) print "Loop time: ",(time()-start_time),"sec" start_time = time() val_arr = np.sin(val_arr) print "Array arithmetic time: ",(time()-start_time),"sec"
Loop time: 1.03399991989 sec Array arithmetic time: 0.0680000782013 sec
#Integer Arithmetic #If you're not used to coding, I've got some bad news for you, computer are pretty dumb #Python will identify what a number you give is by how you write it #If the number doesn't have a decimal point it will assume it's an integer print 3/2 print 3./2.
#Pass by Reference vs Pass by Value #In most cases python defaults to passing by reference #This means that when you say 'a = b' you are essentially telling the computer that they are the same thing #Whatever you do to b is also done to a a = [1,2,3,4,5] b = a print a,b #Delete the value 3 from the list a a.remove(3) print a,b
[1, 2, 3, 4, 5] [1, 2, 3, 4, 5] [1, 2, 4, 5] [1, 2, 4, 5]
#To overcome this you have to trick the computer into duplicating the list a = [1,2,3,4,5] #This will copy the array from 'start_point:end_point', the default is the start/end of the list b = a[:] a.remove(3) print a,b
[1, 2, 4, 5] [1, 2, 3, 4, 5]
#There some other tricks you can play with this a = [1,2,3,4,5] #To return the list in reverse #This is saying go through the whole list, but stepping one place backwards each time print a[::-1] #To return every other value print a[::2]
[5, 4, 3, 2, 1] [1, 3, 5]
#Another python is to use sets #If you want to remove duplicates from a list you don't have to do a complicated algorithm a = [1,1,2,3,3,3,3,4,5,5,5] #To remove duplicates you can define the list as a set #Sets are mathematical objects that cannot have duplicates #Then you can redefine as a list #Doing this all at once print list(set(a))
[1, 2, 3, 4, 5]