# coding: utf-8

# In[1]:

#Collapse the data!
#Plot I vs wavelength/frequency/both

#Only enable the inline backend when an IPython/Jupyter shell is present, so
#this file can also be imported or run as a plain script.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None
if _ipython_shell is not None:
    _ipython_shell.magic(u'pylab inline')


# In[2]:

import numpy as np
import matplotlib.pyplot as pl

#Solid line styles, two consecutive channels per color.
colors = ['m-', 'm-', 'b-', 'b-', 'g-', 'g-', 'y-', 'y-', 'r-', 'r-', 'k-']
#Dashed counterparts of `colors`, used for the second half of collapsed data.
colors2 = ['m--', 'm--', 'b--', 'b--', 'g--', 'g--', 'y--', 'y--', 'r--', 'r--', 'k--']


# In[3]:

#Functions
def normalize(array):
    """Return *array* divided by its maximum value."""
    return array/np.max(array)


def collapse(array, time):
    """Split *array* and *time* at the midpoint for a collapsed overlay plot.

    Parameters:
        array: 1-D data values.
        time:  1-D NumPy array of time stamps, same length as *array*
               (it must support adding a scalar).

    Returns:
        (upperh, lowerh, uppert, lowert) where
        upperh = array[amid:]      and uppert = time[amid:],
        lowerh = array[-amid:]     and lowert = time[0:amid] + amid,
        with amid = len(array) // 2.
    """
    #Floor division keeps the midpoint an integer on both Python 2 and 3;
    #a float cannot be used as a slice index.
    amid = len(array) // 2
    upperh = array[amid:]
    uppert = time[amid:]
    #NOTE(review): array[-amid:] is the LAST amid samples, i.e. the same data
    #as upperh (minus one point for odd lengths), not a mirrored first half.
    #Confirm whether a fold such as array[:amid][::-1] was intended.
    lowerh = array[-amid:]
    #This will mostly be the same as uppert, but if an array contains an odd
    #number of points it differs, and it is needed for plotting lowerh.
    lowert = time[0:amid] + amid
    return upperh, lowerh, uppert, lowert


def _read_float_rows(filename, delimiter=None):
    """Return the rows of *filename* whose fields are all valid floats.

    Each line is split on *delimiter* (None means any whitespace). A line is
    skipped entirely if any field fails to parse as a float or if it has no
    fields, so header lines and blank lines never produce partial rows.
    """
    rows = []
    with open(filename, "r") as handle:
        for line in handle:
            try:
                row = [float(item) for item in line.split(delimiter)]
            except ValueError:
                #Lines with non-numeric fields (e.g. the header) are excluded.
                continue
            if row:
                rows.append(row)
    return rows


# In[4]:

#Classes
class IO:
    """Read numeric columns from the text file named at construction."""

    def __init__(self, filename):
        self.filename = filename

    def extract_data(self):
        """Read a whitespace-separated file and return its first two columns.

        Not suitable for CSV files; use extract_csv for those. The two
        columns are also stored on the instance as self.x and self.y.
        """
        dataCols = np.array(_read_float_rows(self.filename)).T
        self.x = dataCols[0]
        self.y = dataCols[1]
        return dataCols[0], dataCols[1]

    def extract_csv(self):
        """Read a comma-separated file and return its columns as an array.

        The result is the transpose of the parsed rows, so index 0 is the
        first column of the file, index 1 the second, and so on. Lines that
        contain any non-float field are skipped silently, because this is
        called once per input file and each file starts with a text header.
        """
        return np.array(_read_float_rows(self.filename, ",")).T


class Plot:
    """Thin wrapper around matplotlib.pyplot for titled, labelled plots."""

    def __init__(self, x, y, title="x vs y", xlabel="x", ylabel="y"):
        self.x = x
        self.y = y
        self.title = title
        self.xlabel = xlabel
        self.ylabel = ylabel

    def _decorate(self):
        """Apply the stored title and axis labels to the current figure."""
        pl.title(self.title)
        pl.xlabel(self.xlabel)
        pl.ylabel(self.ylabel)

    def _place_legend(self):
        """Draw the legend outside the plot area, to the right."""
        pl.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    def plot(self, lintype="b-"):
        """Plot self.y against self.x with a zero line, show it, then clear.

        Don't use "k-" for lintype; that is the style of the zero line.
        """
        axis = np.zeros(len(self.x))
        pl.plot(self.x, axis, "k-")
        pl.plot(self.x, self.y, lintype)
        self._decorate()
        pl.show()
        pl.clf()

    def overplot(self, y, lbl, lintype="b-", x=0):
        """Add one labelled curve to the current figure without showing it.

        Use this when overplotting more than one data set. *x* is used only
        when it is a NumPy array; any other value (including the default 0)
        falls back to self.x.
        """
        #np is the name this file imports; the bare `numpy` name only exists
        #when %pylab has injected it.
        if not isinstance(x, np.ndarray):
            x = self.x
        pl.plot(x, y, lintype, label=lbl)
        self._decorate()
        self._place_legend()

    def scatter(self):
        """Scatter self.y against self.x with a zero line, show, then clear."""
        axis = np.zeros(len(self.x))
        pl.plot(self.x, axis)
        pl.scatter(self.x, self.y)
        self._decorate()
        pl.show()
        pl.clf()

    def overscatter(self, y, lbl, colors="b"):
        """Add one labelled scatter series against self.x without showing it.

        Use this when overplotting more than one data set.
        """
        pl.scatter(self.x, y, color=colors, label=lbl)
        self._decorate()
        self._place_legend()
# In[5]:

filenames = ["Max_Kourtney_160422_trial_1_cirrus.csv",
             "Max_Kourtney_160422_trial_2_clear.csv",
             "Max_Kourtney_160422_trial_3_clear.csv",
             "Max_Kourtney_160422_trial_4_clear.csv",
             "Max_Kourtney_160422_trial_5_clear.csv",
             "Max_Kourtney_160422_trial_6_clear.csv",
             "Max_Kourtney_160422_trial_7_cirrus.csv"]

wavelengths_nm = np.array([405.6, 441.9, 467.6, 493.7, 538.2, 610.4, 638.7, 654.9, 702.3, 750.5])


def load_trials(names):
    """Read every CSV in *names* and return a list of column arrays.

    Each entry is the result of IO.extract_csv for one file. The rest of this
    script uses row 0 as the time axis and each following row as one
    intensity channel, labelled by the matching entry of wavelengths_nm.
    """
    return [IO(name).extract_csv() for name in names]


def show_channels(x, channels, title, xlabel="Time [s]", ylabel="I", transform=None):
    """Overlay every channel of one trial on a single figure and show it.

    *channels* is a sequence of 1-D arrays; channel j is labelled with
    wavelengths_nm[j] and drawn with colors[j]. If *transform* is given it is
    applied to each channel before plotting.
    """
    curves = [ch if transform is None else transform(ch) for ch in channels]
    p = Plot(x, curves[0], title=title, xlabel=xlabel, ylabel=ylabel)
    for j, curve in enumerate(curves):
        p.overplot(curve, lbl=("%.1f nm" % wavelengths_nm[j]), lintype=colors[j])
    pl.show()
    pl.clf()


def show_collapsed(time, channels, title):
    """Overlay both halves returned by collapse() for every channel.

    The first half pair is drawn solid (colors[j]) and the second dashed
    (colors2[j]); both share the same wavelength label.
    """
    p = Plot(time, channels[0], title=title, xlabel="Time [s]", ylabel="I")
    for j, channel in enumerate(channels):
        up, lw, ut, lt = collapse(channel, time)
        label = "%.1f nm" % wavelengths_nm[j]
        p.overplot(up, x=ut, lbl=label, lintype=colors[j])
        p.overplot(lw, x=lt, lbl=label, lintype=colors2[j])
    pl.show()
    pl.clf()


def mask_trial(trial, threshold):
    """Trim *trial* to the span where any channel exceeds *threshold*.

    For every intensity channel (all rows after row 0) the indices where the
    normalized signal is above *threshold* are located; lmin is the smallest
    first index and umax the largest last index over all channels. Every row,
    including time, is cut to samples lmin+1 .. umax inclusive and returned
    as a new 2-D array.
    """
    firsts = []
    lasts = []
    for channel in trial[1:]:
        above = np.where(normalize(channel) > threshold)[0]
        firsts.append(np.min(above))
        lasts.append(np.max(above))
    lmin = np.min(np.array(firsts))
    umax = np.max(np.array(lasts))
    #This slice keeps exactly the samples the earlier np.delete/np.linspace
    #version kept (it dropped indices 0..lmin and everything after umax), but
    #without float index arrays or out-of-range indices, which current NumPy
    #rejects. np.array() copies so the raw trial is never modified.
    #NOTE(review): the first above-threshold sample (index lmin) is dropped;
    #confirm that is wanted.
    return np.array(trial[:, lmin + 1:umax + 1])


trials = load_trials(filenames)

for i, trial in enumerate(trials):
    show_channels(trial[0], trial[1:], "I vs time: Trial %i [Raw Data]" % (i+1))


# In[6]:

for i, trial in enumerate(trials):
    show_channels(trial[0], trial[1:], "I/I_max vs time: Trial %i [Raw Data]" % (i+1),
                  ylabel="I/I_max", transform=normalize)


# In[8]:

margin = 0.106

#Reload so this cell can be run on its own.
trials = load_trials(filenames)

#Here I want to put a mask on the data so that we only see the times when the
#sun is in the detector. For this I remove the data points where the
#normalized data is below a certain fraction given by "margin".
newTrials = [mask_trial(trial, margin) for trial in trials]

#Now I take all the time arrays and set the start time back to zero
for masked in newTrials:
    masked[0] = masked[0] - np.min(masked[0])

for i, masked in enumerate(newTrials):
    show_channels(masked[0], masked[1:], "I vs Time: Trial %i" % (i+1))


# In[9]:

for i, masked in enumerate(newTrials):
    t = masked[0]
    #The angle runs linearly from -90 to 0 over the masked window and is in
    #DEGREES; np.cos expects radians, so convert before taking the cosine.
    angle_deg = (90./np.max(t))*t - 90
    mu = np.sort(np.cos(np.radians(angle_deg)))
    #NOTE(review): each intensity channel is sorted independently of mu, which
    #discards the time pairing between the two axes - confirm this is wanted.
    show_channels(mu, masked[1:], "I/I_max vs mu: Trial %i" % (i+1),
                  xlabel="mu", ylabel="I",
                  transform=lambda channel: np.sort(normalize(channel)))


# In[9]:

# Collapsed Data
for i, trial in enumerate(trials):
    show_collapsed(trial[0], trial[1:], "Collapsed I vs time: Trial %i [Raw Data]" % (i+1))


# In[13]:

#Same collapsed view, but for the masked data. The Plot is built from the
#masked arrays throughout so its x and y always have matching lengths.
for i, masked in enumerate(newTrials):
    show_collapsed(masked[0], masked[1:], "Collapsed I vs time: Trial %i [Raw Data]" % (i+1))


# In[12]:

#As you can see, some of the collapsed data trends underneath the actual data, and some data sets don't.
#I'm not quite sure why, but my guess is passing clouds fudged the data since it isn't affecting all the sets.


# In[ ]: