# Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
# Angel Farguell (angel.farguell@gmail.com)

# to install:
# conda install scikit-learn
# conda install scikit-image

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from scipy import interpolate, spatial
import matplotlib.pyplot as plt
import matplotlib.font_manager
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import axes3d
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import numpy as np
from time import time
from infrared_perimeters import process_infrared_perimeters
import sys
import saveload as sl

def preprocess_data_svm(data, scale, minconf=80.):
    """
    Preprocess satellite data from JPSSD to use in Support Vector Machine directly
    (without any interpolation, as space-time 3D points)

    :param data: dictionary of satellite data from JPSSD
    :param scale: time scales
    :param minconf: optional, minimum fire confidence level to take into account
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-09-24
    """

    # confidence of ground detections
    gconf = 70.

    # scale from seconds to days
    tscale = 24.*3600.

    # bounding box of all fire detections, expanded by half its size in each direction
    detlon = np.concatenate([d['lon_fire'] for d in data.itervalues()])
    detlat = np.concatenate([d['lat_fire'] for d in data.itervalues()])
    bb = (detlon.min(), detlon.max(), detlat.min(), detlat.max())
    dc = (bb[1]-bb[0], bb[3]-bb[2])
    bf = (bb[0]-dc[0]*.5, bb[1]+dc[0]*.5, bb[2]-dc[1]*.5, bb[3]+dc[1]*.5)
    print 'expanded bounding box: ', bf

    # process all the points as space-time 3D nodes
    XX = [[], []]
    cf = []
    for name, gran in data.items():
        print '> processing granule %s' % name
        tt = (gran['time_num']-scale[0])/tscale
        conf = gran['conf_fire'] >= minconf
        xf = np.c_[(gran['lon_fire'][conf], gran['lat_fire'][conf], np.repeat(tt, conf.sum()))]
        print 'fire detections: %g' % len(xf)
        XX[0].append(xf)
        # keep only ground (no fire) detections inside the expanded bounding box
        mask = np.logical_and(gran['lon_nofire'] >= bf[0],
                              np.logical_and(gran['lon_nofire'] <= bf[1],
                                             np.logical_and(gran['lat_nofire'] >= bf[2],
                                                            gran['lat_nofire'] <= bf[3])))
        xg = np.c_[(gran['lon_nofire'][mask], gran['lat_nofire'][mask], np.repeat(tt, mask.sum()))]
        print 'no fire detections: %g' % len(xg)
        # coarsen the ground detections so they do not overwhelm the fire detections
        coarsening = np.int(1+len(xg)/min(50, 5*max(len(xf), 1)))
        print 'no fire coarsening: %d' % coarsening
        print 'no fire detections reduction: %g' % len(xg[::coarsening])
        XX[1].append(xg[::coarsening])
        cf.append(gran['conf_fire'][conf])

    Xf = np.concatenate(tuple(XX[0]))
    Xg = np.concatenate(tuple(XX[1]))
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))
    c = np.concatenate((gconf*np.ones(len(Xg)), np.concatenate(tuple(cf))))
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape
    print 'shape c: ', c.shape

    return X, y, c
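
# Usage sketch (illustrative, not executed): assuming `data` is the granule
# dictionary saved by the JPSSD pipeline and `time_num` its (start, end) time
# pair, and assuming saveload provides a load() matching sl.save():
#   data, fxlon, fxlat, time_num = sl.load('data')
#   X, y, c = preprocess_data_svm(data, time_num, minconf=80.)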

def preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules, C=None):
    """
    Preprocess satellite data from JPSSD and set it up to use in Support Vector Machine

    :param lons: longitude grid
    :param lats: latitude grid
    :param U: upper bound grid
    :param L: lower bound grid
    :param T: mask grid
    :param scale: time scales
    :param time_num_granules: times of the granules
    :param C: optional, pair of confidence grids (lower bounds, upper bounds)
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """

    # Flatten coordinates
    lon = np.ravel(lons).astype(float)
    lat = np.ravel(lats).astype(float)

    # Temporal scale to days
    tscale = 24*3600
    U = U/tscale
    L = L/tscale

    # Sanity check that U>=L always
    print 'U>L: ', (U>L).sum()
    print 'U<L: ', (U<L).sum()
    print 'U==L: ', (U==L).sum()

    # Reshape to 1D
    uu = np.ravel(U)
    ll = np.ravel(L)
    tt = np.ravel(T)

    # Set maximum and minimum values to NaN
    uu[uu==uu.max()] = np.nan
    ll[ll==ll.min()] = np.nan

    # Mask created during computation of lower and upper bounds
    mk = tt==scale[1]-scale[0]
    # Masking upper bounds outside the mask
    uu[mk] = np.nan

    # Minimum value of the upper bounds
    muu = uu[~np.isnan(uu)].min()
    # Maximum value of the lower bounds
    mll = ll[~np.isnan(ll)].max()

    ### Reduction of the density of lower bounds
    # Creation of lower bounds mask (True values are going to turn into NaNs in the lower bound data)
    lmk = np.copy(mk)
    ## Reason: we do not care about lower bounds which are far below the upper bounds
    # temporary lower mask, all False (values of the mask where the mask is False, inside the fire mask)
    tlmk = lmk[~mk]
    # set to True all the bounds less than the floor of the minimum of the upper bounds in the fire mask
    tlmk[~np.isnan(ll[~mk])] = (ll[~mk][~np.isnan(ll[~mk])] < np.floor(muu))
    # set lower mask from temporary mask
    lmk[~mk] = tlmk
    ## Reason: coarsening of the lower bounds below the upper bounds to create balance
    # coarsen the lower bound data below the upper bounds to a similar amount as the upper bounds
    kk = (~np.isnan(ll[~lmk])).sum()/(~np.isnan(uu)).sum()
    if kk:
        # temporary lower mask, all True (values of the lower mask where the lower mask is False, set to True)
        tlmk = ~lmk[~lmk]
        # only set a subset of the lower mask values to False (coarsening)
        tlmk[::kk] = False
        # set lower mask from temporary mask
        lmk[~lmk] = tlmk
    ## Reason: we care about the maximum lower bounds which are not below upper bounds
    # temporary lower mask, all True (values of the mask where the mask is True, outside the fire mask)
    tlmk = lmk[mk]
    # temporary lower mask 2, all True (subset of the previous mask where the lower bound is not NaN)
    t2lmk = tlmk[~np.isnan(ll[mk])]
    # set to False in the temporary lower mask 2 where lower bounds have the maximum value
    t2lmk[ll[mk][~np.isnan(ll[mk])] == mll] = False
    # set temporary lower mask from temporary lower mask 2
    tlmk[~np.isnan(ll[mk])] = t2lmk
    # set lower mask from temporary lower mask
    lmk[mk] = tlmk
    ## Reason: coarsening of the non-maximum lower bounds not below the upper bounds to create balance
    # coarsen the non-maximum lower bounds not below the upper bounds to a similar
    # amount as the current number of lower bounds
    kk = (ll[mk][~np.isnan(ll[mk])] < mll).sum()/(~np.isnan(ll[~lmk])).sum()
    if kk:
        # temporary lower mask, values of the current lower mask outside of the original fire mask
        tlmk = lmk[mk]
        # temporary lower mask 2, subset of the previous mask where the lower bound is not NaN
        t2lmk = tlmk[~np.isnan(ll[mk])]
        # temporary lower mask 3, subset of the previous mask where the lower bounds are not maximum
        t3lmk = t2lmk[ll[mk][~np.isnan(ll[mk])] < mll]
        # coarsening of the temporary lower mask 3
        t3lmk[::kk] = False
        # set the temporary lower mask 2 from the temporary lower mask 3
        t2lmk[ll[mk][~np.isnan(ll[mk])] < mll] = t3lmk
        # set the temporary lower mask from the temporary lower mask 2
        tlmk[~np.isnan(ll[mk])] = t2lmk
        # set the lower mask from the temporary lower mask
        lmk[mk] = tlmk

    # Masking lower bounds from previous lower mask
    ll[lmk] = np.nan

    # Values different from NaN in the upper and lower bounds
    um = np.array(~np.isnan(uu))
    lm = np.array(~np.isnan(ll))
    # Define all the x, y, and z components of upper and lower bounds
    ux = lon[um]
    uy = lat[um]
    uz = uu[um]
    lx = lon[lm]
    ly = lat[lm]
    lz = ll[lm]

    # Create the data to call the SVM3 function from svm.py
    X = np.c_[np.concatenate((lx,ux)), np.concatenate((ly,uy)), np.concatenate((lz,uz))]
    y = np.concatenate((-np.ones(len(lx)), np.ones(len(ux))))
    # Print the shape of the data
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape

    if C is None:
        c = 80*np.ones(y.shape)
    else:
        c = np.concatenate((np.ravel(C[0])[lm], np.ravel(C[1])[um]))

    # Clean data if not in bounding box
    bbox = (lon.min(), lon.max(), lat.min(), lat.max(), time_num_granules)
    geo_mask = np.logical_and(np.logical_and(np.logical_and(X[:,0] >= bbox[0], X[:,0] <= bbox[1]), X[:,1] >= bbox[2]), X[:,1] <= bbox[3])
    btime = (0, (scale[1]-scale[0])/tscale)
    time_mask = np.logical_and(X[:,2] >= btime[0], X[:,2] <= btime[1])
    whole_mask = np.logical_and(geo_mask, time_mask)
    X = X[whole_mask,:]
    y = y[whole_mask]
    c = c[whole_mask]

    return X, y, c
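
# Usage sketch (illustrative, not executed): assuming the lon/lat grids and
# the upper/lower bound grids U, L (in seconds) with mask T come from the
# JPSSD processing step, and `scale` is the (start, end) time pair:
#   X, y, c = preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules)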

def make_fire_mesh(fxlon, fxlat, it, nt):
    """
    Create a mesh of points to evaluate the decision function

    :param fxlon: data to base x-axis meshgrid on
    :param fxlat: data to base y-axis meshgrid on
    :param it: pair of initial and final times to base z-axis meshgrid on
    :param nt: number of nodes in the z (time) direction
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """

    xx = np.repeat(fxlon[:, :, np.newaxis], nt, axis=2)
    yy = np.repeat(fxlat[:, :, np.newaxis], nt, axis=2)
    tt = np.linspace(it[0], it[1], nt)
    zz = np.swapaxes(np.swapaxes(np.array([np.ones(fxlon.shape)*t for t in tt]), 0, 1), 1, 2)

    return xx, yy, zz
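
# Usage sketch (illustrative, not executed): evaluating between normalized
# times 0 and 1 on an existing longitude/latitude grid pair fxlon, fxlat
# (hypothetical names):
#   xx, yy, zz = make_fire_mesh(fxlon, fxlat, (0., 1.), 20)
#   # xx.shape == yy.shape == zz.shape == fxlon.shape + (20,)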

def make_meshgrid(x, y, z, s=(50,50,50), exp=.1):
    """
    Create a mesh of points to evaluate the decision function

    :param x: data to base x-axis meshgrid on
    :param y: data to base y-axis meshgrid on
    :param z: data to base z-axis meshgrid on
    :param s: tuple of number of nodes in each direction, optional
    :param exp: extra percentage of time steps in each direction (between 0 and 1), optional
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """

    if not isinstance(s, tuple):
        print '>> FAILED <<'
        print 'The number of nodes at each direction is not a tuple: ', s
        sys.exit(1)
    # number of nodes in each direction
    sx, sy, sz = np.array(s).astype(int)
    # extra step sizes in each direction
    brx = int(sx * exp)
    bry = int(sy * exp)
    brz = int(sz * exp)
    # grid lengths in each direction
    lx = x.max() - x.min()
    ly = y.max() - y.min()
    lz = z.max() - z.min()
    # grid resolutions in each direction
    hx = lx / (sx - 2*brx - 1)
    hy = ly / (sy - 2*bry - 1)
    hz = lz / (sz - 2*brz - 1)
    # extreme values for each dimension
    x_min, x_max = x.min() - brx * hx, x.max() + brx * hx
    y_min, y_max = y.min() - bry * hy, y.max() + bry * hy
    z_min, z_max = z.min() - brz * hz, z.max() + brz * hz
    # generating the mesh grid (with numpy's default 'xy' indexing, passing the
    # y-axis first yields arrays of shape (sx, sy, sz))
    xx, yy, zz = np.meshgrid(np.linspace(y_min, y_max, sy),
                             np.linspace(x_min, x_max, sx),
                             np.linspace(z_min, z_max, sz))
    return xx, yy, zz
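
# Usage sketch (illustrative, not executed): given 1D arrays x, y, z of
# normalized detection coordinates, a padded 50x50x50 evaluation grid is
#   xx, yy, zz = make_meshgrid(x, y, z, s=(50,50,50), exp=.1)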

def frontier(clf, xx, yy, zz, bal=.5, plot_decision=False, plot_poly=False, using_weights=False):
    """
    Compute the surface decision frontier for a classifier.

    :param clf: a classifier
    :param xx: meshgrid ndarray
    :param yy: meshgrid ndarray
    :param zz: meshgrid ndarray
    :param bal: number between 0 and 1, balance between lower and upper bounds in decision function (in case not level 0)
    :param plot_decision: boolean of plotting decision volume
    :param plot_poly: boolean of plotting polynomial approximation
    :param using_weights: boolean, classifier was trained with libsvm instance weights
    :return F: 2D meshes with xx, yy coordinates and the hyperplane z which gives decision function 0

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
    """

    # Creating the 3D grid
    XX = np.c_[np.ravel(xx), np.ravel(yy), np.ravel(zz)]

    # Evaluating the decision function
    print '>> Evaluating the decision function...'
    sys.stdout.flush()
    t_1 = time()
    if using_weights:
        from libsvm_weights.python.svmutil import svm_predict
        _, _, p_vals = svm_predict([], XX, clf)
        ZZ = np.array([p[0] for p in p_vals])
    else:
        ZZ = clf.decision_function(XX)
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    hist = np.histogram(ZZ)
    print 'counts: ', hist[0]
    print 'values: ', hist[1]
    print 'decision function range: ', ZZ.min(), '~', ZZ.max()

    # Reshaping decision function volume
    Z = ZZ.reshape(xx.shape)
    print 'decision function shape: ', Z.shape
    sl.save((xx,yy,zz,Z), 'decision')

    if plot_decision:
        try:
            from skimage import measure
            from shiftcmap import shiftedColorMap
            verts, faces, normals, values = measure.marching_cubes_lewiner(Z, level=0, allow_degenerate=False)
            # Scale and transform to actual size of the interesting volume
            h = np.divide([xx.max()-xx.min(), yy.max()-yy.min(), zz.max()-zz.min()], np.array(xx.shape)-1)
            verts = verts * h
            verts = verts + [xx.min(), yy.min(), zz.min()]
            mesh = Poly3DCollection(verts[faces], facecolor='orange', alpha=.9)
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Decision volume")
            col = [(0, .5, 0), (.5, .5, .5), (.5, 0, 0)]
            cm = colors.LinearSegmentedColormap.from_list('GrRdD', col, N=100)
            midpoint = 1 - ZZ.max() / (ZZ.max() + abs(ZZ.min()))
            shiftedcmap = shiftedColorMap(cm, midpoint=midpoint, name='shifted')
            X = np.ravel(xx)
            Y = np.ravel(yy)
            T = np.ravel(zz)
            kk = 50
            p = ax.scatter(X[0::kk], Y[0::kk], T[0::kk], c=ZZ[0::kk], s=.1, alpha=.4, cmap=shiftedcmap)
            cbar = fig.colorbar(p)
            cbar.set_label('decision function value', rotation=270, labelpad=20)
            ax.add_collection3d(mesh)
            ax.set_xlim([xx.min(),xx.max()])
            ax.set_ylim([yy.min(),yy.max()])
            ax.set_zlim([zz.min(),zz.max()])
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('decision.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    if plot_poly:
        fig = plt.figure()
    # Computing fire arrival time from previous decision function
    print '>> Computing fire arrival time...'
    sys.stdout.flush()
    t_1 = time()
    # xx 2-dimensional array
    Fx = xx[:, :, 0]
    # yy 2-dimensional array
    Fy = yy[:, :, 0]
    # zz 1-dimensional array
    zr = zz[0, 0]
    # Initializing fire arrival time
    Fz = np.zeros(Fx.shape)
    # For each x and y
    for k1 in range(Fx.shape[0]):
        for k2 in range(Fx.shape[1]):
            # Approximate the vertical decision function by a piecewise polynomial (cubic spline interpolation)
            pz = interpolate.CubicSpline(zr, Z[k1,k2])
            # Compute the real roots of the piecewise polynomial
            rr = pz.roots()
            # Just take the real roots between min(zz) and max(zz)
            realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5, np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
            if len(realr) > 0:
                # Take the minimum root
                Fz[k1,k2] = realr.min()
                # Plotting the approximated polynomial with the decision function
                if plot_poly:
                    try:
                        plt.ion()
                        plt.plot(pz(zr), zr)
                        plt.plot(Z[k1,k2], zr, '+')
                        plt.plot(np.zeros(len(realr)), realr, 'o', c='g')
                        plt.plot(0, Fz[k1,k2], 'o', markersize=3, c='r')
                        plt.title('Polynomial approximation of decision_function(%f,%f,z)' % (Fx[k1,k2], Fy[k1,k2]))
                        plt.xlabel('decision function value')
                        plt.ylabel('Z')
                        plt.legend(['polynomial','decision values','roots','fire arrival time'])
                        plt.xlim([Z.min(),Z.max()])
                        plt.ylim([zz.min(),zz.max()])
                        plt.show()
                        plt.pause(.001)
                        plt.cla()
                    except Exception as e:
                        print 'Warning: something went wrong when plotting...'
                        print e
            else:
                # If there is no real root of the polynomial between zz.min() and zz.max(), define it as NaN
                Fz[k1,k2] = np.nan
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    F = [Fx,Fy,Fz]

    return F
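
# Usage sketch (illustrative, not executed): with a fitted classifier clf
# (e.g. svm.SVC already trained) and meshes from make_meshgrid:
#   F = frontier(clf, xx, yy, zz)
#   Fx, Fy, Fz = F  # Fz holds the z where the decision function crosses 0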

def SVM3(X, y, C=1., kgam=1., search=False, norm=True, fire_grid=None, weights=None):
    """
    3D Support Vector Machine analysis and plot

    :param X: matrix of training vectors, where n_samples is the number of samples and n_features is the number of features
    :param y: vector of target values
    :param C: penalty parameter of the error term (argument of svm.SVC class), or an array of per-sample weights, optional
    :param kgam: scalar multiplier for gamma (increase it to capture more detail), optional
    :param search: boolean, grid search for the best values of C and gamma, optional
    :param norm: normalize the data into the interval (0,1) in all the directions, optional
    :param fire_grid: the longitude and latitude grid where to have the fire arrival time, optional
    :param weights: currently unused, optional
    :return F: tuple with (longitude grid, latitude grid, fire arrival time grid)

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """

    t_init = time()

    col = [(0, .5, 0), (.5, 0, 0)]
    cm_GR = colors.LinearSegmentedColormap.from_list('GrRd', col, N=2)
    col = [(1, 0, 0), (.25, 0, 0)]
    cm_Rds = colors.LinearSegmentedColormap.from_list('Rds', col, N=100)

    # Plot options
    # plot original data
    plot_data = False
    # plot scaled data with artificial data
    plot_scaled = False
    # plot decision volume
    plot_decision = False
    # plot polynomial approximation
    plot_poly = False
    # plot full hyperplane vs detections with support vectors
    plot_supports = False
    # plot resulting fire arrival time vs detections
    plot_result = False

    # Other options
    # number of vertical nodes per observation
    vN = 1
    # if True, no NaNs in the result are wanted (all NaNs are replaced by the maximum value)
    notnan = True

    # Options better left unchanged
    # number of horizontal nodes per observation (used if fire_grid==None)
    hN = 5
    # creation of artificial lower bounds under the ground detections in the pre-processing
    artil = False
    # if artil = True: resolution of artificial lower bounds vertical to the ground detections
    hartil = .2
    # creation of artificial upper bounds over the fire detections in the pre-processing
    artiu = True
    # if artiu = True: resolution of artificial upper bounds vertical to the fire detections
    hartiu = .1
    # creation of an artificial mesh of lower bounds at the bottom
    downarti = True
    # if downarti = True: distance below the minimum of the z direction for the artificial lower bounds
    dminz = .1
    # if downarti = True: confidence level of the artificial lower bounds
    confal = 100.
    # creation of an artificial mesh of upper bounds at the top
    toparti = False
    # if toparti = True: proportion over the maximum of the z direction for the artificial upper bounds
    dmaxz = .1
    # if toparti = True: confidence level of the artificial upper bounds
    confau = 100.

    # using different weights for the data
    if isinstance(C, (list, tuple, np.ndarray)):
        using_weights = True
        from libsvm_weights.python.svm import svm_problem, svm_parameter
        from libsvm_weights.python.svmutil import svm_train
        from sklearn.utils import compute_class_weight
    else:
        using_weights = False

    # Data inputs
    X = np.array(X).astype(float)
    y = np.array(y)

    # Original data
    oX = np.array(X).astype(float)
    oy = np.array(y)

    # Visualization of the data
    X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    if plot_data:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the original data to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('original_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Normalization of the data into [0,1]^3
    if norm:
        xmin = X0.min()
        xlen = X0.max() - X0.min()
        x0 = np.divide(X0 - xmin, xlen)
        ymin = X1.min()
        ylen = X1.max() - X1.min()
        x1 = np.divide(X1 - ymin, ylen)
        zmin = X2.min()
        zlen = X2.max() - X2.min()
        x2 = np.divide(X2 - zmin, zlen)
        X0, X1, X2 = x0, x1, x2
        X[:, 0] = X0
        X[:, 1] = X1
        X[:, 2] = X2

    # Creation of fire and ground artificial detections
    if artil or artiu or toparti or downarti:
        # Extreme values in the z direction
        minz = X[:, 2].min()
        maxz = X[:, 2].max()
        # Division of lower and upper bounds for data and confidence level
        fl = X[y==np.unique(y)[0]]
        fu = X[y==np.unique(y)[1]]

        # Artificial extensions of the lower bounds
        if artil:
            # Create artificial lower bounds
            flz = np.array([ np.unique(np.append(np.arange(f[2],minz,-hartil),f[2])) for f in fl ])
            # Definition of new ground detections after artificial detections added
            Xg = np.concatenate([ np.c_[(np.repeat(fl[k][0],len(flz[k])),np.repeat(fl[k][1],len(flz[k])),flz[k])] for k in range(len(flz)) ])
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = np.concatenate([ np.repeat(cl[k],len(flz[k])) for k in range(len(flz)) ])
        else:
            Xg = fl
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = cl

        # Artificial extensions of the upper bounds
        if artiu:
            # Create artificial upper bounds
            fuz = np.array([ np.unique(np.append(np.arange(f[2],maxz,hartiu),f[2])) for f in fu ])
            # Definition of new fire detections after artificial detections added
            Xf = np.concatenate([ np.c_[(np.repeat(fu[k][0],len(fuz[k])),np.repeat(fu[k][1],len(fuz[k])),fuz[k])] for k in range(len(fuz)) ])
            # Define new confidence levels
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = np.concatenate([ np.repeat(cu[k],len(fuz[k])) for k in range(len(fuz)) ])
        else:
            Xf = fu
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = cu

        # Bottom artificial lower bounds
        if downarti:
            # Creation of the new x,y mesh of artificial lower bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # All the artificial new mesh points are below the data
            zng = np.repeat(minz-dminz, len(np.ravel(xn)))
            # Artificial lower bounds
            Xga = np.c_[np.ravel(xn), np.ravel(yn), np.ravel(zng)]
            # Definition of new ground detections after down artificial lower detections
            Xgn = np.concatenate((Xg,Xga))
            # Definition of new confidence level
            if using_weights:
                Cga = np.ones(len(Xga))*confal
                Cgn = np.concatenate((Cg,Cga))
        else:
            Xgn = Xg
            if using_weights:
                Cgn = Cg

        # Top artificial upper bounds
        if toparti:
            # Creation of the new x,y mesh of artificial upper bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # All the artificial new mesh points are over the data
            znf = np.repeat(maxz+dmaxz, len(np.ravel(xn)))
            # Artificial upper bounds
            Xfa = np.c_[np.ravel(xn), np.ravel(yn), np.ravel(znf)]
            # Definition of new fire detections after top artificial upper detections
            Xfn = np.concatenate((Xf,Xfa))
            # Definition of new confidence level
            if using_weights:
                Cfa = np.ones(len(Xfa))*confau
                Cfn = np.concatenate((Cf,Cfa))
        else:
            Xfn = Xf
            if using_weights:
                Cfn = Cf

        # New definition of the training vectors
        X = np.concatenate((Xgn, Xfn))
        # New definition of the target values
        y = np.concatenate((np.repeat(np.unique(y)[0],len(Xgn)),np.repeat(np.unique(y)[1],len(Xfn))))
        # New definition of the confidence level
        if using_weights:
            C = np.concatenate((Cgn, Cfn))
        # New definition of each feature vector
        X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]

    # Printing number of samples and features
    n0 = (y==np.unique(y)[0]).sum().astype(float)
    n1 = (y==np.unique(y)[1]).sum().astype(float)
    n_samples, n_features = X.shape
    print 'n_samples =', n_samples
    print 'n_samples_{-1} =', int(n0)
    print 'n_samples_{+1} =', int(n1)
    print 'n_features =', n_features

    # Visualization of scaled data
    if plot_scaled:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the data scaled to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('scaled_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Rescaling gamma to capture more detailed results
    gamma = 1. / (n_features * X.std())
    print 'gamma =', gamma

    # Creating the SVM model and fitting the data using the Support Vector Machine technique
    print '>> Creating the SVM model...'
    sys.stdout.flush()
    if using_weights:
        gamma = kgam*gamma
        # Compute class balanced weights
        cls, _ = np.unique(y, return_inverse=True)
        class_weight = compute_class_weight("balanced", cls, y)
        prob = svm_problem(C, y, X)
        arg = '-g %.15g -w%01d %.15g -w%01d %.15g -m 1000 -h 0' % (gamma, cls[0], class_weight[0],
                                                                   cls[1], class_weight[1])
        param = svm_parameter(arg)
        print '>> Fitting the SVM model...'
        t_1 = time()
        clf = svm_train(prob, param)
        t_2 = time()
    else:
        t_1 = time()
        if search:
            print '>> Searching for the best values of C and gamma...'
            # Grid search over the parameter grid
            param_grid = {'C': np.logspace(0,5,6), 'gamma': gamma*np.logspace(0,5,6)}
            # Make grid search classifier
            grid_search = GridSearchCV(svm.SVC(cache_size=2000, class_weight="balanced", probability=True), param_grid, n_jobs=-1, verbose=1, cv=5, iid=False)
            print '>> Fitting the SVM model...'
            # Train the classifier
            grid_search.fit(X, y)
            print "Best Parameters:\n", grid_search.best_params_
            clf = grid_search.best_estimator_
            print "Best Estimators:\n", clf
        else:
            gamma = kgam*gamma
            clf = svm.SVC(C=C, kernel="rbf", gamma=gamma, cache_size=2000, class_weight="balanced") # default kernel: exp(-gamma*||x-x'||^2)
            print clf
            print '>> Fitting the SVM model...'
            # Fitting the data using the Support Vector Machine technique
            clf.fit(X, y)
        t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))

    if not using_weights:
        # Check if the classification failed
        if clf.fit_status_:
            print '>> FAILED <<'
            print 'Failed fitting the data'
            sys.exit(1)
        print 'number of support vectors: ', clf.n_support_
        print 'score of trained data: ', clf.score(X,y)

    # Creating the mesh grid to evaluate the classification
    print '>> Creating mesh grid to evaluate the classification...'
    nnodes = np.ceil(np.power(n_samples, 1./n_features))
    if fire_grid is None:
        # Number of necessary nodes
        hnodes = hN*nnodes
        vnodes = vN*nnodes
        print 'number of horizontal nodes (%d meshgrid nodes for each observation): %d' % (hN,hnodes)
        print 'number of vertical nodes (%d meshgrid nodes for each observation): %d' % (vN,vnodes)
        # Computing resolution of the mesh to evaluate
        sdim = (hnodes,hnodes,vnodes)
        print 'grid_size = %dx%dx%d = %d' % (sdim[0],sdim[1],sdim[2],np.prod(sdim))
        t_1 = time()
        xx, yy, zz = make_meshgrid(X0, X1, X2, s=sdim)
        t_2 = time()
    else:
        # rescale the given fire grid into the normalized coordinates
        fxlon = np.divide(fire_grid[0] - xmin, xlen)
        fxlat = np.divide(fire_grid[1] - ymin, ylen)
        it = (X2.min(), X2.max())
        vnodes = vN*nnodes
        sdim = (fxlon.shape[0], fxlon.shape[1], vnodes)
        print 'fire_grid_size = %dx%dx%d = %d' % (sdim + (np.prod(sdim),))
        t_1 = time()
        xx, yy, zz = make_fire_mesh(fxlon, fxlat, it, sdim[2])
        t_2 = time()
        print 'grid_created = %dx%dx%d = %d' % (zz.shape + (np.prod(zz.shape),))
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))

    # Computing the 2D fire arrival time, F
    print '>> Computing the 2D fire arrival time, F...'
    sys.stdout.flush()
    F = frontier(clf, xx, yy, zz, plot_decision=plot_decision, plot_poly=plot_poly, using_weights=using_weights)

    print '>> Creating final results...'
    sys.stdout.flush()
    # Plotting the Separating Hyperplane of the SVM classification with the support vectors
    if plot_supports:
        try:
            if using_weights:
                supp_ind = np.sort(clf.get_sv_indices())-1
                supp_vec = X[supp_ind]
            else:
                supp_ind = clf.support_
                supp_vec = clf.support_vectors_
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D Separating Hyperplane of an SVM")
            # plotting the separating hyperplane
            ax.plot_wireframe(F[0], F[1], F[2], color='orange', alpha=.5)
            # computing the indices where there are no support vectors
            rr = np.array(range(len(y)))
            ms = np.isin(rr, supp_ind)
            nsupp = rr[~ms]
            # plotting non-support vectors (smaller)
            ax.scatter(X0[nsupp], X1[nsupp], X2[nsupp], c=y[nsupp], cmap=cm_GR, s=.5, vmin=y.min(), vmax=y.max(), alpha=.1)
            # plotting support vectors (bigger)
            ax.scatter(supp_vec[:, 0], supp_vec[:, 1], supp_vec[:, 2], c=y[supp_ind], cmap=cm_GR, s=1, edgecolors='k', alpha=.2)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_zlim(zz.min(), zz.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('support.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Plot the normalized fire arrival time resulting from the SVM classification
    if plot_result:
        try:
            Fx, Fy, Fz = np.array(F[0]), np.array(F[1]), np.array(F[2])
            with np.errstate(invalid='ignore'):
                Fz[Fz > X2.max()] = np.nan
            if notnan:
                Fz[np.isnan(Fz)] = X2.max()
                Fz = np.minimum(Fz, X2.max())
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Fire arrival time normalized")
            # plotting fire arrival time
            p = ax.plot_surface(Fx, Fy, Fz, cmap=cm_Rds,
                                linewidth=0, antialiased=False)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_zlim(zz.min(), zz.max())
            cbar = fig.colorbar(p)
            cbar.set_label('Fire arrival time normalized', labelpad=20, rotation=270)
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('tign_g.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Translate the result back into the initial data scale
    if norm:
        f0 = F[0] * xlen + xmin
        f1 = F[1] * ylen + ymin
        f2 = F[2] * zlen + zmin
        FF = [f0, f1, f2]
    else:
        # without normalization the frontier is already in the data scale
        FF = [F[0], F[1], F[2]]

    # Clamp all the values larger than the maximum detection time to that maximum
    oX0, oX1, oX2 = oX[:, 0], oX[:, 1], oX[:, 2]
    FFx, FFy, FFz = FF[0], FF[1], FF[2]

    with np.errstate(invalid='ignore'):
        FFz[FFz > oX2.max()] = np.nan

    if notnan:
        FFz[np.isnan(FFz)] = oX2.max()
        FFz = np.minimum(FFz, oX2.max())
    FF = [FFx, FFy, FFz]

    # Plot the fire arrival time resulting from the SVM classification
    if plot_result:
        try:
            # Plotting the result
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D graph function of an SVM")
            FFx, FFy, FFz = np.array(FF[0]), np.array(FF[1]), np.array(FF[2])
            # plotting original data
            ax.scatter(oX0, oX1, oX2, c=oy, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            # plotting fire arrival time
            ax.plot_wireframe(FFx, FFy, FFz, color='orange', alpha=.5)
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('result.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    print '>> SUCCESS <<'
    t_final = time()
    print 'TOTAL elapsed time: %ss.' % str(abs(t_final-t_init))
    plt.close()

    return FF
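
# Usage sketch (illustrative, not executed): with features X (columns of
# longitude, latitude and time in days) and labels y in {-1,+1}, e.g. from
# preprocess_data_svm, and per-sample confidences c used as weights:
#   F = SVM3(X, y, C=c, kgam=5.)
#   lon_grid, lat_grid, tign_g = F   # hypothetical result names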

if __name__ == "__main__":
    # Experiment to do
    exp = 1
    search = False

    # Defining ground and fire detections
    def exp1():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0]]
        Xf = [[0, 0, 1], [1, 1, 0], [2, 2, 1], [2, 0, 1], [0, 2, 1]]
        C = np.concatenate((10.*np.ones(len(Xg)), 100.*np.ones(len(Xf))))
        kgam = 1.
        return Xg, Xf, C, kgam
    def exp2():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0],
              [4, 2, 0], [4, 0, 0], [2, 1, .5], [0, 1, .5],
              [4, 1, .5], [2, 0, .5], [2, 2, .5]]
        Xf = [[0, 0, 1], [1, 1, 0.25], [2, 2, 1], [2, 0, 1], [0, 2, 1], [3, 1, 0.25], [4, 2, 1], [4, 0, 1]]
        C = np.concatenate((np.array([50., 50., 50., 50., 50., 50.,
                                      1000., 100., 100., 100., 100.]), 100.*np.ones(len(Xf))))
        kgam = 5.
        return Xg, Xf, C, kgam

    # Creating the options
    options = {1: exp1, 2: exp2}

    # Defining the option depending on the experiment
    Xg, Xf, C, kgam = options[exp]()

    # Creating the data necessary to run the SVM3 function
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))

    # Running SVM classification
    SVM3(X, y, C=C, kgam=kgam, search=search)