# svm.py [JPSSData.git]
# Generalization of plot_svm to also handle spatio-temporal processing
# Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
# Angel Farguell (angel.farguell@gmail.com)
#
# To install:
#   conda install scikit-learn
#   conda install scikit-image
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from scipy import interpolate, spatial
import matplotlib.pyplot as plt
import matplotlib.font_manager
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import axes3d
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import numpy as np
from time import time
from infrared_perimeters import process_infrared_perimeters
import sys
import saveload as sl

def preprocess_data_svm(data, scale):
    """
    Preprocess satellite data from JPSSD for direct use in the Support Vector Machine
    (without any interpolation, as space-time 3D points)

    :param data: dictionary of satellite data from JPSSD
    :param scale: tuple with the time scales (numeric start and end times)
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-09-24
    """
    # minimum fire confidence level to take into account
    minconf = 70.
    # confidence of ground detections
    gconf = 70.

    # scale from seconds to days
    tscale = 24.*3600.

    detlon = np.concatenate([d['lon_fire'] for d in data.itervalues()])
    detlat = np.concatenate([d['lat_fire'] for d in data.itervalues()])
    bb = (detlon.min(),detlon.max(),detlat.min(),detlat.max())
    dc = (bb[1]-bb[0],bb[3]-bb[2])
    bf = (bb[0]-dc[0]*.5,bb[1]+dc[0]*.5,bb[2]-dc[1]*.5,bb[3]+dc[1]*.5)
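    # e.g. detections spanning lon [-113,-112] and lat [40,41] give dc = (1,1)
    # and an extended bounding box bf = (-113.5,-111.5,39.5,41.5)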
    print 'extended bounding box: ', bf
    # process all the points as space-time 3D nodes
    XX = [[],[]]
    cf = []
    for gran in data.items():
        tt = (gran[1]['time_num']-scale[0])/tscale
        conf = gran[1]['conf_fire']>=minconf
        xf = np.c_[(gran[1]['lon_fire'][conf],gran[1]['lat_fire'][conf],np.repeat(tt,conf.sum()))]
        XX[0].append(xf)
        mask = np.logical_and(gran[1]['lon_nofire'] >= bf[0],
                    np.logical_and(gran[1]['lon_nofire'] <= bf[1],
                        np.logical_and(gran[1]['lat_nofire'] >= bf[2],
                            gran[1]['lat_nofire'] <= bf[3])))
        xg = np.c_[(gran[1]['lon_nofire'][mask],gran[1]['lat_nofire'][mask],np.repeat(tt,mask.sum()))]
        # coarsening factor to balance ground detections against fire detections
        coarse = np.int(1+len(xg)/(5*max(len(xf),20)))
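        # e.g. 10000 ground points and 100 fire points give coarse = 21,
        # so roughly 1 in every 21 ground points is kept below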
        XX[1].append(xg[::coarse])
        cf.append(gran[1]['conf_fire'][conf])
    Xf = np.concatenate(tuple(XX[0]))
    Xg = np.concatenate(tuple(XX[1]))
    X = np.concatenate((Xg,Xf))
    y = np.concatenate((-np.ones(len(Xg)),np.ones(len(Xf))))
    c = np.concatenate((gconf*np.ones(len(Xg)),np.concatenate(tuple(cf))))
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape
    print 'shape c: ', c.shape

    return X,y,c
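
# Example sketch of preprocess_data_svm (hypothetical granule dictionary: the
# keys below are the ones the function reads; t0, t1 are numeric start/end times):
#   data = {'g1': {'time_num': t0+3600., 'conf_fire': np.array([80.,90.]),
#                  'lon_fire': np.array([-112.8,-112.7]), 'lat_fire': np.array([39.7,39.8]),
#                  'lon_nofire': np.array([-112.9]), 'lat_nofire': np.array([39.6])}}
#   X, y, c = preprocess_data_svm(data, (t0, t1))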

def preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules, C=None):
    """
    Preprocess satellite data from JPSSD and set it up for use in the Support Vector Machine

    :param lons: longitude grid
    :param lats: latitude grid
    :param U: upper bound grid
    :param L: lower bound grid
    :param T: mask grid
    :param scale: time scales
    :param time_num_granules: times of the granules
    :param C: confidence grids (lower bounds, upper bounds), optional
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """
    # Flatten coordinates
    lon = np.ravel(lons).astype(float)
    lat = np.ravel(lats).astype(float)

    # Temporal scale to days
    tscale = 24*3600
    U = U/tscale
    L = L/tscale

    # Check that U>=L holds everywhere
    print 'U>L: ',(U>L).sum()
    print 'U<L: ',(U<L).sum()
    print 'U==L: ',(U==L).sum()
    # Reshape to 1D
    uu = np.ravel(U)
    ll = np.ravel(L)
    tt = np.ravel(T)

    # Set the maximum and minimum sentinel values to NaN
    uu[uu==uu.max()] = np.nan
    ll[ll==ll.min()] = np.nan

    # Mask created during computation of lower and upper bounds
    mk = tt==scale[1]-scale[0]
    # Mask out upper bounds outside the fire mask
    uu[mk] = np.nan

    # Minimum value of the valid upper bounds
    muu = uu[~np.isnan(uu)].min()
    # Maximum value of the valid lower bounds
    mll = ll[~np.isnan(ll)].max()
    ### Reduction of the density of lower bounds
    # Creation of the lower-bound mask (True values will turn into NaN's in the lower bound data)
    lmk = np.copy(mk)
    ## Reason: we do not care about lower bounds that are far below the upper bounds
    # temporary lower mask, all False (values of the mask where the mask is False, inside the fire mask)
    tlmk = lmk[~mk]
    # set to True all the bounds less than the floor of the minimum of the upper bounds in the fire mask
    tlmk[~np.isnan(ll[~mk])] = (ll[~mk][~np.isnan(ll[~mk])] < np.floor(muu))
    # set lower mask from temporary mask
    lmk[~mk] = tlmk
    ## Reason: coarsen the lower bounds below the upper bounds to create balance
    # coarsening factor so that the number of lower bounds below the upper bounds is similar to the number of upper bounds
    kk = (~np.isnan(ll[~lmk])).sum()/(~np.isnan(uu)).sum()
    if kk:
        # temporary lower mask, all True (values of the lower mask where the lower mask is False, set to True)
        tlmk = ~lmk[~lmk]
        # only set a subset of the lower mask values to False (coarsening)
        tlmk[::kk] = False
        # set lower mask from temporary mask
        lmk[~lmk] = tlmk
    ## Reason: we care about the maximum lower bounds which are not below upper bounds
    # temporary lower mask, all True (values of the mask where the mask is True, outside the fire mask)
    tlmk = lmk[mk]
    # temporary lower mask 2, all True (subset of the previous mask where the lower bound is not NaN)
    t2lmk = tlmk[~np.isnan(ll[mk])]
    # set to False in the temporary lower mask 2 where lower bounds have maximum value
    t2lmk[ll[mk][~np.isnan(ll[mk])] == mll] = False
    # set temporary lower mask from temporary lower mask 2
    tlmk[~np.isnan(ll[mk])] = t2lmk
    # set lower mask from temporary lower mask
    lmk[mk] = tlmk
    ## Reason: coarsen the non-maximum lower bounds not below the upper bounds to create balance
    # coarsening factor for the subset outside of the fire mask, so that the number of
    # non-maximum lower bounds is similar to the current number of lower bounds
    kk = (ll[mk][~np.isnan(ll[mk])] < mll).sum()/(~np.isnan(ll[~lmk])).sum()
    if kk:
        # temporary lower mask, values of the current lower mask outside of the original fire mask
        tlmk = lmk[mk]
        # temporary lower mask 2, subset of the previous mask where the lower bound is not NaN
        t2lmk = tlmk[~np.isnan(ll[mk])]
        # temporary lower mask 3, subset of the previous mask where the lower bounds are not maximum
        t3lmk = t2lmk[ll[mk][~np.isnan(ll[mk])] < mll]
        # coarsening of the temporary lower mask 3
        t3lmk[::kk] = False
        # set the temporary lower mask 2 from the temporary lower mask 3
        t2lmk[ll[mk][~np.isnan(ll[mk])] < mll] = t3lmk
        # set the temporary lower mask from the temporary lower mask 2
        tlmk[~np.isnan(ll[mk])] = t2lmk
        # set the lower mask from the temporary lower mask
        lmk[mk] = tlmk

    # Mask the lower bounds using the lower mask above
    ll[lmk] = np.nan
    # Values different than NaN in the upper and lower bounds
    um = np.array(~np.isnan(uu))
    lm = np.array(~np.isnan(ll))
    # Define all the x, y, and z components of upper and lower bounds
    ux = lon[um]
    uy = lat[um]
    uz = uu[um]
    lx = lon[lm]
    ly = lat[lm]
    lz = ll[lm]

    # Create the data to call the SVM3 function below
    X = np.c_[np.concatenate((lx,ux)),np.concatenate((ly,uy)),np.concatenate((lz,uz))]
    y = np.concatenate((-np.ones(len(lx)),np.ones(len(ux))))
    # Print the shape of the data
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape
    if C is None:
        c = 80*np.ones(y.shape)
    else:
        c = np.concatenate((np.ravel(C[0])[lm],np.ravel(C[1])[um]))

    # Clean data if not in bounding box
    bbox = (lon.min(),lon.max(),lat.min(),lat.max(),time_num_granules)
    geo_mask = np.logical_and(np.logical_and(np.logical_and(X[:,0] >= bbox[0],X[:,0] <= bbox[1]), X[:,1] >= bbox[2]), X[:,1] <= bbox[3])
    btime = (0,(scale[1]-scale[0])/tscale)
    time_mask = np.logical_and(X[:,2] >= btime[0], X[:,2] <= btime[1])
    whole_mask = np.logical_and(geo_mask, time_mask)
    X = X[whole_mask,:]
    y = y[whole_mask]
    c = c[whole_mask]

    return X,y,c
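
# Example sketch of preprocess_result_svm (hypothetical grids: U, L, T are the
# upper bound, lower bound, and mask grids in seconds, matching lons/lats):
#   lons, lats = np.meshgrid(np.linspace(-113,-112,50), np.linspace(40,41,50))
#   X, y, c = preprocess_result_svm(lons, lats, U, L, T, (t0, t1), time_num_granules)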

def make_fire_mesh(fxlon, fxlat, it, nt):
    """
    Create a mesh of points to evaluate the decision function

    :param fxlon: data to base x-axis meshgrid on
    :param fxlat: data to base y-axis meshgrid on
    :param it: pair (t_min, t_max) to base z-axis meshgrid on
    :param nt: number of nodes in the time direction
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """
    xx = np.repeat(fxlon[:, :, np.newaxis], nt, axis=2)
    yy = np.repeat(fxlat[:, :, np.newaxis], nt, axis=2)
    tt = np.linspace(it[0],it[1],nt)
    zz = np.swapaxes(np.swapaxes(np.array([np.ones(fxlon.shape)*t for t in tt]),0,1),1,2)

    return xx, yy, zz
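
# Example sketch (hypothetical 10x10 grid): for fxlon, fxlat of shape (10,10),
# it=(0.,10.) and nt=5, the outputs xx, yy, zz all have shape (10,10,5):
#   fxlon, fxlat = np.meshgrid(np.linspace(-113,-112,10), np.linspace(40,41,10))
#   xx, yy, zz = make_fire_mesh(fxlon, fxlat, (0.,10.), 5)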

def make_meshgrid(x, y, z, s=(50,50,50), exp=.1):
    """
    Create a mesh of points to evaluate the decision function

    :param x: data to base x-axis meshgrid on
    :param y: data to base y-axis meshgrid on
    :param z: data to base z-axis meshgrid on
    :param s: tuple of the number of nodes in each direction, optional
    :param exp: extra fraction of nodes to pad each direction with (between 0 and 1), optional
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """
    if not isinstance(s, tuple):
        print '>> FAILED <<'
        print 'The number of nodes in each direction is not a tuple: ', s
        sys.exit(1)
    # number of nodes in each direction
    sx, sy, sz = np.array(s).astype(int)
    # extra step sizes in each direction
    brx = int(sx * exp)
    bry = int(sy * exp)
    brz = int(sz * exp)
    # grid lengths in each direction
    lx = x.max() - x.min()
    ly = y.max() - y.min()
    lz = z.max() - z.min()
    # grid resolutions in each direction
    hx = lx / (sx - 2*brx - 1)
    hy = ly / (sy - 2*bry - 1)
    hz = lz / (sz - 2*brz - 1)
    # extreme values in each dimension
    x_min, x_max = x.min() - brx * hx, x.max() + brx * hx
    y_min, y_max = y.min() - bry * hy, y.max() + bry * hy
    z_min, z_max = z.min() - brz * hz, z.max() + brz * hz
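    # e.g. with sx=50 and exp=.1: brx=5 and hx=lx/39, so 5 extra nodes of the
    # same spacing pad each side of the x data range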
    # generating the mesh grid
    xx, yy, zz = np.meshgrid(np.linspace(y_min, y_max, sy),
                             np.linspace(x_min, x_max, sx),
                             np.linspace(z_min, z_max, sz))
    return xx, yy, zz
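
# Example sketch (hypothetical scattered points; each output has shape (50,50,50)
# and extends exp=10% beyond the data range in every direction):
#   x, y, z = np.random.rand(100), np.random.rand(100), np.random.rand(100)
#   xx, yy, zz = make_meshgrid(x, y, z, s=(50,50,50))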

def frontier(clf, xx, yy, zz, bal=.5, plot_decision=False, plot_poly=False, using_weights=False):
    """
    Compute the surface decision frontier for a classifier.

    :param clf: a classifier
    :param xx: meshgrid ndarray
    :param yy: meshgrid ndarray
    :param zz: meshgrid ndarray
    :param bal: number between 0 and 1, balance between lower and upper bounds in the decision function (in case not level 0)
    :param plot_decision: boolean for plotting the decision volume
    :param plot_poly: boolean for plotting the polynomial approximation
    :param using_weights: boolean, True if the classifier is a libsvm model trained with weights
    :return F: 2D meshes with xx, yy coordinates and the hyperplane z which gives decision function 0

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
    """
    # Creating the 3D grid
    XX = np.c_[np.ravel(xx), np.ravel(yy), np.ravel(zz)]

    # Evaluating the decision function
    print '>> Evaluating the decision function...'
    sys.stdout.flush()
    t_1 = time()
    if using_weights:
        from libsvm_weights.python.svmutil import svm_predict
        _, _, p_vals = svm_predict([], XX, clf)
        ZZ = np.array([p[0] for p in p_vals])
    else:
        ZZ = clf.decision_function(XX)
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    hist = np.histogram(ZZ)
    print 'counts: ', hist[0]
    print 'values: ', hist[1]
    print 'decision function range: ', ZZ.min(), '~', ZZ.max()

    # Reshaping decision function volume
    Z = ZZ.reshape(xx.shape)
    print 'decision function shape: ', Z.shape
    sl.save((xx,yy,zz,Z),'decision')
    if plot_decision:
        try:
            from skimage import measure
            from shiftcmap import shiftedColorMap
            verts, faces, normals, values = measure.marching_cubes_lewiner(Z, level=0, allow_degenerate=False)
            # Scale and transform to the actual size of the interesting volume
            h = np.divide([xx.max()-xx.min(), yy.max() - yy.min(), zz.max() - zz.min()],np.array(xx.shape)-1)
            verts = verts * h
            verts = verts + [xx.min(), yy.min(), zz.min()]
            mesh = Poly3DCollection(verts[faces], facecolor='orange', alpha=.9)
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Decision volume")
            col = [(0, 0, 1), (.5, .5, .5), (1, 0, 0)]
            cm = colors.LinearSegmentedColormap.from_list('BuRd',col,N=100)
            midpoint = 1 - ZZ.max() / (ZZ.max() + abs(ZZ.min()))
            shiftedcmap = shiftedColorMap(cm, midpoint=midpoint, name='shifted')
            X = np.ravel(xx)
            Y = np.ravel(yy)
            T = np.ravel(zz)
            kk = 50
            p = ax.scatter(X[0::kk], Y[0::kk], T[0::kk], c=ZZ[0::kk], s=.1, alpha=.4, cmap=shiftedcmap)
            cbar = fig.colorbar(p)
            cbar.set_label('decision function value', rotation=270, labelpad=20)
            ax.add_collection3d(mesh)
            ax.set_xlim([xx.min(),xx.max()])
            ax.set_ylim([yy.min(),yy.max()])
            ax.set_zlim([zz.min(),zz.max()])
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('decision.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    if plot_poly:
        fig = plt.figure()
    # Computing fire arrival time from the previous decision function
    print '>> Computing fire arrival time...'
    sys.stdout.flush()
    t_1 = time()
    # xx 2-dimensional array
    Fx = xx[:, :, 0]
    # yy 2-dimensional array
    Fy = yy[:, :, 0]
    # zz 1-dimensional array
    zr = zz[0, 0]
    # Initializing fire arrival time
    Fz = np.zeros(Fx.shape)
    # For each x and y
    for k1 in range(Fx.shape[0]):
        for k2 in range(Fx.shape[1]):
            # Approximate the vertical decision function by a piecewise polynomial (cubic spline interpolation)
            pz = interpolate.CubicSpline(zr, Z[k1,k2])
            # Compute the roots of the piecewise polynomial
            rr = pz.roots()
            # Just take the real roots between min(zz) and max(zz)
            realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5, np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
            if len(realr) > 0:
                # Take the minimum root
                Fz[k1,k2] = realr.min()
                # Plotting the approximated polynomial with the decision function
                if plot_poly:
                    try:
                        plt.ion()
                        plt.plot(pz(zr),zr)
                        plt.plot(Z[k1,k2],zr,'+')
                        plt.plot(np.zeros(len(realr)),realr,'o',c='g')
                        plt.plot(0,Fz[k1,k2],'o',markersize=3,c='r')
                        plt.title('Polynomial approximation of decision_function(%f,%f,z)' % (Fx[k1,k2],Fy[k1,k2]))
                        plt.xlabel('decision function value')
                        plt.ylabel('Z')
                        plt.legend(['polynomial','decision values','roots','fire arrival time'])
                        plt.xlim([Z.min(),Z.max()])
                        plt.ylim([zz.min(),zz.max()])
                        plt.show()
                        plt.pause(.001)
                        plt.cla()
                    except Exception as e:
                        print 'Warning: something went wrong when plotting...'
                        print e
            else:
                # If there is no real root of the polynomial between zz.min() and zz.max(), define it as NaN
                Fz[k1,k2] = np.nan
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    F = [Fx,Fy,Fz]

    return F
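
# Example sketch of frontier (assumes a fitted classifier clf with a
# decision_function method, e.g. the output of svm.SVC(...).fit(X, y)):
#   xx, yy, zz = make_meshgrid(X[:,0], X[:,1], X[:,2])
#   Fx, Fy, Fz = frontier(clf, xx, yy, zz)
#   # Fz[i,j] is the smallest time where the decision function changes sign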

def SVM3(X, y, C=1., kgam=1., search=False, norm=True, fire_grid=None, weights=None):
    """
    3D Support Vector Machine analysis and plot

    :param X: training vectors, where n_samples is the number of samples and n_features is the number of features
    :param y: target values
    :param C: weight against outliers (argument of the svm.SVC class) or array of per-sample weights, optional
    :param kgam: scalar multiplier for gamma (increase it to capture more detail), optional
    :param search: whether to grid-search the best values of C and gamma, optional
    :param norm: normalize the data into the interval (0,1) in all directions, optional
    :param fire_grid: the longitude and latitude grid where to compute the fire arrival time, optional
    :return F: tuple with (longitude grid, latitude grid, fire arrival time grid)

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """
    t_init = time()

    col = [(0, 1, 0), (1, 0, 0)]
    cm_GR = colors.LinearSegmentedColormap.from_list('GrRd',col,N=2)
    col = [(1, 0, 0), (.25, 0, 0)]
    cm_Rds = colors.LinearSegmentedColormap.from_list('Rds',col,N=100)

    # Plot options
    # plot original data
    plot_data = False
    # plot scaled data with artificial data
    plot_scaled = False
    # plot decision volume
    plot_decision = False
    # plot polynomial approximation
    plot_poly = False
    # plot full hyperplane vs detections with support vectors
    plot_supports = False
    # plot resulting fire arrival time vs detections
    plot_result = False
    # Other options
    # number of vertical nodes per observation
    vN = 1
    # if True, all NaNs in the result are replaced by the maximum value
    notnan = True

    # Options better left unchanged
    # number of horizontal nodes per observation (used if fire_grid is None)
    hN = 5
    # creation of artificial lower bounds under the ground detections in the pre-processing
    artil = True
    # if artil = True: vertical resolution of the artificial lower bounds below the ground detections
    hartil = .2
    # creation of artificial upper bounds over the fire detections in the pre-processing
    artiu = True
    # if artiu = True: vertical resolution of the artificial upper bounds above the fire detections
    hartiu = .1
    # creation of an artificial mesh of lower bounds at the bottom
    downarti = True
    # if downarti = True: distance below the minimum of the z direction for the artificial lower bounds
    dminz = .1
    # if downarti = True: confidence level of the artificial lower bounds
    confal = 100.
    # creation of an artificial mesh of upper bounds at the top
    toparti = False
    # if toparti = True: distance over the maximum of the z direction for the artificial upper bounds
    dmaxz = .1
    # if toparti = True: confidence level of the artificial upper bounds
    confau = 100.
    # using different weights for the data
    if isinstance(C,(list,tuple,np.ndarray)):
        using_weights = True
        from libsvm_weights.python.svm import svm_problem, svm_parameter
        from libsvm_weights.python.svmutil import svm_train
        from sklearn.utils import compute_class_weight
    else:
        using_weights = False
    # Data inputs
    X = np.array(X).astype(float)
    y = np.array(y)

    # Original data
    oX = np.array(X).astype(float)
    oy = np.array(y)

    # Visualization of the data
    X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    if plot_data:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the original data to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=20, edgecolors='k', vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('original_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Normalization of the data into [0,1]^3
    if norm:
        xmin = X0.min()
        xlen = X0.max() - X0.min()
        x0 = np.divide(X0 - xmin, xlen)
        ymin = X1.min()
        ylen = X1.max() - X1.min()
        x1 = np.divide(X1 - ymin, ylen)
        zmin = X2.min()
        zlen = X2.max() - X2.min()
        x2 = np.divide(X2 - zmin, zlen)
        X0, X1, X2 = x0, x1, x2
        X[:, 0] = X0
        X[:, 1] = X1
        X[:, 2] = X2
    # Creation of artificial fire and ground detections
    if artil or artiu or toparti or downarti:
        # Extreme values in the z direction
        minz = X[:, 2].min()
        maxz = X[:, 2].max()
        # Division into lower and upper bounds for data and confidence level
        fl = X[y==np.unique(y)[0]]
        fu = X[y==np.unique(y)[1]]

        # Artificial extensions of the lower bounds
        if artil:
            # Create artificial lower bounds
            flz = np.array([ np.unique(np.append(np.arange(f[2],minz,-hartil),f[2])) for f in fl ])
            # Definition of the new ground detections after the artificial detections are added
            Xg = np.concatenate([ np.c_[(np.repeat(fl[k][0],len(flz[k])),np.repeat(fl[k][1],len(flz[k])),flz[k])] for k in range(len(flz)) ])
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = np.concatenate([ np.repeat(cl[k],len(flz[k])) for k in range(len(flz)) ])
        else:
            Xg = fl
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = cl
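        # e.g. with hartil=.2 and minz=0 (normalized data), a ground detection
        # at z=.5 also generates artificial lower bounds at z=.3 and z=.1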
        # Artificial extensions of the upper bounds
        if artiu:
            # Create artificial upper bounds
            fuz = np.array([ np.unique(np.append(np.arange(f[2],maxz,hartiu),f[2])) for f in fu ])
            # Definition of the new fire detections after the artificial detections are added
            Xf = np.concatenate([ np.c_[(np.repeat(fu[k][0],len(fuz[k])),np.repeat(fu[k][1],len(fuz[k])),fuz[k])] for k in range(len(fuz)) ])
            # Define new confidence levels
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = np.concatenate([ np.repeat(cu[k],len(fuz[k])) for k in range(len(fuz)) ])
        else:
            Xf = fu
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = cu
        # Bottom artificial lower bounds
        if downarti:
            # Creation of the new x,y mesh of artificial lower bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # The whole new artificial mesh is placed below the data
            zng = np.repeat(minz-dminz,len(np.ravel(xn)))
            # Artificial lower bounds
            Xga = np.c_[np.ravel(xn),np.ravel(yn),np.ravel(zng)]
            # Definition of the new ground detections after the bottom artificial lower detections
            Xgn = np.concatenate((Xg,Xga))
            # Definition of the new confidence level
            if using_weights:
                Cga = np.ones(len(Xga))*confal
                Cgn = np.concatenate((Cg,Cga))
        else:
            Xgn = Xg
            if using_weights:
                Cgn = Cg
        # Top artificial upper bounds
        if toparti:
            # Creation of the new x,y mesh of artificial upper bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # The whole new artificial mesh is placed above the data
            znf = np.repeat(maxz+dmaxz,len(np.ravel(xn)))
            # Artificial upper bounds
            Xfa = np.c_[np.ravel(xn),np.ravel(yn),np.ravel(znf)]
            # Definition of the new fire detections after the top artificial upper detections
            Xfn = np.concatenate((Xf,Xfa))
            # Definition of the new confidence level
            if using_weights:
                Cfa = np.ones(len(Xfa))*confau
                Cfn = np.concatenate((Cf,Cfa))
        else:
            Xfn = Xf
            if using_weights:
                Cfn = Cf
        # New definition of the training vectors
        X = np.concatenate((Xgn, Xfn))
        # New definition of the target values
        y = np.concatenate((np.repeat(np.unique(y)[0],len(Xgn)),np.repeat(np.unique(y)[1],len(Xfn))))
        # New definition of the confidence level
        if using_weights:
            C = np.concatenate((Cgn, Cfn))
        # New definition of each feature vector
        X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    # Printing the number of samples and features
    n0 = (y==np.unique(y)[0]).sum().astype(float)
    n1 = (y==np.unique(y)[1]).sum().astype(float)
    n_samples, n_features = X.shape
    print 'n_samples =', n_samples
    print 'n_samples_{-1} =', int(n0)
    print 'n_samples_{+1} =', int(n1)
    print 'n_features =', n_features
    # Visualization of the scaled data
    if plot_scaled:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the data scaled to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=20, edgecolors='k', vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('scaled_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Rescaling gamma to include more detailed results
    gamma = 1. / (n_features * X.std())
    print 'gamma =', gamma
    # Creating the SVM model and fitting the data using the Support Vector Machine technique
    print '>> Creating the SVM model...'
    sys.stdout.flush()
    if using_weights:
        gamma = kgam*gamma
        # Compute class balanced weights
        cls, _ = np.unique(y, return_inverse=True)
        class_weight = compute_class_weight("balanced", cls, y)
        prob = svm_problem(C,y,X)
        arg = '-g %.15g -w%01d %.15g -w%01d %.15g -m 1000 -h 0' % (gamma, cls[0], class_weight[0],
                                                                   cls[1], class_weight[1])
        param = svm_parameter(arg)
        print '>> Fitting the SVM model...'
        t_1 = time()
        clf = svm_train(prob,param)
        t_2 = time()
    else:
        t_1 = time()
        if search:
            print '>> Searching for the best values of C and gamma...'
            # Parameter grid for the grid search
            param_grid = {'C': np.logspace(0,5,6), 'gamma': gamma*np.logspace(0,5,6)}
            # Make the grid search classifier
            grid_search = GridSearchCV(svm.SVC(cache_size=2000,class_weight="balanced",probability=True), param_grid, n_jobs=-1, verbose=1, cv=5, iid=False)
            print '>> Fitting the SVM model...'
            # Train the classifier
            grid_search.fit(X, y)
            print "Best Parameters:\n", grid_search.best_params_
            clf = grid_search.best_estimator_
            print "Best Estimator:\n", clf
        else:
            gamma = kgam*gamma
            clf = svm.SVC(C=C, kernel="rbf", gamma=gamma, cache_size=2000, class_weight="balanced") # RBF kernel: exp(-gamma*||x-x'||^2)
            print clf
            print '>> Fitting the SVM model...'
            # Fitting the data using the Support Vector Machine technique
            clf.fit(X, y)
        t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    if not using_weights:
        # Check if the classification failed
        if clf.fit_status_:
            print '>> FAILED <<'
            print 'Failed fitting the data'
            sys.exit(1)
        print 'number of support vectors: ', clf.n_support_
        print 'score of trained data: ', clf.score(X,y)
    # Creating the mesh grid to evaluate the classification
    print '>> Creating mesh grid to evaluate the classification...'
    nnodes = np.ceil(np.power(n_samples,1./n_features))
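    # e.g. 8000 samples with 3 features give nnodes = 20, hence a default
    # evaluation grid of 100x100x20 nodes (hN=5, vN=1)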
    if fire_grid is None:
        # Number of necessary nodes
        hnodes = hN*nnodes
        vnodes = vN*nnodes
        print 'number of horizontal nodes (%d meshgrid nodes for each observation): %d' % (hN,hnodes)
        print 'number of vertical nodes (%d meshgrid nodes for each observation): %d' % (vN,vnodes)
        # Computing the resolution of the mesh to evaluate
        sdim = (hnodes,hnodes,vnodes)
        print 'grid_size = %dx%dx%d = %d' % (sdim[0],sdim[1],sdim[2],np.prod(sdim))
        t_1 = time()
        xx, yy, zz = make_meshgrid(X0, X1, X2, s=sdim)
        t_2 = time()
    else:
        fxlon = np.divide(fire_grid[0] - xmin, xlen)
        fxlat = np.divide(fire_grid[1] - ymin, ylen)
        it = (X2.min(),X2.max())
        vnodes = vN*nnodes
        sdim = (fxlon.shape[0],fxlon.shape[1],vnodes)
        print 'fire_grid_size = %dx%dx%d = %d' % (sdim + (np.prod(sdim),))
        t_1 = time()
        xx, yy, zz = make_fire_mesh(fxlon, fxlat, it, sdim[2])
        t_2 = time()
        print 'grid_created = %dx%dx%d = %d' % (zz.shape + (np.prod(zz.shape),))
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    # Computing the 2D fire arrival time, F
    print '>> Computing the 2D fire arrival time, F...'
    sys.stdout.flush()
    F = frontier(clf, xx, yy, zz, plot_decision=plot_decision, plot_poly=plot_poly, using_weights=using_weights)
    print '>> Creating final results...'
    sys.stdout.flush()
    # Plotting the separating hyperplane of the SVM classification with the support vectors
    if plot_supports:
        try:
            if using_weights:
                supp_ind = np.sort(clf.get_sv_indices())-1
                supp_vec = X[supp_ind]
            else:
                supp_ind = clf.support_
                supp_vec = clf.support_vectors_
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D Separating Hyperplane of an SVM")
            # plotting the separating hyperplane
            ax.plot_wireframe(F[0], F[1], F[2], color='orange', alpha=.5)
            # computing the indices of the non-support vectors
            rr = np.array(range(len(y)))
            ms = np.isin(rr,supp_ind)
            nsupp = rr[~ms]
            # plotting non-support vectors (smaller)
            ax.scatter(X0[nsupp], X1[nsupp], X2[nsupp], c=y[nsupp], cmap=cm_GR, s=.5, vmin=y.min(), vmax=y.max(), alpha=.1)
            # plotting support vectors (bigger)
            ax.scatter(supp_vec[:, 0], supp_vec[:, 1], supp_vec[:, 2], c=y[supp_ind], cmap=cm_GR, s=20, edgecolors='k', alpha=.2)
            ax.set_xlim(xx.min(),xx.max())
            ax.set_ylim(yy.min(),yy.max())
            ax.set_zlim(zz.min(),zz.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('support.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Plot the normalized fire arrival time resulting from the SVM classification
    if plot_result:
        try:
            Fx, Fy, Fz = np.array(F[0]), np.array(F[1]), np.array(F[2])
            with np.errstate(invalid='ignore'):
                Fz[Fz > X2.max()] = np.nan
            if notnan:
                Fz[np.isnan(Fz)] = X2.max()
                Fz = np.minimum(Fz, X2.max())
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Fire arrival time normalized")
            # plotting fire arrival time
            p = ax.plot_surface(Fx, Fy, Fz, cmap=cm_Rds,
                                linewidth=0, antialiased=False)
            ax.set_xlim(xx.min(),xx.max())
            ax.set_ylim(yy.min(),yy.max())
            ax.set_zlim(zz.min(),zz.max())
            cbar = fig.colorbar(p)
            cbar.set_label('Fire arrival time normalized', labelpad=20, rotation=270)
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('tign_g.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Translate the result back into the initial data scale
    if norm:
        f0 = F[0] * xlen + xmin
        f1 = F[1] * ylen + ymin
        f2 = F[2] * zlen + zmin
        FF = [f0,f1,f2]
    else:
        # if the data was not normalized, the result is already in the data scale
        FF = [F[0],F[1],F[2]]

    # Cap values larger than the maximum detection time at the maximum value
    oX0, oX1, oX2 = oX[:, 0], oX[:, 1], oX[:, 2]
    FFx, FFy, FFz = FF[0], FF[1], FF[2]

    with np.errstate(invalid='ignore'):
        FFz[FFz > oX2.max()] = np.nan

    if notnan:
        FFz[np.isnan(FFz)] = oX2.max()
        FFz = np.minimum(FFz, oX2.max())
    FF = [FFx,FFy,FFz]
    # Plot the fire arrival time resulting from the SVM classification
    if plot_result:
        try:
            # Plotting the result
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D graph function of an SVM")
            FFx, FFy, FFz = np.array(FF[0]), np.array(FF[1]), np.array(FF[2])
            # plotting original data
            ax.scatter(oX0, oX1, oX2, c=oy, cmap=cm_GR, s=2, vmin=y.min(), vmax=y.max())
            # plotting fire arrival time
            ax.plot_wireframe(FFx, FFy, FFz, color='orange', alpha=.5)
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('result.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    print '>> SUCCESS <<'
    t_final = time()
    print 'TOTAL elapsed time: %ss.' % str(abs(t_final-t_init))
    plt.close()

    return FF
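
# Example sketch of the fire_grid option (hypothetical grids; FF[0], FF[1] are in
# the original coordinates and FF[2] is the fire arrival time in days; passing an
# array for C requires the libsvm_weights package):
#   fxlon, fxlat = np.meshgrid(np.linspace(-113,-112,100), np.linspace(40,41,100))
#   FF = SVM3(X, y, C=c, kgam=5., fire_grid=(fxlon, fxlat))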

if __name__ == "__main__":
    # Experiment to run
    exp = 1
    search = False

    # Defining ground and fire detections
    def exp1():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0]]
        Xf = [[0, 0, 1], [1, 1, 0], [2, 2, 1], [2, 0, 1], [0, 2, 1]]
        C = np.concatenate((10.*np.ones(len(Xg)),100.*np.ones(len(Xf))))
        kgam = 1.
        return Xg, Xf, C, kgam
    def exp2():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0],
              [4, 2, 0], [4, 0, 0], [2, 1, .5], [0, 1, .5],
              [4, 1, .5], [2, 0, .5], [2, 2, .5]]
        Xf = [[0, 0, 1], [1, 1, 0.25], [2, 2, 1], [2, 0, 1], [0, 2, 1], [3, 1, 0.25], [4, 2, 1], [4, 0, 1]]
        C = np.concatenate((np.array([50.,50.,50.,50.,50.,50.,
                                      1000.,100.,100.,100.,100.]), 100.*np.ones(len(Xf))))
        kgam = 5.
        return Xg, Xf, C, kgam

    # Creating the options
    options = {1 : exp1, 2 : exp2}

    # Defining the option depending on the experiment
    Xg, Xf, C, kgam = options[exp]()

    # Creating the data necessary to run the SVM3 function
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))

    # Running the SVM classification
    SVM3(X,y,C=C,kgam=kgam,search=search)