# Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
# Angel Farguell (angel.farguell@gmail.com)
#
# To install:
#   conda install scikit-learn
#   conda install scikit-image

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from scipy import interpolate, spatial
import matplotlib.pyplot as plt
import matplotlib.font_manager
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import axes3d
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import numpy as np
from time import time
from infrared_perimeters import process_infrared_perimeters
import sys
import saveload as sl
def verify_inputs(params):
    required_args = [("search", False), ("norm", True),
        ("notnan", True), ("artil", False), ("hartil", 0.2),
        ("artiu", True), ("hartiu", 0.1), ("downarti", True),
        ("dminz", 0.1), ("confal", 100), ("toparti", False),
        ("dmaxz", 0.1), ("confau", 100), ("plot_data", False),
        ("plot_scaled", False), ("plot_decision", False),
        ("plot_poly", False), ("plot_supports", False),
        ("plot_result", False)]
    # check each argument that should exist
    for key, default in required_args:
        if key not in params:
            params.update({key: default})
    return params
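
# A minimal sanity check of verify_inputs (hypothetical demo, not part of the
# pipeline): keys the caller provides are preserved, and missing keys get the
# defaults listed above.
def _verify_inputs_demo():
    p = verify_inputs({'norm': False})
    assert p['norm'] == False    # caller's value preserved
    assert p['search'] == False  # default filled in
    assert p['hartil'] == 0.2    # default filled in
    return p
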
def preprocess_data_svm(data, scale, minconf=80.):
    """
    Preprocess satellite data from JPSSD to use in Support Vector Machine directly
    (without any interpolation, as space-time 3D points)

    :param data: dictionary of satellite data from JPSSD
    :param scale: time scales
    :param minconf: optional, minimum fire confidence level to take into account
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-09-24
    """
    # confidence of ground detections
    gconf = 95.

    # scale from seconds to days
    tscale = 24.*3600.

    detlon = np.concatenate([d['lon_fire'] for d in data.itervalues()])
    detlat = np.concatenate([d['lat_fire'] for d in data.itervalues()])
    confs = np.concatenate([d['conf_fire'] for d in data.itervalues()])
    bb = (detlon[confs > minconf].min(), detlon[confs > minconf].max(),
          detlat[confs > minconf].min(), detlat[confs > minconf].max())
    dc = (bb[1]-bb[0], bb[3]-bb[2])
    bf = (bb[0]-dc[0]*.3, bb[1]+dc[0]*.3, bb[2]-dc[1]*.3, bb[3]+dc[1]*.3)
    print bf

    # process all the points as space-time 3D nodes
    XX = [[], []]
    cf = []
    for gran in data.items():
        print '> processing granule %s' % gran[0]
        tt = (gran[1]['time_num']-scale[0])/tscale
        conf = gran[1]['conf_fire'] >= minconf
        xf = np.c_[(gran[1]['lon_fire'][conf], gran[1]['lat_fire'][conf], np.repeat(tt, conf.sum()))]
        print 'fire detections: %g' % len(xf)
        XX[0].append(xf)
        mask = np.logical_and(gran[1]['lon_nofire'] >= bf[0],
                    np.logical_and(gran[1]['lon_nofire'] <= bf[1],
                        np.logical_and(gran[1]['lat_nofire'] >= bf[2],
                            gran[1]['lat_nofire'] <= bf[3])))
        xg = np.c_[(gran[1]['lon_nofire'][mask], gran[1]['lat_nofire'][mask], np.repeat(tt, mask.sum()))]
        print 'no fire detections: %g' % len(xg)
        coarsening = 1  # np.int(1+len(xg)/min(100,20*max(len(xf),1)))
        print 'no fire coarsening: %d' % coarsening
        print 'no fire detections reduction: %g' % len(xg[::coarsening])
        XX[1].append(xg[::coarsening])
        cf.append(gran[1]['conf_fire'][conf])

    Xf = np.concatenate(tuple(XX[0]))
    Xg = np.concatenate(tuple(XX[1]))
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))
    c = np.concatenate((gconf*np.ones(len(Xg)), np.concatenate(tuple(cf))))
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape
    print 'shape c: ', c.shape
    print 'len fire: ', len(X[y==1])
    print 'len ground: ', len(X[y==-1])

    return X, y, c
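
# Hedged usage sketch for preprocess_data_svm on a single made-up granule; the
# field names follow the JPSSD dictionary structure read above, but the values
# are invented for illustration only.
def _preprocess_data_svm_demo():
    data = {'demo_granule': {
        'time_num': 3600.,
        'lon_fire': np.array([-113.0, -112.9]),
        'lat_fire': np.array([40.0, 40.1]),
        'conf_fire': np.array([90., 85.]),
        'lon_nofire': np.array([-112.95, -112.9]),
        'lat_nofire': np.array([40.05, 40.12])}}
    scale = (0., 24.*3600.)
    # X has rows (lon, lat, time in days), y is -1 for ground and +1 for fire,
    # c carries the confidence levels used later as SVM sample weights
    X, y, c = preprocess_data_svm(data, scale)
    return X, y, c
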
def preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules, C=None):
    """
    Preprocess satellite data from JPSSD and set it up to use in Support Vector Machine

    :param lons: longitude grid
    :param lats: latitude grid
    :param U: upper bound grid
    :param L: lower bound grid
    :param T: mask grid
    :param scale: time scales
    :param time_num_granules: times of the granules
    :param C: optional, confidence level grids for the lower and upper bounds
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """
    # Flatten coordinates
    lon = np.ravel(lons).astype(float)
    lat = np.ravel(lats).astype(float)

    # Temporal scale to days
    tscale = 24*3600
    U = U/tscale
    L = L/tscale

    # Check that U>=L holds everywhere
    print 'U>L: ', (U>L).sum()
    print 'U<L: ', (U<L).sum()
    print 'U==L: ', (U==L).sum()

    # Reshape to 1D
    uu = np.ravel(U)
    ll = np.ravel(L)
    tt = np.ravel(T)

    # Set maximum and minimum sentinel values to NaN
    uu[uu==uu.max()] = np.nan
    ll[ll==ll.min()] = np.nan

    # Mask created during computation of lower and upper bounds
    mk = tt == scale[1]-scale[0]
    # Masking upper bounds outside the mask
    uu[mk] = np.nan

    # Minimum value of the upper bounds
    muu = uu[~np.isnan(uu)].min()
    # Maximum value of the lower bounds
    mll = ll[~np.isnan(ll)].max()
    ### Reduction of the density of lower bounds
    # Creation of the lower bounds mask (True values are going to turn into NaN's in the lower bound data)
    lmk = np.copy(mk)
    ## Reason: we do not care about lower bounds that are far below the upper bounds
    # temporary lower mask, all False (values of the mask where the mask is False, inside the fire mask)
    tlmk = lmk[~mk]
    # set to True all the bounds less than the floor of the minimum of the upper bounds in the fire mask
    tlmk[~np.isnan(ll[~mk])] = (ll[~mk][~np.isnan(ll[~mk])] < np.floor(muu))
    # set lower mask from temporary mask
    lmk[~mk] = tlmk
    ## Reason: coarsening of the lower bounds below the upper bounds to create balance
    # coarsen the lower bound data below the upper bounds to a similar amount as the upper bounds
    kk = (~np.isnan(ll[~lmk])).sum()/(~np.isnan(uu)).sum()
    if kk:
        # temporary lower mask, all True (values of the lower mask where the lower mask is False, set to True)
        tlmk = ~lmk[~lmk]
        # only set a subset of the lower mask values to False (coarsening)
        tlmk[::kk] = False
        # set lower mask from temporary mask
        lmk[~lmk] = tlmk
    ## Reason: we care about the maximum lower bounds which are not below the upper bounds
    # temporary lower mask, all True (values of the mask where the mask is True, outside the fire mask)
    tlmk = lmk[mk]
    # temporary lower mask 2, all True (subset of the previous mask where the lower bound is not NaN)
    t2lmk = tlmk[~np.isnan(ll[mk])]
    # set to False in the temporary lower mask 2 where the lower bounds have the maximum value
    t2lmk[ll[mk][~np.isnan(ll[mk])] == mll] = False
    # set temporary lower mask from temporary lower mask 2
    tlmk[~np.isnan(ll[mk])] = t2lmk
    # set lower mask from temporary lower mask
    lmk[mk] = tlmk
    ## Reason: coarsening of the non-maximum lower bounds not below the upper bounds to create balance
    # set subset outside of the fire mask for the rest
    # coarsen the non-maximum lower bounds not below the upper bounds to a similar amount as the current number of lower bounds
    kk = (ll[mk][~np.isnan(ll[mk])] < mll).sum()/(~np.isnan(ll[~lmk])).sum()
    if kk:
        # temporary lower mask, values of the current lower mask outside of the original fire mask
        tlmk = lmk[mk]
        # temporary lower mask 2, subset of the previous mask where the lower bound is not NaN
        t2lmk = tlmk[~np.isnan(ll[mk])]
        # temporary lower mask 3, subset of the previous mask where the lower bounds are not maximum
        t3lmk = t2lmk[ll[mk][~np.isnan(ll[mk])] < mll]
        # coarsening of the temporary lower mask 3
        t3lmk[::kk] = False
        # set the temporary lower mask 2 from the temporary lower mask 3
        t2lmk[ll[mk][~np.isnan(ll[mk])] < mll] = t3lmk
        # set the temporary lower mask from the temporary lower mask 2
        tlmk[~np.isnan(ll[mk])] = t2lmk
        # set the lower mask from the temporary lower mask
        lmk[mk] = tlmk

    # Masking lower bounds from previous lower mask
    ll[lmk] = np.nan
    # Values different from NaN in the upper and lower bounds
    um = np.array(~np.isnan(uu))
    lm = np.array(~np.isnan(ll))
    # Define all the x, y, and z components of upper and lower bounds
    ux = lon[um]
    uy = lat[um]
    uz = uu[um]
    lx = lon[lm]
    ly = lat[lm]
    lz = ll[lm]

    # Create the data to call the SVM3 function from svm.py
    X = np.c_[np.concatenate((lx,ux)), np.concatenate((ly,uy)), np.concatenate((lz,uz))]
    y = np.concatenate((-np.ones(len(lx)), np.ones(len(ux))))
    # Print the shape of the data
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape

    if C is None:
        c = 80*np.ones(y.shape)
    else:
        c = np.concatenate((np.ravel(C[0])[lm], np.ravel(C[1])[um]))

    # Clean data if not in bounding box
    bbox = (lon.min(), lon.max(), lat.min(), lat.max(), time_num_granules)
    geo_mask = np.logical_and(np.logical_and(np.logical_and(X[:,0] >= bbox[0], X[:,0] <= bbox[1]), X[:,1] >= bbox[2]), X[:,1] <= bbox[3])
    btime = (0, (scale[1]-scale[0])/tscale)
    time_mask = np.logical_and(X[:,2] >= btime[0], X[:,2] <= btime[1])
    whole_mask = np.logical_and(geo_mask, time_mask)
    X = X[whole_mask,:]
    y = y[whole_mask]
    c = c[whole_mask]

    return X, y, c
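
# The mask surgery in preprocess_result_svm leans on a NumPy subtlety worth
# isolating: boolean (fancy) indexing returns a copy, so a sub-mask has to be
# edited on a temporary array and then written back. A toy sketch of the idiom
# (made-up mask, coarsening step 2):
def _submask_coarsening_demo():
    mask = np.array([True, False, False, True, False, False])
    tmp = mask[~mask]      # copy of the entries where mask is False
    tmp[::2] = True        # coarsen: flip every other one of those entries
    mask[~mask] = tmp      # write the temporary back into the original mask
    return mask            # -> [True, True, False, True, True, False]
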
def make_fire_mesh(fxlon, fxlat, it, nt):
    """
    Create a mesh of points to evaluate the decision function

    :param fxlon: data to base x-axis meshgrid on
    :param fxlat: data to base y-axis meshgrid on
    :param it: time interval endpoints (t0,t1) to base z-axis meshgrid on
    :param nt: number of nodes in the z direction
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """
    xx = np.repeat(fxlon[:, :, np.newaxis], nt, axis=2)
    yy = np.repeat(fxlat[:, :, np.newaxis], nt, axis=2)
    tt = np.linspace(it[0], it[1], nt)
    zz = np.swapaxes(np.swapaxes(np.array([np.ones(fxlon.shape)*t for t in tt]), 0, 1), 1, 2)

    return xx, yy, zz
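
# Quick shape check for make_fire_mesh (toy sizes, not fire data): a 2D (m,n)
# spatial grid replicated over nt time slices yields three (m,n,nt) arrays.
def _make_fire_mesh_demo():
    fxlon, fxlat = np.meshgrid(np.linspace(0., 1., 4), np.linspace(0., 1., 3))
    xx, yy, zz = make_fire_mesh(fxlon, fxlat, (0., 1.), 5)
    assert xx.shape == yy.shape == zz.shape == (3, 4, 5)
    return xx, yy, zz
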
def make_meshgrid(x, y, z, s=(50,50,50), exp=.1):
    """
    Create a mesh of points to evaluate the decision function

    :param x: data to base x-axis meshgrid on
    :param y: data to base y-axis meshgrid on
    :param z: data to base z-axis meshgrid on
    :param s: tuple of number of nodes in each direction, optional
    :param exp: extra fraction of nodes in each direction (between 0 and 1), optional
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """
    if not isinstance(s, tuple):
        print '>> FAILED <<'
        print 'The number of nodes at each direction is not a tuple: ', s
        sys.exit(1)
    # number of nodes in each direction
    sx, sy, sz = np.array(s).astype(int)
    # extra step sizes in each direction
    brx = int(sx * exp)
    bry = int(sy * exp)
    brz = int(sz * exp)
    # grid lengths in each direction
    lx = x.max() - x.min()
    ly = y.max() - y.min()
    lz = z.max() - z.min()
    # grid resolutions in each direction
    hx = lx / (sx - 2*brx - 1)
    hy = ly / (sy - 2*bry - 1)
    hz = lz / (sz - 2*brz - 1)
    # extreme values for each dimension
    x_min, x_max = x.min() - brx * hx, x.max() + brx * hx
    y_min, y_max = y.min() - bry * hy, y.max() + bry * hy
    z_min, z_max = z.min() - brz * hz, z.max() + brz * hz
    # generating the mesh grid; 'ij' indexing so that xx varies with x along
    # the first axis and yy with y along the second, consistent with the
    # column order (x,y,z) used to train the classifier and with make_fire_mesh
    xx, yy, zz = np.meshgrid(np.linspace(x_min, x_max, sx),
                    np.linspace(y_min, y_max, sy),
                    np.linspace(z_min, z_max, sz),
                    indexing='ij')
    return xx, yy, zz
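
# Hedged sketch of make_meshgrid on random data: the output shapes follow the
# s argument, and the grid extends roughly exp=10% beyond the data range on
# every side because of the buffer nodes computed above.
def _make_meshgrid_demo():
    x, y, z = np.random.rand(10), np.random.rand(10), np.random.rand(10)
    xx, yy, zz = make_meshgrid(x, y, z, s=(20, 30, 40))
    assert xx.shape == (20, 30, 40)
    assert xx.min() < x.min() and xx.max() > x.max()
    return xx, yy, zz
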
def frontier(clf, xx, yy, zz, bal=.5, plot_decision=False, plot_poly=False, using_weights=False):
    """
    Compute the surface decision frontier for a classifier.

    :param clf: a classifier
    :param xx: meshgrid ndarray
    :param yy: meshgrid ndarray
    :param zz: meshgrid ndarray
    :param bal: number between 0 and 1, balance between lower and upper bounds in decision function (in case not level 0)
    :param plot_decision: boolean, plot the decision volume
    :param plot_poly: boolean, plot the polynomial approximation
    :param using_weights: boolean, use the weighted libsvm fork instead of scikit-learn
    :return F: 2D meshes with xx, yy coordinates and the hyperplane z which gives decision function 0

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
    """
    # Creating the 3D grid
    XX = np.c_[np.ravel(xx), np.ravel(yy), np.ravel(zz)]

    # Evaluating the decision function
    print '>> Evaluating the decision function...'
    sys.stdout.flush()
    t_1 = time()
    if using_weights:
        from libsvm_weights.python.svmutil import svm_predict
        _, _, p_vals = svm_predict([], XX, clf, '-q')
        ZZ = np.array([p[0] for p in p_vals])
    else:
        ZZ = clf.decision_function(XX)
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    hist = np.histogram(ZZ)
    print 'counts: ', hist[0]
    print 'values: ', hist[1]
    print 'decision function range: ', ZZ.min(), '~', ZZ.max()

    # Reshaping decision function volume
    Z = ZZ.reshape(xx.shape)
    print 'decision function shape: ', Z.shape
    sl.save((xx,yy,zz,Z), 'decision')
    if plot_decision:
        try:
            from skimage import measure
            from shiftcmap import shiftedColorMap
            verts, faces, normals, values = measure.marching_cubes_lewiner(Z, level=0, allow_degenerate=False)
            # Scale and transform to the actual size of the interesting volume
            h = np.divide([xx.max()-xx.min(), yy.max()-yy.min(), zz.max()-zz.min()], np.array(xx.shape)-1)
            verts = verts * h
            verts = verts + [xx.min(), yy.min(), zz.min()]
            mesh = Poly3DCollection(verts[faces], facecolor='orange', alpha=.9)
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Decision volume")
            col = [(0, .5, 0), (.5, .5, .5), (.5, 0, 0)]
            cm = colors.LinearSegmentedColormap.from_list('GrRdD', col, N=50)
            midpoint = 1 - ZZ.max() / (ZZ.max() + abs(ZZ.min()))
            shiftedcmap = shiftedColorMap(cm, midpoint=midpoint, name='shifted')
            kk = 1 + np.divide(xx.shape, 50)
            X = np.ravel(xx[::kk[0], ::kk[1], ::kk[2]])
            Y = np.ravel(yy[::kk[0], ::kk[1], ::kk[2]])
            T = np.ravel(zz[::kk[0], ::kk[1], ::kk[2]])
            CC = np.ravel(Z[::kk[0], ::kk[1], ::kk[2]])
            p = ax.scatter(X, Y, T, c=CC, s=.1, alpha=.5, cmap=shiftedcmap)
            cbar = fig.colorbar(p)
            cbar.set_label('decision function value', rotation=270, labelpad=20)
            ax.add_collection3d(mesh)
            ax.set_xlim([xx.min(), xx.max()])
            ax.set_ylim([yy.min(), yy.max()])
            ax.set_zlim([zz.min(), zz.max()])
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('decision.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    if plot_poly:
        fig = plt.figure()
    # Computing fire arrival time from the previous decision function
    print '>> Computing fire arrival time...'
    sys.stdout.flush()
    t_1 = time()
    # xx 2-dimensional array
    Fx = xx[:, :, 0]
    # yy 2-dimensional array
    Fy = yy[:, :, 0]
    # zz 1-dimensional array
    zr = zz[0, 0]
    # Initializing fire arrival time
    Fz = np.zeros(Fx.shape)
    # For each x and y
    for k1 in range(Fx.shape[0]):
        for k2 in range(Fx.shape[1]):
            # Approximate the vertical decision function by a piecewise polynomial (cubic spline interpolation)
            pz = interpolate.CubicSpline(zr, Z[k1,k2])
            # Compute the real roots of the piecewise polynomial
            rr = pz.roots()
            # Just take the real roots between min(zz) and max(zz)
            realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5, np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
            if len(realr) > 0:
                # Take the minimum root
                Fz[k1,k2] = realr.min()
                # Plotting the approximated polynomial with the decision function
                if plot_poly:
                    try:
                        plt.ion()
                        plt.plot(pz(zr), zr)
                        plt.plot(Z[k1,k2], zr, '+')
                        plt.plot(np.zeros(len(realr)), realr, 'o', c='g')
                        plt.plot(0, Fz[k1,k2], 'o', markersize=3, c='r')
                        plt.title('Polynomial approximation of decision_function(%f,%f,z)' % (Fx[k1,k2], Fy[k1,k2]))
                        plt.xlabel('decision function value')
                        plt.ylabel('Z')
                        plt.legend(['polynomial', 'decision values', 'roots', 'fire arrival time'])
                        plt.xlim([Z.min(), Z.max()])
                        plt.ylim([zz.min(), zz.max()])
                        plt.show()
                        plt.pause(.001)
                        plt.cla()
                    except Exception as e:
                        print 'Warning: something went wrong when plotting...'
                        print e
            else:
                # If there is no real root of the polynomial between zz.min() and zz.max(), define it as NaN
                Fz[k1,k2] = np.nan
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    F = [Fx,Fy,Fz]

    return F
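
# The zero-crossing extraction above, reduced to one vertical profile: the
# decision values along z are interpolated by a cubic spline, and the smallest
# real root inside the z range plays the role of the fire arrival time. Toy
# profile with a known sign change at z=0.5 (illustration only):
def _spline_root_demo():
    zr = np.linspace(0., 1., 11)
    vals = zr - 0.5    # decision-function-like profile, zero at z=0.5
    pz = interpolate.CubicSpline(zr, vals)
    rr = pz.roots()
    realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5,
                np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
    return realr.min()    # ~0.5
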
def SVM3(X, y, C=1., kgam=1., fire_grid=None, it=None, **params):
    """
    3D Support Vector Machine analysis and plot

    :param X: Training vectors, where n_samples is the number of samples and n_features is the number of features.
    :param y: Target values
    :param C: Penalization (argument of svm.SVC class), optional
    :param kgam: Scalar multiplier for gamma (increase it to capture more detail), optional
    :param fire_grid: The longitude and latitude grid where to have the fire arrival time, optional
    :param it: Time interval where to have the fire arrival time, optional
    :return F: tuple with (longitude grid, latitude grid, fire arrival time grid)

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """
    # add default values for parameters not specified
    params = verify_inputs(params)
    print 'svm params: ', params

    t_init = time()

    col = [(0, .5, 0), (.5, 0, 0)]
    cm_GR = colors.LinearSegmentedColormap.from_list('GrRd', col, N=2)
    col = [(1, 0, 0), (.25, 0, 0)]
    cm_Rds = colors.LinearSegmentedColormap.from_list('Rds', col, N=100)

    # grid options used when fire_grid is None
    # number of vertical nodes per observation
    vN = 1
    # number of horizontal nodes per observation
    hN = 5

    # using different weights for the data
    if isinstance(C, (list, tuple, np.ndarray)):
        using_weights = True
        from libsvm_weights.python.svm import svm_problem, svm_parameter
        from libsvm_weights.python.svmutil import svm_train
        from sklearn.utils import compute_class_weight
    else:
        using_weights = False
    # Data inputs
    X = np.array(X).astype(float)
    y = np.array(y)

    # Original data
    oX = np.array(X).astype(float)
    oy = np.array(y)

    # Visualization of the data
    X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    if params['plot_data']:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the original data to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('original_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Normalization of the data into [0,1]^3
    if params['norm']:
        xmin = X0.min()
        xlen = X0.max() - X0.min()
        x0 = np.divide(X0 - xmin, xlen)
        ymin = X1.min()
        ylen = X1.max() - X1.min()
        x1 = np.divide(X1 - ymin, ylen)
        zmin = X2.min()
        zlen = X2.max() - X2.min()
        x2 = np.divide(X2 - zmin, zlen)
        X0, X1, X2 = x0, x1, x2
        X[:, 0] = X0
        X[:, 1] = X1
        X[:, 2] = X2
    # Creation of fire and ground artificial detections
    if params['artil'] or params['artiu'] or params['toparti'] or params['downarti']:
        # Extreme values in the z direction
        minz = X[:, 2].min()
        maxz = X[:, 2].max()
        # Division of lower and upper bounds for data and confidence level
        fl = X[y==np.unique(y)[0]]
        fu = X[y==np.unique(y)[1]]

        # Artificial extensions of the lower bounds
        if params['artil']:
            # Create artificial lower bounds
            flz = np.array([ np.unique(np.append(np.arange(f[2], minz, -params['hartil']), f[2])) for f in fl ])
            # Definition of new ground detections after artificial detections added
            Xg = np.concatenate([ np.c_[(np.repeat(fl[k][0], len(flz[k])), np.repeat(fl[k][1], len(flz[k])), flz[k])] for k in range(len(flz)) ])
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = np.concatenate([ np.repeat(cl[k], len(flz[k])) for k in range(len(flz)) ])
        else:
            Xg = fl
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = cl

        # Artificial extensions of the upper bounds
        if params['artiu']:
            # Create artificial upper bounds
            fuz = np.array([ np.unique(np.append(np.arange(f[2], maxz, params['hartiu']), f[2])) for f in fu ])
            # Definition of new fire detections after artificial detections added
            Xf = np.concatenate([ np.c_[(np.repeat(fu[k][0], len(fuz[k])), np.repeat(fu[k][1], len(fuz[k])), fuz[k])] for k in range(len(fuz)) ])
            # Define new confidence levels
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = np.concatenate([ np.repeat(cu[k], len(fuz[k])) for k in range(len(fuz)) ])
        else:
            Xf = fu
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = cu

        # Bottom artificial lower bounds
        if params['downarti']:
            # Creation of the x,y mesh of artificial lower bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                        np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # All the artificial mesh points are going to be below the data
            zng = np.repeat(minz-params['dminz'], len(np.ravel(xn)))
            # Artificial lower bounds
            Xga = np.c_[np.ravel(xn), np.ravel(yn), np.ravel(zng)]
            # Definition of new ground detections after bottom artificial lower detections
            Xgn = np.concatenate((Xg, Xga))
            # Definition of new confidence level
            if using_weights:
                Cga = np.ones(len(Xga))*params['confal']
                Cgn = np.concatenate((Cg, Cga))
        else:
            Xgn = Xg
            if using_weights:
                Cgn = Cg

        # Top artificial upper bounds
        if params['toparti']:
            # Creation of the x,y mesh of artificial upper bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                        np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # All the artificial mesh points are going to be above the data
            znf = np.repeat(maxz+params['dmaxz'], len(np.ravel(xn)))
            # Artificial upper bounds
            Xfa = np.c_[np.ravel(xn), np.ravel(yn), np.ravel(znf)]
            # Definition of new fire detections after top artificial upper detections
            Xfn = np.concatenate((Xf, Xfa))
            # Definition of new confidence level
            if using_weights:
                Cfa = np.ones(len(Xfa))*params['confau']
                Cfn = np.concatenate((Cf, Cfa))
        else:
            Xfn = Xf
            if using_weights:
                Cfn = Cf

        # New definition of the training vectors
        X = np.concatenate((Xgn, Xfn))
        # New definition of the target values
        y = np.concatenate((np.repeat(np.unique(y)[0], len(Xgn)), np.repeat(np.unique(y)[1], len(Xfn))))
        # New definition of the confidence level
        if using_weights:
            C = np.concatenate((Cgn, Cfn))
        # New definition of each feature vector
        X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    # Printing number of samples and features
    n0 = (y==np.unique(y)[0]).sum().astype(float)
    n1 = (y==np.unique(y)[1]).sum().astype(float)
    n_samples, n_features = X.shape
    print 'n_samples =', n_samples
    print 'n_samples_{-1} =', int(n0)
    print 'n_samples_{+1} =', int(n1)
    print 'n_features =', n_features

    # Visualization of the scaled data
    if params['plot_scaled']:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the data scaled to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('scaled_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Rescaling gamma to capture more detailed results
    gamma = 1. / (n_features * X.std())
    print 'gamma =', gamma

    # Creating the SVM model and fitting the data using the Support Vector Machine technique
    print '>> Creating the SVM model...'
    sys.stdout.flush()
    if using_weights:
        gamma = kgam*gamma
        # Compute class balanced weights
        cls, _ = np.unique(y, return_inverse=True)
        class_weight = compute_class_weight("balanced", cls, y)
        prob = svm_problem(C, y, X)
        arg = '-g %.15g -w%01d %.15g -w%01d %.15g -m 1000 -h 0' % (gamma, cls[0], class_weight[0],
                cls[1], class_weight[1])
        param = svm_parameter(arg)
        print '>> Fitting the SVM model...'
        t_1 = time()
        clf = svm_train(prob, param)
        t_2 = time()
    else:
        t_1 = time()
        if params['search']:
            print '>> Searching for the best values of C and gamma...'
            # Grid Search
            # Parameter Grid
            param_grid = {'C': np.logspace(0,5,6), 'gamma': gamma*np.logspace(0,5,6)}
            # Make grid search classifier
            grid_search = GridSearchCV(svm.SVC(cache_size=2000, class_weight="balanced", probability=True), param_grid, n_jobs=-1, verbose=1, cv=5, iid=False)
            print '>> Fitting the SVM model...'
            # Train the classifier
            grid_search.fit(X, y)
            print "Best Parameters:\n", grid_search.best_params_
            clf = grid_search.best_estimator_
            print "Best Estimator:\n", clf
        else:
            gamma = kgam*gamma
            clf = svm.SVC(C=C, kernel="rbf", gamma=gamma, cache_size=2000, class_weight="balanced")  # default kernel: exp(-gamma*||x-x'||^2)
            print clf
            print '>> Fitting the SVM model...'
            # Fitting the data using the Support Vector Machine technique
            clf.fit(X, y)
        t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))

    if not using_weights:
        # Check if the classification failed
        if clf.fit_status_:
            print '>> FAILED <<'
            print 'Failed fitting the data'
            sys.exit(1)
        print 'number of support vectors: ', clf.n_support_
        print 'score of trained data: ', clf.score(X, y)
    # Creating the mesh grid to evaluate the classification
    print '>> Creating mesh grid to evaluate the classification...'
    nnodes = int(np.ceil(np.power(n_samples, 1./n_features)))
    if fire_grid is None:
        # Number of necessary nodes
        hnodes = hN*nnodes
        vnodes = vN*nnodes
        print 'number of horizontal nodes (%d meshgrid nodes for each observation): %d' % (hN, hnodes)
        print 'number of vertical nodes (%d meshgrid nodes for each observation): %d' % (vN, vnodes)
        # Computing the resolution of the mesh to evaluate
        sdim = (hnodes, hnodes, vnodes)
        print 'grid_size = %dx%dx%d = %d' % (sdim[0], sdim[1], sdim[2], np.prod(sdim))
        t_1 = time()
        xx, yy, zz = make_meshgrid(X0, X1, X2, s=sdim)
        t_2 = time()
    else:
        fxlon = np.divide(fire_grid[0] - xmin, xlen)
        fxlat = np.divide(fire_grid[1] - ymin, ylen)
        if it is None:
            it = (X2.min(), X2.max())
        else:
            it = np.divide(np.array(it) - zmin, zlen)
        vnodes = vN*nnodes
        sdim = (fxlon.shape[0], fxlon.shape[1], vnodes)
        print 'fire_grid_size = %dx%dx%d = %d' % (sdim + (np.prod(sdim),))
        t_1 = time()
        xx, yy, zz = make_fire_mesh(fxlon, fxlat, it, sdim[2])
        t_2 = time()
        print 'grid_created = %dx%dx%d = %d' % (zz.shape + (np.prod(zz.shape),))
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    # Computing the 2D fire arrival time, F
    print '>> Computing the 2D fire arrival time, F...'
    sys.stdout.flush()
    F = frontier(clf, xx, yy, zz, plot_decision=params['plot_decision'], plot_poly=params['plot_poly'], using_weights=using_weights)

    print '>> Creating final results...'
    sys.stdout.flush()
    # Plotting the Separating Hyperplane of the SVM classification with the support vectors
    if params['plot_supports']:
        try:
            if using_weights:
                supp_ind = np.sort(clf.get_sv_indices())-1
                supp_vec = X[supp_ind]
            else:
                supp_ind = clf.support_
                supp_vec = clf.support_vectors_
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D Separating Hyperplane of an SVM")
            # plotting the separating hyperplane
            ax.plot_surface(F[0], F[1], F[2], color='orange', alpha=.3)
            # computing the indices of the non-support vectors
            rr = np.array(range(len(y)))
            ms = np.isin(rr, supp_ind)
            nsupp = rr[~ms]
            # plotting non-support vectors (smaller)
            ax.scatter(X0[nsupp], X1[nsupp], X2[nsupp], c=y[nsupp], cmap=cm_GR, s=.5, vmin=y.min(), vmax=y.max(), alpha=.1)
            # plotting support vectors (bigger)
            ax.scatter(supp_vec[:, 0], supp_vec[:, 1], supp_vec[:, 2], c=y[supp_ind], cmap=cm_GR, s=10, edgecolors='k', linewidth=.5, alpha=.5)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_zlim(zz.min(), zz.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('support.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Plot the normalized fire arrival time resulting from the SVM classification
    if params['plot_result']:
        try:
            Fx, Fy, Fz = np.array(F[0]), np.array(F[1]), np.array(F[2])
            with np.errstate(invalid='ignore'):
                Fz[Fz > X2.max()] = np.nan
            if params['notnan']:
                Fz[np.isnan(Fz)] = X2.max()
                Fz = np.minimum(Fz, X2.max())
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Fire arrival time normalized")
            # plotting fire arrival time
            p = ax.plot_surface(Fx, Fy, Fz, cmap=cm_Rds,
                    linewidth=0, antialiased=False)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_zlim(zz.min(), zz.max())
            cbar = fig.colorbar(p)
            cbar.set_label('Fire arrival time normalized', labelpad=20, rotation=270)
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('tign_g.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Translate the result back into the initial data scale
    if params['norm']:
        f0 = F[0] * xlen + xmin
        f1 = F[1] * ylen + ymin
        f2 = F[2] * zlen + zmin
        FF = [f0, f1, f2]
    else:
        # without normalization, the result is already in the data scale
        FF = [F[0], F[1], F[2]]

    # Set all the larger values at the end to be the same maximum value
    oX0, oX1, oX2 = oX[:, 0], oX[:, 1], oX[:, 2]
    FFx, FFy, FFz = FF[0], FF[1], FF[2]

    if params['notnan']:
        with np.errstate(invalid='ignore'):
            FFz[np.isnan(FFz)] = np.nanmax(FFz)
        FF = [FFx, FFy, FFz]
    # Plot the fire arrival time resulting from the SVM classification
    if params['plot_result']:
        try:
            # Plotting the result
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D graph function of a SVM")
            FFx, FFy, FFz = np.array(FF[0]), np.array(FF[1]), np.array(FF[2])
            # plotting original data
            ax.scatter(oX0, oX1, oX2, c=oy, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            # plotting fire arrival time
            ax.plot_wireframe(FFx, FFy, FFz, color='orange', alpha=.5)
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('result.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    print '>> SUCCESS <<'
    t_final = time()
    print 'TOTAL elapsed time: %ss.' % str(abs(t_final-t_init))
    plt.close()

    return FF
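
# The vertical extension pattern used for the artificial detections in SVM3,
# isolated on one toy lower-bound point: from a detection at height z,
# artificial ground points are stacked every hartil down to the minimum
# height (values below are made up for illustration).
def _artificial_extension_demo():
    z, minz, hartil = 0.9, 0.0, 0.2
    flz = np.unique(np.append(np.arange(z, minz, -hartil), z))
    return flz    # approximately [0.1, 0.3, 0.5, 0.7, 0.9]
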
if __name__ == "__main__":
    # Experiment to run
    exp = 1
    search = True

    # Defining ground and fire detections
    def exp1():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0]]
        Xf = [[0, 0, 1], [1, 1, 0], [2, 2, 1], [2, 0, 1], [0, 2, 1]]
        C = 10.
        kgam = 1.
        return Xg, Xf, C, kgam
    def exp2():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0],
            [4, 2, 0], [4, 0, 0], [2, 1, .5], [0, 1, .5],
            [4, 1, .5], [2, 0, .5], [2, 2, .5]]
        Xf = [[0, 0, 1], [1, 1, 0.25], [2, 2, 1], [2, 0, 1], [0, 2, 1], [3, 1, 0.25], [4, 2, 1], [4, 0, 1]]
        C = np.concatenate((np.array([50., 50., 50., 50., 50., 50.,
                1000., 100., 100., 100., 100.]), 100.*np.ones(len(Xf))))
        kgam = 5.
        return Xg, Xf, C, kgam

    # Creating the options
    options = {1: exp1, 2: exp2}

    # Defining the option depending on the experiment
    Xg, Xf, C, kgam = options[exp]()

    # Creating the data necessary to run the SVM3 function
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))

    # Running the SVM classification
    SVM3(X, y, C=C, kgam=kgam, search=search, plot_result=True)