# Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
# Angel Farguell (angel.farguell@gmail.com)
#
# To install:
#   conda install scikit-learn
#   conda install scikit-image

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from scipy import interpolate, spatial
import matplotlib.pyplot as plt
import matplotlib.font_manager
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import axes3d
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import numpy as np
from time import time
from infrared_perimeters import process_infrared_perimeters
import sys
import saveload as sl
def verify_inputs(params):
    required_args = [("search", False), ("norm", True),
        ("notnan", True), ("artil", False), ("hartil", 0.2),
        ("artiu", True), ("hartiu", 0.1), ("downarti", True),
        ("dminz", 0.1), ("confal", 100), ("toparti", False),
        ("dmaxz", 0.1), ("confau", 100), ("plot_data", False),
        ("plot_scaled", False), ("plot_decision", False),
        ("plot_poly", False), ("plot_supports", False),
        ("plot_result", False)]
    # check each argument that should exist
    for key, default in required_args:
        if key not in params:
            params.update({key: default})
    return params
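
# A minimal sanity check of verify_inputs (hypothetical demo, not part of the
# pipeline): keys the caller provides are preserved, and missing keys get the
# defaults listed above.
def _verify_inputs_demo():
    p = verify_inputs({'norm': False})
    assert p['norm'] == False    # caller's value preserved
    assert p['search'] == False  # default filled in
    assert p['hartil'] == 0.2    # default filled in
    return p
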
def preprocess_data_svm(data, scale, minconf=80.):
    """
    Preprocess satellite data from JPSSD to use in Support Vector Machine directly
    (without any interpolation, as space-time 3D points)

    :param data: dictionary of satellite data from JPSSD
    :param scale: time scales
    :param minconf: optional, minimum fire confidence level to take into account
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-09-24
    """
    # confidence of ground detections
    gconf = 95.

    # scale from seconds to days
    tscale = 24.*3600.

    detlon = np.concatenate([d['lon_fire'] for d in data.itervalues()])
    detlat = np.concatenate([d['lat_fire'] for d in data.itervalues()])
    confs = np.concatenate([d['conf_fire'] for d in data.itervalues()])
    bb = (detlon[confs > minconf].min(), detlon[confs > minconf].max(),
          detlat[confs > minconf].min(), detlat[confs > minconf].max())
    dc = (bb[1]-bb[0], bb[3]-bb[2])
    bf = (bb[0]-dc[0]*.3, bb[1]+dc[0]*.3, bb[2]-dc[1]*.3, bb[3]+dc[1]*.3)
    print bf

    # process all the points as space-time 3D nodes
    XX = [[], []]
    cf = []
    for gran in data.items():
        print '> processing granule %s' % gran[0]
        tt = (gran[1]['time_num']-scale[0])/tscale
        conf = gran[1]['conf_fire'] >= minconf
        xf = np.c_[(gran[1]['lon_fire'][conf], gran[1]['lat_fire'][conf], np.repeat(tt, conf.sum()))]
        print 'fire detections: %g' % len(xf)
        XX[0].append(xf)
        mask = np.logical_and(gran[1]['lon_nofire'] >= bf[0],
                    np.logical_and(gran[1]['lon_nofire'] <= bf[1],
                        np.logical_and(gran[1]['lat_nofire'] >= bf[2],
                            gran[1]['lat_nofire'] <= bf[3])))
        xg = np.c_[(gran[1]['lon_nofire'][mask], gran[1]['lat_nofire'][mask], np.repeat(tt, mask.sum()))]
        print 'no fire detections: %g' % len(xg)
        coarsening = 1  # np.int(1+len(xg)/min(100,20*max(len(xf),1)))
        print 'no fire coarsening: %d' % coarsening
        print 'no fire detections reduction: %g' % len(xg[::coarsening])
        XX[1].append(xg[::coarsening])
        cf.append(gran[1]['conf_fire'][conf])

    Xf = np.concatenate(tuple(XX[0]))
    Xg = np.concatenate(tuple(XX[1]))
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))
    c = np.concatenate((gconf*np.ones(len(Xg)), np.concatenate(tuple(cf))))
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape
    print 'shape c: ', c.shape
    print 'len fire: ', len(X[y==1])
    print 'len ground: ', len(X[y==-1])

    return X, y, c
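
# Hedged usage sketch for preprocess_data_svm on a single made-up granule; the
# field names follow the JPSSD dictionary structure read above, but the values
# are invented for illustration only.
def _preprocess_data_svm_demo():
    data = {'demo_granule': {
        'time_num': 3600.,
        'lon_fire': np.array([-113.0, -112.9]),
        'lat_fire': np.array([40.0, 40.1]),
        'conf_fire': np.array([90., 85.]),
        'lon_nofire': np.array([-112.95, -112.9]),
        'lat_nofire': np.array([40.05, 40.12])}}
    scale = (0., 24.*3600.)
    # X has rows (lon, lat, time in days), y is -1 for ground and +1 for fire,
    # c carries the confidence levels used later as SVM sample weights
    X, y, c = preprocess_data_svm(data, scale)
    return X, y, c
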
def preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules, C=None):
    """
    Preprocess satellite data from JPSSD and set it up to use in Support Vector Machine

    :param lons: longitude grid
    :param lats: latitude grid
    :param U: upper bound grid
    :param L: lower bound grid
    :param T: mask grid
    :param scale: time scales
    :param time_num_granules: times of the granules
    :param C: optional, confidence level grids for the lower and upper bounds
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """
    # Flatten coordinates
    lon = np.ravel(lons).astype(float)
    lat = np.ravel(lats).astype(float)

    # Temporal scale to days
    tscale = 24*3600
    U = U/tscale
    L = L/tscale

    # Check that U>=L holds everywhere
    print 'U>L: ', (U>L).sum()
    print 'U<L: ', (U<L).sum()
    print 'U==L: ', (U==L).sum()

    # Reshape to 1D
    uu = np.ravel(U)
    ll = np.ravel(L)
    tt = np.ravel(T)

    # Set maximum and minimum sentinel values to NaN
    uu[uu==uu.max()] = np.nan
    ll[ll==ll.min()] = np.nan

    # Mask created during computation of lower and upper bounds
    mk = tt == scale[1]-scale[0]
    # Masking upper bounds outside the mask
    uu[mk] = np.nan

    # Minimum value of the upper bounds
    muu = uu[~np.isnan(uu)].min()
    # Maximum value of the lower bounds
    mll = ll[~np.isnan(ll)].max()
    ### Reduction of the density of lower bounds
    # Creation of the lower bounds mask (True values are going to turn into NaN's in the lower bound data)
    lmk = np.copy(mk)
    ## Reason: we do not care about lower bounds that are far below the upper bounds
    # temporary lower mask, all False (values of the mask where the mask is False, inside the fire mask)
    tlmk = lmk[~mk]
    # set to True all the bounds less than the floor of the minimum of the upper bounds in the fire mask
    tlmk[~np.isnan(ll[~mk])] = (ll[~mk][~np.isnan(ll[~mk])] < np.floor(muu))
    # set lower mask from temporary mask
    lmk[~mk] = tlmk
    ## Reason: coarsening of the lower bounds below the upper bounds to create balance
    # coarsen the lower bound data below the upper bounds to a similar amount as the upper bounds
    kk = (~np.isnan(ll[~lmk])).sum()/(~np.isnan(uu)).sum()
    if kk:
        # temporary lower mask, all True (values of the lower mask where the lower mask is False, set to True)
        tlmk = ~lmk[~lmk]
        # only set a subset of the lower mask values to False (coarsening)
        tlmk[::kk] = False
        # set lower mask from temporary mask
        lmk[~lmk] = tlmk
    ## Reason: we care about the maximum lower bounds which are not below the upper bounds
    # temporary lower mask, all True (values of the mask where the mask is True, outside the fire mask)
    tlmk = lmk[mk]
    # temporary lower mask 2, all True (subset of the previous mask where the lower bound is not NaN)
    t2lmk = tlmk[~np.isnan(ll[mk])]
    # set to False in the temporary lower mask 2 where the lower bounds have the maximum value
    t2lmk[ll[mk][~np.isnan(ll[mk])] == mll] = False
    # set temporary lower mask from temporary lower mask 2
    tlmk[~np.isnan(ll[mk])] = t2lmk
    # set lower mask from temporary lower mask
    lmk[mk] = tlmk
    ## Reason: coarsening of the non-maximum lower bounds not below the upper bounds to create balance
    # set subset outside of the fire mask for the rest
    # coarsen the non-maximum lower bounds not below the upper bounds to a similar amount as the current number of lower bounds
    kk = (ll[mk][~np.isnan(ll[mk])] < mll).sum()/(~np.isnan(ll[~lmk])).sum()
    if kk:
        # temporary lower mask, values of the current lower mask outside of the original fire mask
        tlmk = lmk[mk]
        # temporary lower mask 2, subset of the previous mask where the lower bound is not NaN
        t2lmk = tlmk[~np.isnan(ll[mk])]
        # temporary lower mask 3, subset of the previous mask where the lower bounds are not maximum
        t3lmk = t2lmk[ll[mk][~np.isnan(ll[mk])] < mll]
        # coarsening of the temporary lower mask 3
        t3lmk[::kk] = False
        # set the temporary lower mask 2 from the temporary lower mask 3
        t2lmk[ll[mk][~np.isnan(ll[mk])] < mll] = t3lmk
        # set the temporary lower mask from the temporary lower mask 2
        tlmk[~np.isnan(ll[mk])] = t2lmk
        # set the lower mask from the temporary lower mask
        lmk[mk] = tlmk

    # Masking lower bounds from previous lower mask
    ll[lmk] = np.nan
    # Values different from NaN in the upper and lower bounds
    um = np.array(~np.isnan(uu))
    lm = np.array(~np.isnan(ll))
    # Define all the x, y, and z components of upper and lower bounds
    ux = lon[um]
    uy = lat[um]
    uz = uu[um]
    lx = lon[lm]
    ly = lat[lm]
    lz = ll[lm]

    # Create the data to call the SVM3 function from svm.py
    X = np.c_[np.concatenate((lx,ux)), np.concatenate((ly,uy)), np.concatenate((lz,uz))]
    y = np.concatenate((-np.ones(len(lx)), np.ones(len(ux))))
    # Print the shape of the data
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape

    if C is None:
        c = 80*np.ones(y.shape)
    else:
        c = np.concatenate((np.ravel(C[0])[lm], np.ravel(C[1])[um]))

    # Clean data if not in bounding box
    bbox = (lon.min(), lon.max(), lat.min(), lat.max(), time_num_granules)
    geo_mask = np.logical_and(np.logical_and(np.logical_and(X[:,0] >= bbox[0], X[:,0] <= bbox[1]), X[:,1] >= bbox[2]), X[:,1] <= bbox[3])
    btime = (0, (scale[1]-scale[0])/tscale)
    time_mask = np.logical_and(X[:,2] >= btime[0], X[:,2] <= btime[1])
    whole_mask = np.logical_and(geo_mask, time_mask)
    X = X[whole_mask,:]
    y = y[whole_mask]
    c = c[whole_mask]

    return X, y, c
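
# The mask surgery in preprocess_result_svm leans on a NumPy subtlety worth
# isolating: boolean (fancy) indexing returns a copy, so a sub-mask has to be
# edited on a temporary array and then written back. A toy sketch of the idiom
# (made-up mask, coarsening step 2):
def _submask_coarsening_demo():
    mask = np.array([True, False, False, True, False, False])
    tmp = mask[~mask]      # copy of the entries where mask is False
    tmp[::2] = True        # coarsen: flip every other one of those entries
    mask[~mask] = tmp      # write the temporary back into the original mask
    return mask            # -> [True, True, False, True, True, False]
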
def make_fire_mesh(fxlon, fxlat, it, nt):
    """
    Create a mesh of points to evaluate the decision function

    :param fxlon: data to base x-axis meshgrid on
    :param fxlat: data to base y-axis meshgrid on
    :param it: time interval endpoints (t0,t1) to base z-axis meshgrid on
    :param nt: number of nodes in the z direction
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """
    xx = np.repeat(fxlon[:, :, np.newaxis], nt, axis=2)
    yy = np.repeat(fxlat[:, :, np.newaxis], nt, axis=2)
    tt = np.linspace(it[0], it[1], nt)
    zz = np.swapaxes(np.swapaxes(np.array([np.ones(fxlon.shape)*t for t in tt]), 0, 1), 1, 2)

    return xx, yy, zz
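
# Quick shape check for make_fire_mesh (toy sizes, not fire data): a 2D (m,n)
# spatial grid replicated over nt time slices yields three (m,n,nt) arrays.
def _make_fire_mesh_demo():
    fxlon, fxlat = np.meshgrid(np.linspace(0., 1., 4), np.linspace(0., 1., 3))
    xx, yy, zz = make_fire_mesh(fxlon, fxlat, (0., 1.), 5)
    assert xx.shape == yy.shape == zz.shape == (3, 4, 5)
    return xx, yy, zz
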
def make_meshgrid(x, y, z, s=(50,50,50), exp=.1):
    """
    Create a mesh of points to evaluate the decision function

    :param x: data to base x-axis meshgrid on
    :param y: data to base y-axis meshgrid on
    :param z: data to base z-axis meshgrid on
    :param s: tuple of number of nodes in each direction, optional
    :param exp: extra fraction of nodes in each direction (between 0 and 1), optional
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """
    if not isinstance(s, tuple):
        print '>> FAILED <<'
        print 'The number of nodes at each direction is not a tuple: ', s
        sys.exit(1)
    # number of nodes in each direction
    sx, sy, sz = np.array(s).astype(int)
    # extra step sizes in each direction
    brx = int(sx * exp)
    bry = int(sy * exp)
    brz = int(sz * exp)
    # grid lengths in each direction
    lx = x.max() - x.min()
    ly = y.max() - y.min()
    lz = z.max() - z.min()
    # grid resolutions in each direction
    hx = lx / (sx - 2*brx - 1)
    hy = ly / (sy - 2*bry - 1)
    hz = lz / (sz - 2*brz - 1)
    # extreme values for each dimension
    x_min, x_max = x.min() - brx * hx, x.max() + brx * hx
    y_min, y_max = y.min() - bry * hy, y.max() + bry * hy
    z_min, z_max = z.min() - brz * hz, z.max() + brz * hz
    # generating the mesh grid; 'ij' indexing so that xx varies with x along
    # the first axis and yy with y along the second, consistent with the
    # column order (x,y,z) used to train the classifier and with make_fire_mesh
    xx, yy, zz = np.meshgrid(np.linspace(x_min, x_max, sx),
                    np.linspace(y_min, y_max, sy),
                    np.linspace(z_min, z_max, sz),
                    indexing='ij')
    return xx, yy, zz
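
# Hedged sketch of make_meshgrid on random data: the output shapes follow the
# s argument, and the grid extends roughly exp=10% beyond the data range on
# every side because of the buffer nodes computed above.
def _make_meshgrid_demo():
    x, y, z = np.random.rand(10), np.random.rand(10), np.random.rand(10)
    xx, yy, zz = make_meshgrid(x, y, z, s=(20, 30, 40))
    assert xx.shape == (20, 30, 40)
    assert xx.min() < x.min() and xx.max() > x.max()
    return xx, yy, zz
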
def frontier(clf, xx, yy, zz, bal=.5, plot_decision=False, plot_poly=False, using_weights=False):
    """
    Compute the surface decision frontier for a classifier.

    :param clf: a classifier
    :param xx: meshgrid ndarray
    :param yy: meshgrid ndarray
    :param zz: meshgrid ndarray
    :param bal: number between 0 and 1, balance between lower and upper bounds in decision function (in case not level 0)
    :param plot_decision: boolean, plot the decision volume
    :param plot_poly: boolean, plot the polynomial approximation
    :param using_weights: boolean, use the weighted libsvm fork instead of scikit-learn
    :return F: 2D meshes with xx, yy coordinates and the hyperplane z which gives decision function 0

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
    """
    # Creating the 3D grid
    XX = np.c_[np.ravel(xx), np.ravel(yy), np.ravel(zz)]

    # Evaluating the decision function
    print '>> Evaluating the decision function...'
    sys.stdout.flush()
    t_1 = time()
    if using_weights:
        from libsvm_weights.python.svmutil import svm_predict
        _, _, p_vals = svm_predict([], XX, clf, '-q')
        ZZ = np.array([p[0] for p in p_vals])
    else:
        ZZ = clf.decision_function(XX)
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    hist = np.histogram(ZZ)
    print 'counts: ', hist[0]
    print 'values: ', hist[1]
    print 'decision function range: ', ZZ.min(), '~', ZZ.max()

    # Reshaping decision function volume
    Z = ZZ.reshape(xx.shape)
    print 'decision function shape: ', Z.shape
    sl.save((xx,yy,zz,Z), 'decision')
    if plot_decision:
        try:
            from skimage import measure
            from shiftcmap import shiftedColorMap
            verts, faces, normals, values = measure.marching_cubes_lewiner(Z, level=0, allow_degenerate=False)
            # Scale and transform to the actual size of the interesting volume
            h = np.divide([xx.max()-xx.min(), yy.max()-yy.min(), zz.max()-zz.min()], np.array(xx.shape)-1)
            verts = verts * h
            verts = verts + [xx.min(), yy.min(), zz.min()]
            mesh = Poly3DCollection(verts[faces], facecolor='orange', alpha=.9)
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Decision volume")
            col = [(0, .5, 0), (.5, .5, .5), (.5, 0, 0)]
            cm = colors.LinearSegmentedColormap.from_list('GrRdD', col, N=50)
            midpoint = 1 - ZZ.max() / (ZZ.max() + abs(ZZ.min()))
            shiftedcmap = shiftedColorMap(cm, midpoint=midpoint, name='shifted')
            kk = 1 + np.divide(xx.shape, 50)
            X = np.ravel(xx[::kk[0], ::kk[1], ::kk[2]])
            Y = np.ravel(yy[::kk[0], ::kk[1], ::kk[2]])
            T = np.ravel(zz[::kk[0], ::kk[1], ::kk[2]])
            CC = np.ravel(Z[::kk[0], ::kk[1], ::kk[2]])
            p = ax.scatter(X, Y, T, c=CC, s=.1, alpha=.5, cmap=shiftedcmap)
            cbar = fig.colorbar(p)
            cbar.set_label('decision function value', rotation=270, labelpad=20)
            ax.add_collection3d(mesh)
            ax.set_xlim([xx.min(), xx.max()])
            ax.set_ylim([yy.min(), yy.max()])
            ax.set_zlim([zz.min(), zz.max()])
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('decision.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    if plot_poly:
        fig = plt.figure()
    # Computing fire arrival time from the previous decision function
    print '>> Computing fire arrival time...'
    sys.stdout.flush()
    t_1 = time()
    # xx 2-dimensional array
    Fx = xx[:, :, 0]
    # yy 2-dimensional array
    Fy = yy[:, :, 0]
    # zz 1-dimensional array
    zr = zz[0, 0]
    # Initializing fire arrival time
    Fz = np.zeros(Fx.shape)
    # For each x and y
    for k1 in range(Fx.shape[0]):
        for k2 in range(Fx.shape[1]):
            # Approximate the vertical decision function by a piecewise polynomial (cubic spline interpolation)
            pz = interpolate.CubicSpline(zr, Z[k1,k2])
            # Compute the real roots of the piecewise polynomial
            rr = pz.roots()
            # Just take the real roots between min(zz) and max(zz)
            realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5, np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
            if len(realr) > 0:
                # Take the minimum root
                Fz[k1,k2] = realr.min()
                # Plotting the approximated polynomial with the decision function
                if plot_poly:
                    try:
                        plt.ion()
                        plt.plot(pz(zr), zr)
                        plt.plot(Z[k1,k2], zr, '+')
                        plt.plot(np.zeros(len(realr)), realr, 'o', c='g')
                        plt.plot(0, Fz[k1,k2], 'o', markersize=3, c='r')
                        plt.title('Polynomial approximation of decision_function(%f,%f,z)' % (Fx[k1,k2], Fy[k1,k2]))
                        plt.xlabel('decision function value')
                        plt.ylabel('Z')
                        plt.legend(['polynomial', 'decision values', 'roots', 'fire arrival time'])
                        plt.xlim([Z.min(), Z.max()])
                        plt.ylim([zz.min(), zz.max()])
                        plt.show()
                        plt.pause(.001)
                        plt.cla()
                    except Exception as e:
                        print 'Warning: something went wrong when plotting...'
                        print e
            else:
                # If there is no real root of the polynomial between zz.min() and zz.max(), define it as NaN
                Fz[k1,k2] = np.nan
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    F = [Fx,Fy,Fz]

    return F
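
# The zero-crossing extraction above, reduced to one vertical profile: the
# decision values along z are interpolated by a cubic spline, and the smallest
# real root inside the z range plays the role of the fire arrival time. Toy
# profile with a known sign change at z=0.5 (illustration only):
def _spline_root_demo():
    zr = np.linspace(0., 1., 11)
    vals = zr - 0.5    # decision-function-like profile, zero at z=0.5
    pz = interpolate.CubicSpline(zr, vals)
    rr = pz.roots()
    realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5,
                np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
    return realr.min()    # ~0.5
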
def SVM3(X, y, C=1., kgam=1., fire_grid=None, it=None, **params):
    """
    3D Support Vector Machine analysis and plot

    :param X: Training vectors, where n_samples is the number of samples and n_features is the number of features.
    :param y: Target values
    :param C: Penalization (argument of svm.SVC class), optional
    :param kgam: Scalar multiplier for gamma (increase it to capture more detail), optional
    :param fire_grid: The longitude and latitude grid where to have the fire arrival time, optional
    :param it: Time interval where to have the fire arrival time, optional
    :return F: tuple with (longitude grid, latitude grid, fire arrival time grid)

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """
    # add default values for parameters not specified
    params = verify_inputs(params)
    print 'svm params: ', params

    t_init = time()

    col = [(0, .5, 0), (.5, 0, 0)]
    cm_GR = colors.LinearSegmentedColormap.from_list('GrRd', col, N=2)
    col = [(1, 0, 0), (.25, 0, 0)]
    cm_Rds = colors.LinearSegmentedColormap.from_list('Rds', col, N=100)

    # grid options used when fire_grid is None
    # number of vertical nodes per observation
    vN = 1
    # number of horizontal nodes per observation
    hN = 5

    # using different weights for the data
    if isinstance(C, (list, tuple, np.ndarray)):
        using_weights = True
        from libsvm_weights.python.svm import svm_problem, svm_parameter
        from libsvm_weights.python.svmutil import svm_train
        from sklearn.utils import compute_class_weight
    else:
        using_weights = False
    # Data inputs
    X = np.array(X).astype(float)
    y = np.array(y)

    # Original data
    oX = np.array(X).astype(float)
    oy = np.array(y)

    # Visualization of the data
    X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    if params['plot_data']:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the original data to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('original_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Normalization of the data into [0,1]^3
    if params['norm']:
        xmin = X0.min()
        xlen = X0.max() - X0.min()
        x0 = np.divide(X0 - xmin, xlen)
        ymin = X1.min()
        ylen = X1.max() - X1.min()
        x1 = np.divide(X1 - ymin, ylen)
        zmin = X2.min()
        zlen = X2.max() - X2.min()
        x2 = np.divide(X2 - zmin, zlen)
        X0, X1, X2 = x0, x1, x2
        X[:, 0] = X0
        X[:, 1] = X1
        X[:, 2] = X2
    # Creation of fire and ground artificial detections
    if params['artil'] or params['artiu'] or params['toparti'] or params['downarti']:
        # Extreme values in the z direction
        minz = X[:, 2].min()
        maxz = X[:, 2].max()
        # Division of lower and upper bounds for data and confidence level
        fl = X[y==np.unique(y)[0]]
        fu = X[y==np.unique(y)[1]]

        # Artificial extensions of the lower bounds
        if params['artil']:
            # Create artificial lower bounds
            flz = np.array([ np.unique(np.append(np.arange(f[2], minz, -params['hartil']), f[2])) for f in fl ])
            # Definition of new ground detections after artificial detections added
            Xg = np.concatenate([ np.c_[(np.repeat(fl[k][0], len(flz[k])), np.repeat(fl[k][1], len(flz[k])), flz[k])] for k in range(len(flz)) ])
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = np.concatenate([ np.repeat(cl[k], len(flz[k])) for k in range(len(flz)) ])
        else:
            Xg = fl
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = cl

        # Artificial extensions of the upper bounds
        if params['artiu']:
            # Create artificial upper bounds
            fuz = np.array([ np.unique(np.append(np.arange(f[2], maxz, params['hartiu']), f[2])) for f in fu ])
            # Definition of new fire detections after artificial detections added
            Xf = np.concatenate([ np.c_[(np.repeat(fu[k][0], len(fuz[k])), np.repeat(fu[k][1], len(fuz[k])), fuz[k])] for k in range(len(fuz)) ])
            # Define new confidence levels
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = np.concatenate([ np.repeat(cu[k], len(fuz[k])) for k in range(len(fuz)) ])
        else:
            Xf = fu
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = cu

        # Bottom artificial lower bounds
        if params['downarti']:
            # Creation of the x,y mesh of artificial lower bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                        np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # All the artificial mesh points are going to be below the data
            zng = np.repeat(minz-params['dminz'], len(np.ravel(xn)))
            # Artificial lower bounds
            Xga = np.c_[np.ravel(xn), np.ravel(yn), np.ravel(zng)]
            # Definition of new ground detections after bottom artificial lower detections
            Xgn = np.concatenate((Xg, Xga))
            # Definition of new confidence level
            if using_weights:
                Cga = np.ones(len(Xga))*params['confal']
                Cgn = np.concatenate((Cg, Cga))
        else:
            Xgn = Xg
            if using_weights:
                Cgn = Cg

        # Top artificial upper bounds
        if params['toparti']:
            # Creation of the x,y mesh of artificial upper bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                        np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # All the artificial mesh points are going to be above the data
            znf = np.repeat(maxz+params['dmaxz'], len(np.ravel(xn)))
            # Artificial upper bounds
            Xfa = np.c_[np.ravel(xn), np.ravel(yn), np.ravel(znf)]
            # Definition of new fire detections after top artificial upper detections
            Xfn = np.concatenate((Xf, Xfa))
            # Definition of new confidence level
            if using_weights:
                Cfa = np.ones(len(Xfa))*params['confau']
                Cfn = np.concatenate((Cf, Cfa))
        else:
            Xfn = Xf
            if using_weights:
                Cfn = Cf

        # New definition of the training vectors
        X = np.concatenate((Xgn, Xfn))
        # New definition of the target values
        y = np.concatenate((np.repeat(np.unique(y)[0], len(Xgn)), np.repeat(np.unique(y)[1], len(Xfn))))
        # New definition of the confidence level
        if using_weights:
            C = np.concatenate((Cgn, Cfn))
        # New definition of each feature vector
        X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    # Printing number of samples and features
    n0 = (y==np.unique(y)[0]).sum().astype(float)
    n1 = (y==np.unique(y)[1]).sum().astype(float)
    n_samples, n_features = X.shape
    print 'n_samples =', n_samples
    print 'n_samples_{-1} =', int(n0)
    print 'n_samples_{+1} =', int(n1)
    print 'n_features =', n_features

    # Visualization of the scaled data
    if params['plot_scaled']:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the data scaled to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('scaled_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Rescaling gamma to capture more detailed results
    gamma = 1. / (n_features * X.std())
    print 'gamma =', gamma

    # Creating the SVM model and fitting the data using the Support Vector Machine technique
    print '>> Creating the SVM model...'
    sys.stdout.flush()
    if using_weights:
        gamma = kgam*gamma
        # Compute class balanced weights
        cls, _ = np.unique(y, return_inverse=True)
        class_weight = compute_class_weight("balanced", cls, y)
        prob = svm_problem(C, y, X)
        arg = '-g %.15g -w%01d %.15g -w%01d %.15g -m 1000 -h 0' % (gamma, cls[0], class_weight[0],
                cls[1], class_weight[1])
        param = svm_parameter(arg)
        print '>> Fitting the SVM model...'
        t_1 = time()
        clf = svm_train(prob, param)
        t_2 = time()
    else:
        t_1 = time()
        if params['search']:
            print '>> Searching for the best values of C and gamma...'
            # Grid Search
            # Parameter Grid
            param_grid = {'C': np.logspace(0,5,6), 'gamma': gamma*np.logspace(0,5,6)}
            # Make grid search classifier
            grid_search = GridSearchCV(svm.SVC(cache_size=2000, class_weight="balanced", probability=True), param_grid, n_jobs=-1, verbose=1, cv=5, iid=False)
            print '>> Fitting the SVM model...'
            # Train the classifier
            grid_search.fit(X, y)
            print "Best Parameters:\n", grid_search.best_params_
            clf = grid_search.best_estimator_
            print "Best Estimator:\n", clf
        else:
            gamma = kgam*gamma
            clf = svm.SVC(C=C, kernel="rbf", gamma=gamma, cache_size=2000, class_weight="balanced")  # default kernel: exp(-gamma*||x-x'||^2)
            print clf
            print '>> Fitting the SVM model...'
            # Fitting the data using the Support Vector Machine technique
            clf.fit(X, y)
        t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))

    if not using_weights:
        # Check if the classification failed
        if clf.fit_status_:
            print '>> FAILED <<'
            print 'Failed fitting the data'
            sys.exit(1)
        print 'number of support vectors: ', clf.n_support_
        print 'score of trained data: ', clf.score(X, y)
    # Creating the mesh grid to evaluate the classification
    print '>> Creating mesh grid to evaluate the classification...'
    nnodes = int(np.ceil(np.power(n_samples, 1./n_features)))
    if fire_grid is None:
        # Number of necessary nodes
        hnodes = hN*nnodes
        vnodes = vN*nnodes
        print 'number of horizontal nodes (%d meshgrid nodes for each observation): %d' % (hN, hnodes)
        print 'number of vertical nodes (%d meshgrid nodes for each observation): %d' % (vN, vnodes)
        # Computing the resolution of the mesh to evaluate
        sdim = (hnodes, hnodes, vnodes)
        print 'grid_size = %dx%dx%d = %d' % (sdim[0], sdim[1], sdim[2], np.prod(sdim))
        t_1 = time()
        xx, yy, zz = make_meshgrid(X0, X1, X2, s=sdim)
        t_2 = time()
    else:
        fxlon = np.divide(fire_grid[0] - xmin, xlen)
        fxlat = np.divide(fire_grid[1] - ymin, ylen)
        if it is None:
            it = (X2.min(), X2.max())
        else:
            it = np.divide(np.array(it) - zmin, zlen)
        vnodes = vN*nnodes
        sdim = (fxlon.shape[0], fxlon.shape[1], vnodes)
        print 'fire_grid_size = %dx%dx%d = %d' % (sdim + (np.prod(sdim),))
        t_1 = time()
        xx, yy, zz = make_fire_mesh(fxlon, fxlat, it, sdim[2])
        t_2 = time()
        print 'grid_created = %dx%dx%d = %d' % (zz.shape + (np.prod(zz.shape),))
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    # Computing the 2D fire arrival time, F
    print '>> Computing the 2D fire arrival time, F...'
    sys.stdout.flush()
    F = frontier(clf, xx, yy, zz, plot_decision=params['plot_decision'], plot_poly=params['plot_poly'], using_weights=using_weights)

    print '>> Creating final results...'
    sys.stdout.flush()
    # Plotting the Separating Hyperplane of the SVM classification with the support vectors
    if params['plot_supports']:
        try:
            if using_weights:
                supp_ind = np.sort(clf.get_sv_indices())-1
                supp_vec = X[supp_ind]
            else:
                supp_ind = clf.support_
                supp_vec = clf.support_vectors_
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D Separating Hyperplane of an SVM")
            # plotting the separating hyperplane
            ax.plot_surface(F[0], F[1], F[2], color='orange', alpha=.3)
            # computing the indices of the non-support vectors
            rr = np.array(range(len(y)))
            ms = np.isin(rr, supp_ind)
            nsupp = rr[~ms]
            # plotting non-support vectors (smaller)
            ax.scatter(X0[nsupp], X1[nsupp], X2[nsupp], c=y[nsupp], cmap=cm_GR, s=.5, vmin=y.min(), vmax=y.max(), alpha=.1)
            # plotting support vectors (bigger)
            ax.scatter(supp_vec[:, 0], supp_vec[:, 1], supp_vec[:, 2], c=y[supp_ind], cmap=cm_GR, s=10, edgecolors='k', linewidth=.5, alpha=.5)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_zlim(zz.min(), zz.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('support.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Plot the normalized fire arrival time resulting from the SVM classification
    if params['plot_result']:
        try:
            Fx, Fy, Fz = np.array(F[0]), np.array(F[1]), np.array(F[2])
            with np.errstate(invalid='ignore'):
                Fz[Fz > X2.max()] = np.nan
            if params['notnan']:
                Fz[np.isnan(Fz)] = X2.max()
                Fz = np.minimum(Fz, X2.max())
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Fire arrival time normalized")
            # plotting fire arrival time
            p = ax.plot_surface(Fx, Fy, Fz, cmap=cm_Rds,
                    linewidth=0, antialiased=False)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_zlim(zz.min(), zz.max())
            cbar = fig.colorbar(p)
            cbar.set_label('Fire arrival time normalized', labelpad=20, rotation=270)
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('tign_g.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Translate the result back into the initial data scale
    if params['norm']:
        f0 = F[0] * xlen + xmin
        f1 = F[1] * ylen + ymin
        f2 = F[2] * zlen + zmin
        FF = [f0, f1, f2]
    else:
        # without normalization, the result is already in the data scale
        FF = [F[0], F[1], F[2]]

    # Set all the larger values at the end to be the same maximum value
    oX0, oX1, oX2 = oX[:, 0], oX[:, 1], oX[:, 2]
    FFx, FFy, FFz = FF[0], FF[1], FF[2]

    if params['notnan']:
        with np.errstate(invalid='ignore'):
            FFz[np.isnan(FFz)] = np.nanmax(FFz)
        FF = [FFx, FFy, FFz]
    # Plot the fire arrival time resulting from the SVM classification
    if params['plot_result']:
        try:
            # Plotting the result
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D graph function of a SVM")
            FFx, FFy, FFz = np.array(FF[0]), np.array(FF[1]), np.array(FF[2])
            # plotting original data
            ax.scatter(oX0, oX1, oX2, c=oy, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            # plotting fire arrival time
            ax.plot_wireframe(FFx, FFy, FFz, color='orange', alpha=.5)
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('result.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    print '>> SUCCESS <<'
    t_final = time()
    print 'TOTAL elapsed time: %ss.' % str(abs(t_final-t_init))
    plt.close()

    return FF
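
# The vertical extension pattern used for the artificial detections in SVM3,
# isolated on one toy lower-bound point: from a detection at height z,
# artificial ground points are stacked every hartil down to the minimum
# height (values below are made up for illustration).
def _artificial_extension_demo():
    z, minz, hartil = 0.9, 0.0, 0.2
    flz = np.unique(np.append(np.arange(z, minz, -hartil), z))
    return flz    # approximately [0.1, 0.3, 0.5, 0.7, 0.9]
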
if __name__ == "__main__":
    # Experiment to run
    exp = 1
    search = True

    # Defining ground and fire detections
    def exp1():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0]]
        Xf = [[0, 0, 1], [1, 1, 0], [2, 2, 1], [2, 0, 1], [0, 2, 1]]
        C = 10.
        kgam = 1.
        return Xg, Xf, C, kgam
    def exp2():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0],
            [4, 2, 0], [4, 0, 0], [2, 1, .5], [0, 1, .5],
            [4, 1, .5], [2, 0, .5], [2, 2, .5]]
        Xf = [[0, 0, 1], [1, 1, 0.25], [2, 2, 1], [2, 0, 1], [0, 2, 1], [3, 1, 0.25], [4, 2, 1], [4, 0, 1]]
        C = np.concatenate((np.array([50., 50., 50., 50., 50., 50.,
                1000., 100., 100., 100., 100.]), 100.*np.ones(len(Xf))))
        kgam = 5.
        return Xg, Xf, C, kgam

    # Creating the options
    options = {1: exp1, 2: exp2}

    # Defining the option depending on the experiment
    Xg, Xf, C, kgam = options[exp]()

    # Creating the data necessary to run the SVM3 function
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))

    # Running the SVM classification
    SVM3(X, y, C=C, kgam=kgam, search=search, plot_result=True)