# Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
# Angel Farguell (angel.farguell@gmail.com)

# to install:
# conda install scikit-learn
# conda install scikit-image

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from scipy import interpolate, spatial
import matplotlib.pyplot as plt
import matplotlib.font_manager
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import axes3d
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import numpy as np
from time import time
from infrared_perimeters import process_infrared_perimeters
import sys
import saveload as sl

def preprocess_data_svm(data, scale, minconf=80.):
    """
    Preprocess satellite data from JPSSD to use in Support Vector Machine directly
    (without any interpolation, as space-time 3D points)

    :param data: dictionary of satellite data from JPSSD
    :param scale: time scales
    :param minconf: optional, minimum fire confidence level to take into account
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-09-24
    """

    # confidence of ground detections
    gconf = 70.

    # scale from seconds to days
    tscale = 24.*3600.

    # bounding box of all fire detections, expanded by half its size in each direction
    detlon = np.concatenate([d['lon_fire'] for d in data.itervalues()])
    detlat = np.concatenate([d['lat_fire'] for d in data.itervalues()])
    bb = (detlon.min(), detlon.max(), detlat.min(), detlat.max())
    dc = (bb[1]-bb[0], bb[3]-bb[2])
    bf = (bb[0]-dc[0]*.5, bb[1]+dc[0]*.5, bb[2]-dc[1]*.5, bb[3]+dc[1]*.5)
    print 'expanded bounding box: ', bf

    # process all the points as space-time 3D nodes
    XX = [[], []]
    cf = []
    for name, gran in data.items():
        print '> processing granule %s' % name
        tt = (gran['time_num']-scale[0])/tscale
        conf = gran['conf_fire'] >= minconf
        xf = np.c_[(gran['lon_fire'][conf], gran['lat_fire'][conf], np.repeat(tt, conf.sum()))]
        print 'fire detections: %g' % len(xf)
        XX[0].append(xf)
        # keep only ground (no fire) detections inside the expanded bounding box
        mask = np.logical_and(gran['lon_nofire'] >= bf[0],
                              np.logical_and(gran['lon_nofire'] <= bf[1],
                                             np.logical_and(gran['lat_nofire'] >= bf[2],
                                                            gran['lat_nofire'] <= bf[3])))
        xg = np.c_[(gran['lon_nofire'][mask], gran['lat_nofire'][mask], np.repeat(tt, mask.sum()))]
        print 'no fire detections: %g' % len(xg)
        # coarsen the ground detections so they do not overwhelm the fire detections
        coarsening = np.int(1+len(xg)/min(50, 5*max(len(xf), 1)))
        print 'no fire coarsening: %d' % coarsening
        print 'no fire detections reduction: %g' % len(xg[::coarsening])
        XX[1].append(xg[::coarsening])
        cf.append(gran['conf_fire'][conf])

    Xf = np.concatenate(tuple(XX[0]))
    Xg = np.concatenate(tuple(XX[1]))
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))
    c = np.concatenate((gconf*np.ones(len(Xg)), np.concatenate(tuple(cf))))
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape
    print 'shape c: ', c.shape

    return X, y, c
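
# Usage sketch (illustrative, not executed): assuming `data` is the granule
# dictionary saved by the JPSSD pipeline and `time_num` its (start, end) time
# pair, and assuming saveload provides a load() matching sl.save():
#   data, fxlon, fxlat, time_num = sl.load('data')
#   X, y, c = preprocess_data_svm(data, time_num, minconf=80.)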

def preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules, C=None):
    """
    Preprocess satellite data from JPSSD and set it up to use in Support Vector Machine

    :param lons: longitude grid
    :param lats: latitude grid
    :param U: upper bound grid
    :param L: lower bound grid
    :param T: mask grid
    :param scale: time scales
    :param time_num_granules: times of the granules
    :param C: optional, pair of confidence grids (lower bounds, upper bounds)
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """

    # Flatten coordinates
    lon = np.ravel(lons).astype(float)
    lat = np.ravel(lats).astype(float)

    # Temporal scale to days
    tscale = 24*3600
    U = U/tscale
    L = L/tscale

    # Sanity check that U>=L always
    print 'U>L: ', (U>L).sum()
    print 'U<L: ', (U<L).sum()
    print 'U==L: ', (U==L).sum()

    # Reshape to 1D
    uu = np.ravel(U)
    ll = np.ravel(L)
    tt = np.ravel(T)

    # Set maximum and minimum values to NaN
    uu[uu==uu.max()] = np.nan
    ll[ll==ll.min()] = np.nan

    # Mask created during computation of lower and upper bounds
    mk = tt==scale[1]-scale[0]
    # Masking upper bounds outside the mask
    uu[mk] = np.nan

    # Minimum value of the upper bounds
    muu = uu[~np.isnan(uu)].min()
    # Maximum value of the lower bounds
    mll = ll[~np.isnan(ll)].max()

    ### Reduction of the density of lower bounds
    # Creation of lower bounds mask (True values are going to turn into NaNs in the lower bound data)
    lmk = np.copy(mk)
    ## Reason: we do not care about lower bounds which are far below the upper bounds
    # temporary lower mask, all False (values of the mask where the mask is False, inside the fire mask)
    tlmk = lmk[~mk]
    # set to True all the bounds less than the floor of the minimum of the upper bounds in the fire mask
    tlmk[~np.isnan(ll[~mk])] = (ll[~mk][~np.isnan(ll[~mk])] < np.floor(muu))
    # set lower mask from temporary mask
    lmk[~mk] = tlmk
    ## Reason: coarsening of the lower bounds below the upper bounds to create balance
    # coarsen the lower bound data below the upper bounds to a similar amount as the upper bounds
    kk = (~np.isnan(ll[~lmk])).sum()/(~np.isnan(uu)).sum()
    if kk:
        # temporary lower mask, all True (values of the lower mask where the lower mask is False, set to True)
        tlmk = ~lmk[~lmk]
        # only set a subset of the lower mask values to False (coarsening)
        tlmk[::kk] = False
        # set lower mask from temporary mask
        lmk[~lmk] = tlmk
    ## Reason: we care about the maximum lower bounds which are not below upper bounds
    # temporary lower mask, all True (values of the mask where the mask is True, outside the fire mask)
    tlmk = lmk[mk]
    # temporary lower mask 2, all True (subset of the previous mask where the lower bound is not NaN)
    t2lmk = tlmk[~np.isnan(ll[mk])]
    # set to False in the temporary lower mask 2 where lower bounds have the maximum value
    t2lmk[ll[mk][~np.isnan(ll[mk])] == mll] = False
    # set temporary lower mask from temporary lower mask 2
    tlmk[~np.isnan(ll[mk])] = t2lmk
    # set lower mask from temporary lower mask
    lmk[mk] = tlmk
    ## Reason: coarsening of the non-maximum lower bounds not below the upper bounds to create balance
    # coarsen the non-maximum lower bounds not below the upper bounds to a similar
    # amount as the current number of lower bounds
    kk = (ll[mk][~np.isnan(ll[mk])] < mll).sum()/(~np.isnan(ll[~lmk])).sum()
    if kk:
        # temporary lower mask, values of the current lower mask outside of the original fire mask
        tlmk = lmk[mk]
        # temporary lower mask 2, subset of the previous mask where the lower bound is not NaN
        t2lmk = tlmk[~np.isnan(ll[mk])]
        # temporary lower mask 3, subset of the previous mask where the lower bounds are not maximum
        t3lmk = t2lmk[ll[mk][~np.isnan(ll[mk])] < mll]
        # coarsening of the temporary lower mask 3
        t3lmk[::kk] = False
        # set the temporary lower mask 2 from the temporary lower mask 3
        t2lmk[ll[mk][~np.isnan(ll[mk])] < mll] = t3lmk
        # set the temporary lower mask from the temporary lower mask 2
        tlmk[~np.isnan(ll[mk])] = t2lmk
        # set the lower mask from the temporary lower mask
        lmk[mk] = tlmk

    # Masking lower bounds from previous lower mask
    ll[lmk] = np.nan

    # Values different from NaN in the upper and lower bounds
    um = np.array(~np.isnan(uu))
    lm = np.array(~np.isnan(ll))
    # Define all the x, y, and z components of upper and lower bounds
    ux = lon[um]
    uy = lat[um]
    uz = uu[um]
    lx = lon[lm]
    ly = lat[lm]
    lz = ll[lm]

    # Create the data to call the SVM3 function from svm.py
    X = np.c_[np.concatenate((lx,ux)), np.concatenate((ly,uy)), np.concatenate((lz,uz))]
    y = np.concatenate((-np.ones(len(lx)), np.ones(len(ux))))
    # Print the shape of the data
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape

    if C is None:
        c = 80*np.ones(y.shape)
    else:
        c = np.concatenate((np.ravel(C[0])[lm], np.ravel(C[1])[um]))

    # Clean data if not in bounding box
    bbox = (lon.min(), lon.max(), lat.min(), lat.max(), time_num_granules)
    geo_mask = np.logical_and(np.logical_and(np.logical_and(X[:,0] >= bbox[0], X[:,0] <= bbox[1]), X[:,1] >= bbox[2]), X[:,1] <= bbox[3])
    btime = (0, (scale[1]-scale[0])/tscale)
    time_mask = np.logical_and(X[:,2] >= btime[0], X[:,2] <= btime[1])
    whole_mask = np.logical_and(geo_mask, time_mask)
    X = X[whole_mask,:]
    y = y[whole_mask]
    c = c[whole_mask]

    return X, y, c
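
# Usage sketch (illustrative, not executed): assuming the lon/lat grids and
# the upper/lower bound grids U, L (in seconds) with mask T come from the
# JPSSD processing step, and `scale` is the (start, end) time pair:
#   X, y, c = preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules)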

def make_fire_mesh(fxlon, fxlat, it, nt):
    """
    Create a mesh of points to evaluate the decision function

    :param fxlon: data to base x-axis meshgrid on
    :param fxlat: data to base y-axis meshgrid on
    :param it: pair of initial and final times to base z-axis meshgrid on
    :param nt: number of nodes in the z (time) direction
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """

    xx = np.repeat(fxlon[:, :, np.newaxis], nt, axis=2)
    yy = np.repeat(fxlat[:, :, np.newaxis], nt, axis=2)
    tt = np.linspace(it[0], it[1], nt)
    zz = np.swapaxes(np.swapaxes(np.array([np.ones(fxlon.shape)*t for t in tt]), 0, 1), 1, 2)

    return xx, yy, zz
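
# Usage sketch (illustrative, not executed): evaluating between normalized
# times 0 and 1 on an existing longitude/latitude grid pair fxlon, fxlat
# (hypothetical names):
#   xx, yy, zz = make_fire_mesh(fxlon, fxlat, (0., 1.), 20)
#   # xx.shape == yy.shape == zz.shape == fxlon.shape + (20,)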

def make_meshgrid(x, y, z, s=(50,50,50), exp=.1):
    """
    Create a mesh of points to evaluate the decision function

    :param x: data to base x-axis meshgrid on
    :param y: data to base y-axis meshgrid on
    :param z: data to base z-axis meshgrid on
    :param s: tuple of number of nodes in each direction, optional
    :param exp: extra percentage of time steps in each direction (between 0 and 1), optional
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """

    if not isinstance(s, tuple):
        print '>> FAILED <<'
        print 'The number of nodes at each direction is not a tuple: ', s
        sys.exit(1)
    # number of nodes in each direction
    sx, sy, sz = np.array(s).astype(int)
    # extra step sizes in each direction
    brx = int(sx * exp)
    bry = int(sy * exp)
    brz = int(sz * exp)
    # grid lengths in each direction
    lx = x.max() - x.min()
    ly = y.max() - y.min()
    lz = z.max() - z.min()
    # grid resolutions in each direction
    hx = lx / (sx - 2*brx - 1)
    hy = ly / (sy - 2*bry - 1)
    hz = lz / (sz - 2*brz - 1)
    # extreme values for each dimension
    x_min, x_max = x.min() - brx * hx, x.max() + brx * hx
    y_min, y_max = y.min() - bry * hy, y.max() + bry * hy
    z_min, z_max = z.min() - brz * hz, z.max() + brz * hz
    # generating the mesh grid (with numpy's default 'xy' indexing, passing the
    # y-axis first yields arrays of shape (sx, sy, sz))
    xx, yy, zz = np.meshgrid(np.linspace(y_min, y_max, sy),
                             np.linspace(x_min, x_max, sx),
                             np.linspace(z_min, z_max, sz))
    return xx, yy, zz
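
# Usage sketch (illustrative, not executed): given 1D arrays x, y, z of
# normalized detection coordinates, a padded 50x50x50 evaluation grid is
#   xx, yy, zz = make_meshgrid(x, y, z, s=(50,50,50), exp=.1)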

def frontier(clf, xx, yy, zz, bal=.5, plot_decision=False, plot_poly=False, using_weights=False):
    """
    Compute the surface decision frontier for a classifier.

    :param clf: a classifier
    :param xx: meshgrid ndarray
    :param yy: meshgrid ndarray
    :param zz: meshgrid ndarray
    :param bal: number between 0 and 1, balance between lower and upper bounds in decision function (in case not level 0)
    :param plot_decision: boolean of plotting decision volume
    :param plot_poly: boolean of plotting polynomial approximation
    :param using_weights: boolean, classifier was trained with libsvm instance weights
    :return F: 2D meshes with xx, yy coordinates and the hyperplane z which gives decision function 0

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
    """

    # Creating the 3D grid
    XX = np.c_[np.ravel(xx), np.ravel(yy), np.ravel(zz)]

    # Evaluating the decision function
    print '>> Evaluating the decision function...'
    sys.stdout.flush()
    t_1 = time()
    if using_weights:
        from libsvm_weights.python.svmutil import svm_predict
        _, _, p_vals = svm_predict([], XX, clf)
        ZZ = np.array([p[0] for p in p_vals])
    else:
        ZZ = clf.decision_function(XX)
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    hist = np.histogram(ZZ)
    print 'counts: ', hist[0]
    print 'values: ', hist[1]
    print 'decision function range: ', ZZ.min(), '~', ZZ.max()

    # Reshaping decision function volume
    Z = ZZ.reshape(xx.shape)
    print 'decision function shape: ', Z.shape
    sl.save((xx,yy,zz,Z), 'decision')

    if plot_decision:
        try:
            from skimage import measure
            from shiftcmap import shiftedColorMap
            verts, faces, normals, values = measure.marching_cubes_lewiner(Z, level=0, allow_degenerate=False)
            # Scale and transform to actual size of the interesting volume
            h = np.divide([xx.max()-xx.min(), yy.max()-yy.min(), zz.max()-zz.min()], np.array(xx.shape)-1)
            verts = verts * h
            verts = verts + [xx.min(), yy.min(), zz.min()]
            mesh = Poly3DCollection(verts[faces], facecolor='orange', alpha=.9)
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Decision volume")
            col = [(0, .5, 0), (.5, .5, .5), (.5, 0, 0)]
            cm = colors.LinearSegmentedColormap.from_list('GrRdD', col, N=100)
            midpoint = 1 - ZZ.max() / (ZZ.max() + abs(ZZ.min()))
            shiftedcmap = shiftedColorMap(cm, midpoint=midpoint, name='shifted')
            X = np.ravel(xx)
            Y = np.ravel(yy)
            T = np.ravel(zz)
            kk = 50
            p = ax.scatter(X[0::kk], Y[0::kk], T[0::kk], c=ZZ[0::kk], s=.1, alpha=.4, cmap=shiftedcmap)
            cbar = fig.colorbar(p)
            cbar.set_label('decision function value', rotation=270, labelpad=20)
            ax.add_collection3d(mesh)
            ax.set_xlim([xx.min(),xx.max()])
            ax.set_ylim([yy.min(),yy.max()])
            ax.set_zlim([zz.min(),zz.max()])
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('decision.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    if plot_poly:
        fig = plt.figure()
    # Computing fire arrival time from previous decision function
    print '>> Computing fire arrival time...'
    sys.stdout.flush()
    t_1 = time()
    # xx 2-dimensional array
    Fx = xx[:, :, 0]
    # yy 2-dimensional array
    Fy = yy[:, :, 0]
    # zz 1-dimensional array
    zr = zz[0, 0]
    # Initializing fire arrival time
    Fz = np.zeros(Fx.shape)
    # For each x and y
    for k1 in range(Fx.shape[0]):
        for k2 in range(Fx.shape[1]):
            # Approximate the vertical decision function by a piecewise polynomial (cubic spline interpolation)
            pz = interpolate.CubicSpline(zr, Z[k1,k2])
            # Compute the real roots of the piecewise polynomial
            rr = pz.roots()
            # Just take the real roots between min(zz) and max(zz)
            realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5, np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
            if len(realr) > 0:
                # Take the minimum root
                Fz[k1,k2] = realr.min()
                # Plotting the approximated polynomial with the decision function
                if plot_poly:
                    try:
                        plt.ion()
                        plt.plot(pz(zr), zr)
                        plt.plot(Z[k1,k2], zr, '+')
                        plt.plot(np.zeros(len(realr)), realr, 'o', c='g')
                        plt.plot(0, Fz[k1,k2], 'o', markersize=3, c='r')
                        plt.title('Polynomial approximation of decision_function(%f,%f,z)' % (Fx[k1,k2], Fy[k1,k2]))
                        plt.xlabel('decision function value')
                        plt.ylabel('Z')
                        plt.legend(['polynomial','decision values','roots','fire arrival time'])
                        plt.xlim([Z.min(),Z.max()])
                        plt.ylim([zz.min(),zz.max()])
                        plt.show()
                        plt.pause(.001)
                        plt.cla()
                    except Exception as e:
                        print 'Warning: something went wrong when plotting...'
                        print e
            else:
                # If there is no real root of the polynomial between zz.min() and zz.max(), define it as NaN
                Fz[k1,k2] = np.nan
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    F = [Fx,Fy,Fz]

    return F
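
# Usage sketch (illustrative, not executed): with a fitted classifier clf
# (e.g. svm.SVC already trained) and meshes from make_meshgrid:
#   F = frontier(clf, xx, yy, zz)
#   Fx, Fy, Fz = F  # Fz holds the z where the decision function crosses 0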

def SVM3(X, y, C=1., kgam=1., search=False, norm=True, fire_grid=None, weights=None):
    """
    3D Support Vector Machine analysis and plot

    :param X: matrix of training vectors, where n_samples is the number of samples and n_features is the number of features
    :param y: vector of target values
    :param C: penalty parameter of the error term (argument of svm.SVC class), or an array of per-sample weights, optional
    :param kgam: scalar multiplier for gamma (increase it to capture more detail), optional
    :param search: boolean, grid search for the best values of C and gamma, optional
    :param norm: normalize the data into the interval (0,1) in all the directions, optional
    :param fire_grid: the longitude and latitude grid where to have the fire arrival time, optional
    :param weights: currently unused, optional
    :return F: tuple with (longitude grid, latitude grid, fire arrival time grid)

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """

    t_init = time()

    col = [(0, .5, 0), (.5, 0, 0)]
    cm_GR = colors.LinearSegmentedColormap.from_list('GrRd', col, N=2)
    col = [(1, 0, 0), (.25, 0, 0)]
    cm_Rds = colors.LinearSegmentedColormap.from_list('Rds', col, N=100)

    # Plot options
    # plot original data
    plot_data = False
    # plot scaled data with artificial data
    plot_scaled = False
    # plot decision volume
    plot_decision = False
    # plot polynomial approximation
    plot_poly = False
    # plot full hyperplane vs detections with support vectors
    plot_supports = False
    # plot resulting fire arrival time vs detections
    plot_result = False

    # Other options
    # number of vertical nodes per observation
    vN = 1
    # if True, no NaNs in the result are wanted (all NaNs are replaced by the maximum value)
    notnan = True

    # Options better left unchanged
    # number of horizontal nodes per observation (used if fire_grid==None)
    hN = 5
    # creation of artificial lower bounds under the ground detections in the pre-processing
    artil = False
    # if artil = True: resolution of artificial lower bounds vertical to the ground detections
    hartil = .2
    # creation of artificial upper bounds over the fire detections in the pre-processing
    artiu = True
    # if artiu = True: resolution of artificial upper bounds vertical to the fire detections
    hartiu = .1
    # creation of an artificial mesh of lower bounds at the bottom
    downarti = True
    # if downarti = True: distance below the minimum of the z direction for the artificial lower bounds
    dminz = .1
    # if downarti = True: confidence level of the artificial lower bounds
    confal = 100.
    # creation of an artificial mesh of upper bounds at the top
    toparti = False
    # if toparti = True: proportion over the maximum of the z direction for the artificial upper bounds
    dmaxz = .1
    # if toparti = True: confidence level of the artificial upper bounds
    confau = 100.

    # using different weights for the data
    if isinstance(C, (list, tuple, np.ndarray)):
        using_weights = True
        from libsvm_weights.python.svm import svm_problem, svm_parameter
        from libsvm_weights.python.svmutil import svm_train
        from sklearn.utils import compute_class_weight
    else:
        using_weights = False

    # Data inputs
    X = np.array(X).astype(float)
    y = np.array(y)

    # Original data
    oX = np.array(X).astype(float)
    oy = np.array(y)

    # Visualization of the data
    X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    if plot_data:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the original data to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('original_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Normalization of the data into [0,1]^3
    if norm:
        xmin = X0.min()
        xlen = X0.max() - X0.min()
        x0 = np.divide(X0 - xmin, xlen)
        ymin = X1.min()
        ylen = X1.max() - X1.min()
        x1 = np.divide(X1 - ymin, ylen)
        zmin = X2.min()
        zlen = X2.max() - X2.min()
        x2 = np.divide(X2 - zmin, zlen)
        X0, X1, X2 = x0, x1, x2
        X[:, 0] = X0
        X[:, 1] = X1
        X[:, 2] = X2

    # Creation of fire and ground artificial detections
    if artil or artiu or toparti or downarti:
        # Extreme values in the z direction
        minz = X[:, 2].min()
        maxz = X[:, 2].max()
        # Division of lower and upper bounds for data and confidence level
        fl = X[y==np.unique(y)[0]]
        fu = X[y==np.unique(y)[1]]

        # Artificial extensions of the lower bounds
        if artil:
            # Create artificial lower bounds
            flz = np.array([ np.unique(np.append(np.arange(f[2],minz,-hartil),f[2])) for f in fl ])
            # Definition of new ground detections after artificial detections added
            Xg = np.concatenate([ np.c_[(np.repeat(fl[k][0],len(flz[k])),np.repeat(fl[k][1],len(flz[k])),flz[k])] for k in range(len(flz)) ])
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = np.concatenate([ np.repeat(cl[k],len(flz[k])) for k in range(len(flz)) ])
        else:
            Xg = fl
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = cl

        # Artificial extensions of the upper bounds
        if artiu:
            # Create artificial upper bounds
            fuz = np.array([ np.unique(np.append(np.arange(f[2],maxz,hartiu),f[2])) for f in fu ])
            # Definition of new fire detections after artificial detections added
            Xf = np.concatenate([ np.c_[(np.repeat(fu[k][0],len(fuz[k])),np.repeat(fu[k][1],len(fuz[k])),fuz[k])] for k in range(len(fuz)) ])
            # Define new confidence levels
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = np.concatenate([ np.repeat(cu[k],len(fuz[k])) for k in range(len(fuz)) ])
        else:
            Xf = fu
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = cu

        # Bottom artificial lower bounds
        if downarti:
            # Creation of the new x,y mesh of artificial lower bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # All the artificial new mesh points are below the data
            zng = np.repeat(minz-dminz, len(np.ravel(xn)))
            # Artificial lower bounds
            Xga = np.c_[np.ravel(xn), np.ravel(yn), np.ravel(zng)]
            # Definition of new ground detections after down artificial lower detections
            Xgn = np.concatenate((Xg,Xga))
            # Definition of new confidence level
            if using_weights:
                Cga = np.ones(len(Xga))*confal
                Cgn = np.concatenate((Cg,Cga))
        else:
            Xgn = Xg
            if using_weights:
                Cgn = Cg

        # Top artificial upper bounds
        if toparti:
            # Creation of the new x,y mesh of artificial upper bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # All the artificial new mesh points are over the data
            znf = np.repeat(maxz+dmaxz, len(np.ravel(xn)))
            # Artificial upper bounds
            Xfa = np.c_[np.ravel(xn), np.ravel(yn), np.ravel(znf)]
            # Definition of new fire detections after top artificial upper detections
            Xfn = np.concatenate((Xf,Xfa))
            # Definition of new confidence level
            if using_weights:
                Cfa = np.ones(len(Xfa))*confau
                Cfn = np.concatenate((Cf,Cfa))
        else:
            Xfn = Xf
            if using_weights:
                Cfn = Cf

        # New definition of the training vectors
        X = np.concatenate((Xgn, Xfn))
        # New definition of the target values
        y = np.concatenate((np.repeat(np.unique(y)[0],len(Xgn)),np.repeat(np.unique(y)[1],len(Xfn))))
        # New definition of the confidence level
        if using_weights:
            C = np.concatenate((Cgn, Cfn))
        # New definition of each feature vector
        X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]

    # Printing number of samples and features
    n0 = (y==np.unique(y)[0]).sum().astype(float)
    n1 = (y==np.unique(y)[1]).sum().astype(float)
    n_samples, n_features = X.shape
    print 'n_samples =', n_samples
    print 'n_samples_{-1} =', int(n0)
    print 'n_samples_{+1} =', int(n1)
    print 'n_features =', n_features

    # Visualization of scaled data
    if plot_scaled:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the data scaled to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('scaled_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Rescaling gamma to capture more detailed results
    gamma = 1. / (n_features * X.std())
    print 'gamma =', gamma

    # Creating the SVM model and fitting the data using the Support Vector Machine technique
    print '>> Creating the SVM model...'
    sys.stdout.flush()
    if using_weights:
        gamma = kgam*gamma
        # Compute class balanced weights
        cls, _ = np.unique(y, return_inverse=True)
        class_weight = compute_class_weight("balanced", cls, y)
        prob = svm_problem(C, y, X)
        arg = '-g %.15g -w%01d %.15g -w%01d %.15g -m 1000 -h 0' % (gamma, cls[0], class_weight[0],
                                                                   cls[1], class_weight[1])
        param = svm_parameter(arg)
        print '>> Fitting the SVM model...'
        t_1 = time()
        clf = svm_train(prob, param)
        t_2 = time()
    else:
        t_1 = time()
        if search:
            print '>> Searching for the best values of C and gamma...'
            # Grid search over the parameter grid
            param_grid = {'C': np.logspace(0,5,6), 'gamma': gamma*np.logspace(0,5,6)}
            # Make grid search classifier
            grid_search = GridSearchCV(svm.SVC(cache_size=2000, class_weight="balanced", probability=True), param_grid, n_jobs=-1, verbose=1, cv=5, iid=False)
            print '>> Fitting the SVM model...'
            # Train the classifier
            grid_search.fit(X, y)
            print "Best Parameters:\n", grid_search.best_params_
            clf = grid_search.best_estimator_
            print "Best Estimators:\n", clf
        else:
            gamma = kgam*gamma
            clf = svm.SVC(C=C, kernel="rbf", gamma=gamma, cache_size=2000, class_weight="balanced") # default kernel: exp(-gamma*||x-x'||^2)
            print clf
            print '>> Fitting the SVM model...'
            # Fitting the data using the Support Vector Machine technique
            clf.fit(X, y)
        t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))

    if not using_weights:
        # Check if the classification failed
        if clf.fit_status_:
            print '>> FAILED <<'
            print 'Failed fitting the data'
            sys.exit(1)
        print 'number of support vectors: ', clf.n_support_
        print 'score of trained data: ', clf.score(X,y)

    # Creating the mesh grid to evaluate the classification
    print '>> Creating mesh grid to evaluate the classification...'
    nnodes = np.ceil(np.power(n_samples, 1./n_features))
    if fire_grid is None:
        # Number of necessary nodes
        hnodes = hN*nnodes
        vnodes = vN*nnodes
        print 'number of horizontal nodes (%d meshgrid nodes for each observation): %d' % (hN,hnodes)
        print 'number of vertical nodes (%d meshgrid nodes for each observation): %d' % (vN,vnodes)
        # Computing resolution of the mesh to evaluate
        sdim = (hnodes,hnodes,vnodes)
        print 'grid_size = %dx%dx%d = %d' % (sdim[0],sdim[1],sdim[2],np.prod(sdim))
        t_1 = time()
        xx, yy, zz = make_meshgrid(X0, X1, X2, s=sdim)
        t_2 = time()
    else:
        # rescale the given fire grid into the normalized coordinates
        fxlon = np.divide(fire_grid[0] - xmin, xlen)
        fxlat = np.divide(fire_grid[1] - ymin, ylen)
        it = (X2.min(), X2.max())
        vnodes = vN*nnodes
        sdim = (fxlon.shape[0], fxlon.shape[1], vnodes)
        print 'fire_grid_size = %dx%dx%d = %d' % (sdim + (np.prod(sdim),))
        t_1 = time()
        xx, yy, zz = make_fire_mesh(fxlon, fxlat, it, sdim[2])
        t_2 = time()
        print 'grid_created = %dx%dx%d = %d' % (zz.shape + (np.prod(zz.shape),))
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))

    # Computing the 2D fire arrival time, F
    print '>> Computing the 2D fire arrival time, F...'
    sys.stdout.flush()
    F = frontier(clf, xx, yy, zz, plot_decision=plot_decision, plot_poly=plot_poly, using_weights=using_weights)

    print '>> Creating final results...'
    sys.stdout.flush()
    # Plotting the Separating Hyperplane of the SVM classification with the support vectors
    if plot_supports:
        try:
            if using_weights:
                supp_ind = np.sort(clf.get_sv_indices())-1
                supp_vec = X[supp_ind]
            else:
                supp_ind = clf.support_
                supp_vec = clf.support_vectors_
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D Separating Hyperplane of an SVM")
            # plotting the separating hyperplane
            ax.plot_wireframe(F[0], F[1], F[2], color='orange', alpha=.5)
            # computing the indices where there are no support vectors
            rr = np.array(range(len(y)))
            ms = np.isin(rr, supp_ind)
            nsupp = rr[~ms]
            # plotting non-support vectors (smaller)
            ax.scatter(X0[nsupp], X1[nsupp], X2[nsupp], c=y[nsupp], cmap=cm_GR, s=.5, vmin=y.min(), vmax=y.max(), alpha=.1)
            # plotting support vectors (bigger)
            ax.scatter(supp_vec[:, 0], supp_vec[:, 1], supp_vec[:, 2], c=y[supp_ind], cmap=cm_GR, s=1, edgecolors='k', alpha=.2)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_zlim(zz.min(), zz.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('support.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Plot the normalized fire arrival time resulting from the SVM classification
    if plot_result:
        try:
            Fx, Fy, Fz = np.array(F[0]), np.array(F[1]), np.array(F[2])
            with np.errstate(invalid='ignore'):
                Fz[Fz > X2.max()] = np.nan
            if notnan:
                Fz[np.isnan(Fz)] = X2.max()
                Fz = np.minimum(Fz, X2.max())
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Fire arrival time normalized")
            # plotting fire arrival time
            p = ax.plot_surface(Fx, Fy, Fz, cmap=cm_Rds,
                                linewidth=0, antialiased=False)
            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_zlim(zz.min(), zz.max())
            cbar = fig.colorbar(p)
            cbar.set_label('Fire arrival time normalized', labelpad=20, rotation=270)
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('tign_g.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    # Translate the result back into the initial data scale
    if norm:
        f0 = F[0] * xlen + xmin
        f1 = F[1] * ylen + ymin
        f2 = F[2] * zlen + zmin
        FF = [f0, f1, f2]
    else:
        # without normalization the frontier is already in the data scale
        FF = [F[0], F[1], F[2]]

    # Clamp all the values larger than the maximum detection time to that maximum
    oX0, oX1, oX2 = oX[:, 0], oX[:, 1], oX[:, 2]
    FFx, FFy, FFz = FF[0], FF[1], FF[2]

    with np.errstate(invalid='ignore'):
        FFz[FFz > oX2.max()] = np.nan

    if notnan:
        FFz[np.isnan(FFz)] = oX2.max()
        FFz = np.minimum(FFz, oX2.max())
    FF = [FFx, FFy, FFz]

    # Plot the fire arrival time resulting from the SVM classification
    if plot_result:
        try:
            # Plotting the result
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D graph function of an SVM")
            FFx, FFy, FFz = np.array(FF[0]), np.array(FF[1]), np.array(FF[2])
            # plotting original data
            ax.scatter(oX0, oX1, oX2, c=oy, cmap=cm_GR, s=1, alpha=.5, vmin=y.min(), vmax=y.max())
            # plotting fire arrival time
            ax.plot_wireframe(FFx, FFy, FFz, color='orange', alpha=.5)
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('result.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e

    print '>> SUCCESS <<'
    t_final = time()
    print 'TOTAL elapsed time: %ss.' % str(abs(t_final-t_init))
    plt.close()

    return FF
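
# Usage sketch (illustrative, not executed): with features X (columns of
# longitude, latitude and time in days) and labels y in {-1,+1}, e.g. from
# preprocess_data_svm, and per-sample confidences c used as weights:
#   F = SVM3(X, y, C=c, kgam=5.)
#   lon_grid, lat_grid, tign_g = F   # hypothetical result names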

if __name__ == "__main__":
    # Experiment to do
    exp = 1
    search = False

    # Defining ground and fire detections
    def exp1():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0]]
        Xf = [[0, 0, 1], [1, 1, 0], [2, 2, 1], [2, 0, 1], [0, 2, 1]]
        C = np.concatenate((10.*np.ones(len(Xg)), 100.*np.ones(len(Xf))))
        kgam = 1.
        return Xg, Xf, C, kgam
    def exp2():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0],
              [4, 2, 0], [4, 0, 0], [2, 1, .5], [0, 1, .5],
              [4, 1, .5], [2, 0, .5], [2, 2, .5]]
        Xf = [[0, 0, 1], [1, 1, 0.25], [2, 2, 1], [2, 0, 1], [0, 2, 1], [3, 1, 0.25], [4, 2, 1], [4, 0, 1]]
        C = np.concatenate((np.array([50., 50., 50., 50., 50., 50.,
                                      1000., 100., 100., 100., 100.]), 100.*np.ones(len(Xf))))
        kgam = 5.
        return Xg, Xf, C, kgam

    # Creating the options
    options = {1: exp1, 2: exp2}

    # Defining the option depending on the experiment
    Xg, Xf, C, kgam = options[exp]()

    # Creating the data necessary to run the SVM3 function
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))

    # Running SVM classification
    SVM3(X, y, C=C, kgam=kgam, search=search)