# svm.py [JPSSData.git]
# Generalization of plot_svm to also handle spatio-temporal processing
# Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
# Angel Farguell (angel.farguell@gmail.com)
#
# To install:
#   conda install scikit-learn
#   conda install scikit-image
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from scipy import interpolate, spatial
import matplotlib.pyplot as plt
import matplotlib.font_manager
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import axes3d
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import numpy as np
from time import time
from infrared_perimeters import process_infrared_perimeters
import sys
import saveload as sl

def preprocess_data_svm(data, scale):
    """
    Preprocess satellite data from JPSSD for direct use in the Support Vector Machine
    (without any interpolation, as space-time 3D points)

    :param data: dictionary of satellite data from JPSSD
    :param scale: tuple with the time scales (numeric start and end times)
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-09-24
    """
    # minimum fire confidence level to take into account
    minconf = 70.
    # confidence of ground detections
    gconf = 70.

    # scale from seconds to days
    tscale = 24.*3600.

    detlon = np.concatenate([d['lon_fire'] for d in data.itervalues()])
    detlat = np.concatenate([d['lat_fire'] for d in data.itervalues()])
    bb = (detlon.min(),detlon.max(),detlat.min(),detlat.max())
    dc = (bb[1]-bb[0],bb[3]-bb[2])
    bf = (bb[0]-dc[0]*.5,bb[1]+dc[0]*.5,bb[2]-dc[1]*.5,bb[3]+dc[1]*.5)
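    # e.g. detections spanning lon [-113,-112] and lat [40,41] give dc = (1,1)
    # and an extended bounding box bf = (-113.5,-111.5,39.5,41.5)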
    print 'extended bounding box: ', bf
    # process all the points as space-time 3D nodes
    XX = [[],[]]
    cf = []
    for gran in data.items():
        tt = (gran[1]['time_num']-scale[0])/tscale
        conf = gran[1]['conf_fire']>=minconf
        xf = np.c_[(gran[1]['lon_fire'][conf],gran[1]['lat_fire'][conf],np.repeat(tt,conf.sum()))]
        XX[0].append(xf)
        mask = np.logical_and(gran[1]['lon_nofire'] >= bf[0],
                    np.logical_and(gran[1]['lon_nofire'] <= bf[1],
                        np.logical_and(gran[1]['lat_nofire'] >= bf[2],
                            gran[1]['lat_nofire'] <= bf[3])))
        xg = np.c_[(gran[1]['lon_nofire'][mask],gran[1]['lat_nofire'][mask],np.repeat(tt,mask.sum()))]
        # coarsening factor to balance ground detections against fire detections
        coarse = np.int(1+len(xg)/(5*max(len(xf),20)))
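        # e.g. 10000 ground points and 100 fire points give coarse = 21,
        # so roughly 1 in every 21 ground points is kept below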
        XX[1].append(xg[::coarse])
        cf.append(gran[1]['conf_fire'][conf])
    Xf = np.concatenate(tuple(XX[0]))
    Xg = np.concatenate(tuple(XX[1]))
    X = np.concatenate((Xg,Xf))
    y = np.concatenate((-np.ones(len(Xg)),np.ones(len(Xf))))
    c = np.concatenate((gconf*np.ones(len(Xg)),np.concatenate(tuple(cf))))
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape
    print 'shape c: ', c.shape

    return X,y,c
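
# Example sketch of preprocess_data_svm (hypothetical granule dictionary: the
# keys below are the ones the function reads; t0, t1 are numeric start/end times):
#   data = {'g1': {'time_num': t0+3600., 'conf_fire': np.array([80.,90.]),
#                  'lon_fire': np.array([-112.8,-112.7]), 'lat_fire': np.array([39.7,39.8]),
#                  'lon_nofire': np.array([-112.9]), 'lat_nofire': np.array([39.6])}}
#   X, y, c = preprocess_data_svm(data, (t0, t1))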

def preprocess_result_svm(lons, lats, U, L, T, scale, time_num_granules, C=None):
    """
    Preprocess satellite data from JPSSD and set it up for use in the Support Vector Machine

    :param lons: longitude grid
    :param lats: latitude grid
    :param U: upper bound grid
    :param L: lower bound grid
    :param T: mask grid
    :param scale: time scales
    :param time_num_granules: times of the granules
    :param C: confidence grids (lower bounds, upper bounds), optional
    :return X: matrix of features for SVM
    :return y: vector of labels for SVM
    :return c: vector of confidence levels for SVM

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """
    # Flatten coordinates
    lon = np.ravel(lons).astype(float)
    lat = np.ravel(lats).astype(float)

    # Temporal scale to days
    tscale = 24*3600
    U = U/tscale
    L = L/tscale

    # Check that U>=L holds everywhere
    print 'U>L: ',(U>L).sum()
    print 'U<L: ',(U<L).sum()
    print 'U==L: ',(U==L).sum()
    # Reshape to 1D
    uu = np.ravel(U)
    ll = np.ravel(L)
    tt = np.ravel(T)

    # Set the maximum and minimum sentinel values to NaN
    uu[uu==uu.max()] = np.nan
    ll[ll==ll.min()] = np.nan

    # Mask created during computation of lower and upper bounds
    mk = tt==scale[1]-scale[0]
    # Mask out upper bounds outside the fire mask
    uu[mk] = np.nan

    # Minimum value of the valid upper bounds
    muu = uu[~np.isnan(uu)].min()
    # Maximum value of the valid lower bounds
    mll = ll[~np.isnan(ll)].max()
    ### Reduction of the density of lower bounds
    # Creation of the lower-bound mask (True values will turn into NaN's in the lower bound data)
    lmk = np.copy(mk)
    ## Reason: we do not care about lower bounds that are far below the upper bounds
    # temporary lower mask, all False (values of the mask where the mask is False, inside the fire mask)
    tlmk = lmk[~mk]
    # set to True all the bounds less than the floor of the minimum of the upper bounds in the fire mask
    tlmk[~np.isnan(ll[~mk])] = (ll[~mk][~np.isnan(ll[~mk])] < np.floor(muu))
    # set lower mask from temporary mask
    lmk[~mk] = tlmk
    ## Reason: coarsen the lower bounds below the upper bounds to create balance
    # coarsening factor so that the number of lower bounds below the upper bounds is similar to the number of upper bounds
    kk = (~np.isnan(ll[~lmk])).sum()/(~np.isnan(uu)).sum()
    if kk:
        # temporary lower mask, all True (values of the lower mask where the lower mask is False, set to True)
        tlmk = ~lmk[~lmk]
        # only set a subset of the lower mask values to False (coarsening)
        tlmk[::kk] = False
        # set lower mask from temporary mask
        lmk[~lmk] = tlmk
    ## Reason: we care about the maximum lower bounds which are not below upper bounds
    # temporary lower mask, all True (values of the mask where the mask is True, outside the fire mask)
    tlmk = lmk[mk]
    # temporary lower mask 2, all True (subset of the previous mask where the lower bound is not NaN)
    t2lmk = tlmk[~np.isnan(ll[mk])]
    # set to False in the temporary lower mask 2 where lower bounds have maximum value
    t2lmk[ll[mk][~np.isnan(ll[mk])] == mll] = False
    # set temporary lower mask from temporary lower mask 2
    tlmk[~np.isnan(ll[mk])] = t2lmk
    # set lower mask from temporary lower mask
    lmk[mk] = tlmk
    ## Reason: coarsen the non-maximum lower bounds not below the upper bounds to create balance
    # coarsening factor for the subset outside of the fire mask, so that the number of
    # non-maximum lower bounds is similar to the current number of lower bounds
    kk = (ll[mk][~np.isnan(ll[mk])] < mll).sum()/(~np.isnan(ll[~lmk])).sum()
    if kk:
        # temporary lower mask, values of the current lower mask outside of the original fire mask
        tlmk = lmk[mk]
        # temporary lower mask 2, subset of the previous mask where the lower bound is not NaN
        t2lmk = tlmk[~np.isnan(ll[mk])]
        # temporary lower mask 3, subset of the previous mask where the lower bounds are not maximum
        t3lmk = t2lmk[ll[mk][~np.isnan(ll[mk])] < mll]
        # coarsening of the temporary lower mask 3
        t3lmk[::kk] = False
        # set the temporary lower mask 2 from the temporary lower mask 3
        t2lmk[ll[mk][~np.isnan(ll[mk])] < mll] = t3lmk
        # set the temporary lower mask from the temporary lower mask 2
        tlmk[~np.isnan(ll[mk])] = t2lmk
        # set the lower mask from the temporary lower mask
        lmk[mk] = tlmk

    # Mask the lower bounds using the lower mask above
    ll[lmk] = np.nan
    # Values different than NaN in the upper and lower bounds
    um = np.array(~np.isnan(uu))
    lm = np.array(~np.isnan(ll))
    # Define all the x, y, and z components of upper and lower bounds
    ux = lon[um]
    uy = lat[um]
    uz = uu[um]
    lx = lon[lm]
    ly = lat[lm]
    lz = ll[lm]

    # Create the data to call the SVM3 function below
    X = np.c_[np.concatenate((lx,ux)),np.concatenate((ly,uy)),np.concatenate((lz,uz))]
    y = np.concatenate((-np.ones(len(lx)),np.ones(len(ux))))
    # Print the shape of the data
    print 'shape X: ', X.shape
    print 'shape y: ', y.shape
    if C is None:
        c = 80*np.ones(y.shape)
    else:
        c = np.concatenate((np.ravel(C[0])[lm],np.ravel(C[1])[um]))

    # Clean data if not in bounding box
    bbox = (lon.min(),lon.max(),lat.min(),lat.max(),time_num_granules)
    geo_mask = np.logical_and(np.logical_and(np.logical_and(X[:,0] >= bbox[0],X[:,0] <= bbox[1]), X[:,1] >= bbox[2]), X[:,1] <= bbox[3])
    btime = (0,(scale[1]-scale[0])/tscale)
    time_mask = np.logical_and(X[:,2] >= btime[0], X[:,2] <= btime[1])
    whole_mask = np.logical_and(geo_mask, time_mask)
    X = X[whole_mask,:]
    y = y[whole_mask]
    c = c[whole_mask]

    return X,y,c
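
# Example sketch of preprocess_result_svm (hypothetical grids: U, L, T are the
# upper bound, lower bound, and mask grids in seconds, matching lons/lats):
#   lons, lats = np.meshgrid(np.linspace(-113,-112,50), np.linspace(40,41,50))
#   X, y, c = preprocess_result_svm(lons, lats, U, L, T, (t0, t1), time_num_granules)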

def make_fire_mesh(fxlon, fxlat, it, nt):
    """
    Create a mesh of points to evaluate the decision function

    :param fxlon: data to base x-axis meshgrid on
    :param fxlat: data to base y-axis meshgrid on
    :param it: pair (t_min, t_max) to base z-axis meshgrid on
    :param nt: number of nodes in the time direction
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-04-01
    """
    xx = np.repeat(fxlon[:, :, np.newaxis], nt, axis=2)
    yy = np.repeat(fxlat[:, :, np.newaxis], nt, axis=2)
    tt = np.linspace(it[0],it[1],nt)
    zz = np.swapaxes(np.swapaxes(np.array([np.ones(fxlon.shape)*t for t in tt]),0,1),1,2)

    return xx, yy, zz
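
# Example sketch (hypothetical 10x10 grid): for fxlon, fxlat of shape (10,10),
# it=(0.,10.) and nt=5, the outputs xx, yy, zz all have shape (10,10,5):
#   fxlon, fxlat = np.meshgrid(np.linspace(-113,-112,10), np.linspace(40,41,10))
#   xx, yy, zz = make_fire_mesh(fxlon, fxlat, (0.,10.), 5)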

def make_meshgrid(x, y, z, s=(50,50,50), exp=.1):
    """
    Create a mesh of points to evaluate the decision function

    :param x: data to base x-axis meshgrid on
    :param y: data to base y-axis meshgrid on
    :param z: data to base z-axis meshgrid on
    :param s: tuple of the number of nodes in each direction, optional
    :param exp: extra fraction of nodes to pad each direction with (between 0 and 1), optional
    :return xx, yy, zz: ndarray

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """
    if not isinstance(s, tuple):
        print '>> FAILED <<'
        print 'The number of nodes in each direction is not a tuple: ', s
        sys.exit(1)
    # number of nodes in each direction
    sx, sy, sz = np.array(s).astype(int)
    # extra step sizes in each direction
    brx = int(sx * exp)
    bry = int(sy * exp)
    brz = int(sz * exp)
    # grid lengths in each direction
    lx = x.max() - x.min()
    ly = y.max() - y.min()
    lz = z.max() - z.min()
    # grid resolutions in each direction
    hx = lx / (sx - 2*brx - 1)
    hy = ly / (sy - 2*bry - 1)
    hz = lz / (sz - 2*brz - 1)
    # extreme values in each dimension
    x_min, x_max = x.min() - brx * hx, x.max() + brx * hx
    y_min, y_max = y.min() - bry * hy, y.max() + bry * hy
    z_min, z_max = z.min() - brz * hz, z.max() + brz * hz
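    # e.g. with sx=50 and exp=.1: brx=5 and hx=lx/39, so 5 extra nodes of the
    # same spacing pad each side of the x data range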
    # generating the mesh grid
    xx, yy, zz = np.meshgrid(np.linspace(y_min, y_max, sy),
                             np.linspace(x_min, x_max, sx),
                             np.linspace(z_min, z_max, sz))
    return xx, yy, zz
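
# Example sketch (hypothetical scattered points; each output has shape (50,50,50)
# and extends exp=10% beyond the data range in every direction):
#   x, y, z = np.random.rand(100), np.random.rand(100), np.random.rand(100)
#   xx, yy, zz = make_meshgrid(x, y, z, s=(50,50,50))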

def frontier(clf, xx, yy, zz, bal=.5, plot_decision=False, plot_poly=False, using_weights=False):
    """
    Compute the surface decision frontier for a classifier.

    :param clf: a classifier
    :param xx: meshgrid ndarray
    :param yy: meshgrid ndarray
    :param zz: meshgrid ndarray
    :param bal: number between 0 and 1, balance between lower and upper bounds in the decision function (in case not level 0)
    :param plot_decision: boolean for plotting the decision volume
    :param plot_poly: boolean for plotting the polynomial approximation
    :param using_weights: boolean, True if the classifier is a libsvm model trained with weights
    :return F: 2D meshes with xx, yy coordinates and the hyperplane z which gives decision function 0

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://www.semipol.de/2015/10/29/SVM-separating-hyperplane-3d-matplotlib.html
    """
    # Creating the 3D grid
    XX = np.c_[np.ravel(xx), np.ravel(yy), np.ravel(zz)]

    # Evaluating the decision function
    print '>> Evaluating the decision function...'
    sys.stdout.flush()
    t_1 = time()
    if using_weights:
        from libsvm_weights.python.svmutil import svm_predict
        _, _, p_vals = svm_predict([], XX, clf)
        ZZ = np.array([p[0] for p in p_vals])
    else:
        ZZ = clf.decision_function(XX)
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    hist = np.histogram(ZZ)
    print 'counts: ', hist[0]
    print 'values: ', hist[1]
    print 'decision function range: ', ZZ.min(), '~', ZZ.max()

    # Reshaping decision function volume
    Z = ZZ.reshape(xx.shape)
    print 'decision function shape: ', Z.shape
    sl.save((xx,yy,zz,Z),'decision')
    if plot_decision:
        try:
            from skimage import measure
            from shiftcmap import shiftedColorMap
            verts, faces, normals, values = measure.marching_cubes_lewiner(Z, level=0, allow_degenerate=False)
            # Scale and transform to the actual size of the interesting volume
            h = np.divide([xx.max()-xx.min(), yy.max() - yy.min(), zz.max() - zz.min()],np.array(xx.shape)-1)
            verts = verts * h
            verts = verts + [xx.min(), yy.min(), zz.min()]
            mesh = Poly3DCollection(verts[faces], facecolor='orange', alpha=.9)
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Decision volume")
            col = [(0, 0, 1), (.5, .5, .5), (1, 0, 0)]
            cm = colors.LinearSegmentedColormap.from_list('BuRd',col,N=100)
            midpoint = 1 - ZZ.max() / (ZZ.max() + abs(ZZ.min()))
            shiftedcmap = shiftedColorMap(cm, midpoint=midpoint, name='shifted')
            X = np.ravel(xx)
            Y = np.ravel(yy)
            T = np.ravel(zz)
            kk = 50
            p = ax.scatter(X[0::kk], Y[0::kk], T[0::kk], c=ZZ[0::kk], s=.1, alpha=.4, cmap=shiftedcmap)
            cbar = fig.colorbar(p)
            cbar.set_label('decision function value', rotation=270, labelpad=20)
            ax.add_collection3d(mesh)
            ax.set_xlim([xx.min(),xx.max()])
            ax.set_ylim([yy.min(),yy.max()])
            ax.set_zlim([zz.min(),zz.max()])
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('decision.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    if plot_poly:
        fig = plt.figure()
    # Computing fire arrival time from the previous decision function
    print '>> Computing fire arrival time...'
    sys.stdout.flush()
    t_1 = time()
    # xx 2-dimensional array
    Fx = xx[:, :, 0]
    # yy 2-dimensional array
    Fy = yy[:, :, 0]
    # zz 1-dimensional array
    zr = zz[0, 0]
    # Initializing fire arrival time
    Fz = np.zeros(Fx.shape)
    # For each x and y
    for k1 in range(Fx.shape[0]):
        for k2 in range(Fx.shape[1]):
            # Approximate the vertical decision function by a piecewise polynomial (cubic spline interpolation)
            pz = interpolate.CubicSpline(zr, Z[k1,k2])
            # Compute the roots of the piecewise polynomial
            rr = pz.roots()
            # Just take the real roots between min(zz) and max(zz)
            realr = rr.real[np.logical_and(abs(rr.imag) < 1e-5, np.logical_and(rr.real > zr.min(), rr.real < zr.max()))]
            if len(realr) > 0:
                # Take the minimum root
                Fz[k1,k2] = realr.min()
                # Plotting the approximated polynomial with the decision function
                if plot_poly:
                    try:
                        plt.ion()
                        plt.plot(pz(zr),zr)
                        plt.plot(Z[k1,k2],zr,'+')
                        plt.plot(np.zeros(len(realr)),realr,'o',c='g')
                        plt.plot(0,Fz[k1,k2],'o',markersize=3,c='r')
                        plt.title('Polynomial approximation of decision_function(%f,%f,z)' % (Fx[k1,k2],Fy[k1,k2]))
                        plt.xlabel('decision function value')
                        plt.ylabel('Z')
                        plt.legend(['polynomial','decision values','roots','fire arrival time'])
                        plt.xlim([Z.min(),Z.max()])
                        plt.ylim([zz.min(),zz.max()])
                        plt.show()
                        plt.pause(.001)
                        plt.cla()
                    except Exception as e:
                        print 'Warning: something went wrong when plotting...'
                        print e
            else:
                # If there is no real root of the polynomial between zz.min() and zz.max(), define it as NaN
                Fz[k1,k2] = np.nan
    t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    F = [Fx,Fy,Fz]

    return F
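
# Example sketch of frontier (assumes a fitted classifier clf with a
# decision_function method, e.g. the output of svm.SVC(...).fit(X, y)):
#   xx, yy, zz = make_meshgrid(X[:,0], X[:,1], X[:,2])
#   Fx, Fy, Fz = frontier(clf, xx, yy, zz)
#   # Fz[i,j] is the smallest time where the decision function changes sign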

def SVM3(X, y, C=1., kgam=1., search=False, norm=True, fire_grid=None, weights=None):
    """
    3D Support Vector Machine analysis and plot

    :param X: training vectors, where n_samples is the number of samples and n_features is the number of features
    :param y: target values
    :param C: weight against outliers (argument of the svm.SVC class) or array of per-sample weights, optional
    :param kgam: scalar multiplier for gamma (increase it to capture more detail), optional
    :param search: whether to grid-search the best values of C and gamma, optional
    :param norm: normalize the data into the interval (0,1) in all directions, optional
    :param fire_grid: the longitude and latitude grid where to compute the fire arrival time, optional
    :return F: tuple with (longitude grid, latitude grid, fire arrival time grid)

    Developed in Python 2.7.15 :: Anaconda 4.5.10, on MACINTOSH.
    Angel Farguell (angel.farguell@gmail.com), 2019-02-20
    Modified version of:
    https://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
    """
    t_init = time()

    col = [(0, 1, 0), (1, 0, 0)]
    cm_GR = colors.LinearSegmentedColormap.from_list('GrRd',col,N=2)
    col = [(1, 0, 0), (.25, 0, 0)]
    cm_Rds = colors.LinearSegmentedColormap.from_list('Rds',col,N=100)

    # Plot options
    # plot original data
    plot_data = False
    # plot scaled data with artificial data
    plot_scaled = False
    # plot decision volume
    plot_decision = False
    # plot polynomial approximation
    plot_poly = False
    # plot full hyperplane vs detections with support vectors
    plot_supports = False
    # plot resulting fire arrival time vs detections
    plot_result = False
    # Other options
    # number of vertical nodes per observation
    vN = 1
    # if True, all NaNs in the result are replaced by the maximum value
    notnan = True

    # Options better left unchanged
    # number of horizontal nodes per observation (used if fire_grid is None)
    hN = 5
    # creation of artificial lower bounds under the ground detections in the pre-processing
    artil = True
    # if artil = True: vertical resolution of the artificial lower bounds below the ground detections
    hartil = .2
    # creation of artificial upper bounds over the fire detections in the pre-processing
    artiu = True
    # if artiu = True: vertical resolution of the artificial upper bounds above the fire detections
    hartiu = .1
    # creation of an artificial mesh of lower bounds at the bottom
    downarti = True
    # if downarti = True: distance below the minimum of the z direction for the artificial lower bounds
    dminz = .1
    # if downarti = True: confidence level of the artificial lower bounds
    confal = 100.
    # creation of an artificial mesh of upper bounds at the top
    toparti = False
    # if toparti = True: distance over the maximum of the z direction for the artificial upper bounds
    dmaxz = .1
    # if toparti = True: confidence level of the artificial upper bounds
    confau = 100.
    # using different weights for the data
    if isinstance(C,(list,tuple,np.ndarray)):
        using_weights = True
        from libsvm_weights.python.svm import svm_problem, svm_parameter
        from libsvm_weights.python.svmutil import svm_train
        from sklearn.utils import compute_class_weight
    else:
        using_weights = False
    # Data inputs
    X = np.array(X).astype(float)
    y = np.array(y)

    # Original data
    oX = np.array(X).astype(float)
    oy = np.array(y)

    # Visualization of the data
    X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    if plot_data:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the original data to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=20, edgecolors='k', vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('original_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Normalization of the data into [0,1]^3
    if norm:
        xmin = X0.min()
        xlen = X0.max() - X0.min()
        x0 = np.divide(X0 - xmin, xlen)
        ymin = X1.min()
        ylen = X1.max() - X1.min()
        x1 = np.divide(X1 - ymin, ylen)
        zmin = X2.min()
        zlen = X2.max() - X2.min()
        x2 = np.divide(X2 - zmin, zlen)
        X0, X1, X2 = x0, x1, x2
        X[:, 0] = X0
        X[:, 1] = X1
        X[:, 2] = X2
    # Creation of artificial fire and ground detections
    if artil or artiu or toparti or downarti:
        # Extreme values in the z direction
        minz = X[:, 2].min()
        maxz = X[:, 2].max()
        # Division into lower and upper bounds for data and confidence level
        fl = X[y==np.unique(y)[0]]
        fu = X[y==np.unique(y)[1]]

        # Artificial extensions of the lower bounds
        if artil:
            # Create artificial lower bounds
            flz = np.array([ np.unique(np.append(np.arange(f[2],minz,-hartil),f[2])) for f in fl ])
            # Definition of the new ground detections after the artificial detections are added
            Xg = np.concatenate([ np.c_[(np.repeat(fl[k][0],len(flz[k])),np.repeat(fl[k][1],len(flz[k])),flz[k])] for k in range(len(flz)) ])
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = np.concatenate([ np.repeat(cl[k],len(flz[k])) for k in range(len(flz)) ])
        else:
            Xg = fl
            if using_weights:
                cl = C[y==np.unique(y)[0]]
                Cg = cl
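        # e.g. with hartil=.2 and minz=0 (normalized data), a ground detection
        # at z=.5 also generates artificial lower bounds at z=.3 and z=.1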
        # Artificial extensions of the upper bounds
        if artiu:
            # Create artificial upper bounds
            fuz = np.array([ np.unique(np.append(np.arange(f[2],maxz,hartiu),f[2])) for f in fu ])
            # Definition of the new fire detections after the artificial detections are added
            Xf = np.concatenate([ np.c_[(np.repeat(fu[k][0],len(fuz[k])),np.repeat(fu[k][1],len(fuz[k])),fuz[k])] for k in range(len(fuz)) ])
            # Define new confidence levels
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = np.concatenate([ np.repeat(cu[k],len(fuz[k])) for k in range(len(fuz)) ])
        else:
            Xf = fu
            if using_weights:
                cu = C[y==np.unique(y)[1]]
                Cf = cu
        # Bottom artificial lower bounds
        if downarti:
            # Creation of the new x,y mesh of artificial lower bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # The whole new artificial mesh is placed below the data
            zng = np.repeat(minz-dminz,len(np.ravel(xn)))
            # Artificial lower bounds
            Xga = np.c_[np.ravel(xn),np.ravel(yn),np.ravel(zng)]
            # Definition of the new ground detections after the bottom artificial lower detections
            Xgn = np.concatenate((Xg,Xga))
            # Definition of the new confidence level
            if using_weights:
                Cga = np.ones(len(Xga))*confal
                Cgn = np.concatenate((Cg,Cga))
        else:
            Xgn = Xg
            if using_weights:
                Cgn = Cg
        # Top artificial upper bounds
        if toparti:
            # Creation of the new x,y mesh of artificial upper bounds
            xn, yn = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), 20),
                                 np.linspace(X[:, 1].min(), X[:, 1].max(), 20))
            # The whole new artificial mesh is placed above the data
            znf = np.repeat(maxz+dmaxz,len(np.ravel(xn)))
            # Artificial upper bounds
            Xfa = np.c_[np.ravel(xn),np.ravel(yn),np.ravel(znf)]
            # Definition of the new fire detections after the top artificial upper detections
            Xfn = np.concatenate((Xf,Xfa))
            # Definition of the new confidence level
            if using_weights:
                Cfa = np.ones(len(Xfa))*confau
                Cfn = np.concatenate((Cf,Cfa))
        else:
            Xfn = Xf
            if using_weights:
                Cfn = Cf
        # New definition of the training vectors
        X = np.concatenate((Xgn, Xfn))
        # New definition of the target values
        y = np.concatenate((np.repeat(np.unique(y)[0],len(Xgn)),np.repeat(np.unique(y)[1],len(Xfn))))
        # New definition of the confidence level
        if using_weights:
            C = np.concatenate((Cgn, Cfn))
        # New definition of each feature vector
        X0, X1, X2 = X[:, 0], X[:, 1], X[:, 2]
    # Printing the number of samples and features
    n0 = (y==np.unique(y)[0]).sum().astype(float)
    n1 = (y==np.unique(y)[1]).sum().astype(float)
    n_samples, n_features = X.shape
    print 'n_samples =', n_samples
    print 'n_samples_{-1} =', int(n0)
    print 'n_samples_{+1} =', int(n1)
    print 'n_features =', n_features
    # Visualization of the scaled data
    if plot_scaled:
        try:
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the data scaled to fit")
            ax.scatter(X0, X1, X2, c=y, cmap=cm_GR, s=20, edgecolors='k', vmin=y.min(), vmax=y.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('scaled_data.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Rescaling gamma to include more detailed results
    gamma = 1. / (n_features * X.std())
    print 'gamma =', gamma
    # Creating the SVM model and fitting the data using the Support Vector Machine technique
    print '>> Creating the SVM model...'
    sys.stdout.flush()
    if using_weights:
        gamma = kgam*gamma
        # Compute class balanced weights
        cls, _ = np.unique(y, return_inverse=True)
        class_weight = compute_class_weight("balanced", cls, y)
        prob = svm_problem(C,y,X)
        arg = '-g %.15g -w%01d %.15g -w%01d %.15g -m 1000 -h 0' % (gamma, cls[0], class_weight[0],
                                                                   cls[1], class_weight[1])
        param = svm_parameter(arg)
        print '>> Fitting the SVM model...'
        t_1 = time()
        clf = svm_train(prob,param)
        t_2 = time()
    else:
        t_1 = time()
        if search:
            print '>> Searching for the best values of C and gamma...'
            # Parameter grid for the grid search
            param_grid = {'C': np.logspace(0,5,6), 'gamma': gamma*np.logspace(0,5,6)}
            # Make the grid search classifier
            grid_search = GridSearchCV(svm.SVC(cache_size=2000,class_weight="balanced",probability=True), param_grid, n_jobs=-1, verbose=1, cv=5, iid=False)
            print '>> Fitting the SVM model...'
            # Train the classifier
            grid_search.fit(X, y)
            print "Best Parameters:\n", grid_search.best_params_
            clf = grid_search.best_estimator_
            print "Best Estimator:\n", clf
        else:
            gamma = kgam*gamma
            clf = svm.SVC(C=C, kernel="rbf", gamma=gamma, cache_size=2000, class_weight="balanced") # RBF kernel: exp(-gamma*||x-x'||^2)
            print clf
            print '>> Fitting the SVM model...'
            # Fitting the data using the Support Vector Machine technique
            clf.fit(X, y)
        t_2 = time()
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    if not using_weights:
        # Check if the classification failed
        if clf.fit_status_:
            print '>> FAILED <<'
            print 'Failed fitting the data'
            sys.exit(1)
        print 'number of support vectors: ', clf.n_support_
        print 'score of trained data: ', clf.score(X,y)
    # Creating the mesh grid to evaluate the classification
    print '>> Creating mesh grid to evaluate the classification...'
    nnodes = np.ceil(np.power(n_samples,1./n_features))
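    # e.g. 8000 samples with 3 features give nnodes = 20, hence a default
    # evaluation grid of 100x100x20 nodes (hN=5, vN=1)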
    if fire_grid is None:
        # Number of necessary nodes
        hnodes = hN*nnodes
        vnodes = vN*nnodes
        print 'number of horizontal nodes (%d meshgrid nodes for each observation): %d' % (hN,hnodes)
        print 'number of vertical nodes (%d meshgrid nodes for each observation): %d' % (vN,vnodes)
        # Computing the resolution of the mesh to evaluate
        sdim = (hnodes,hnodes,vnodes)
        print 'grid_size = %dx%dx%d = %d' % (sdim[0],sdim[1],sdim[2],np.prod(sdim))
        t_1 = time()
        xx, yy, zz = make_meshgrid(X0, X1, X2, s=sdim)
        t_2 = time()
    else:
        fxlon = np.divide(fire_grid[0] - xmin, xlen)
        fxlat = np.divide(fire_grid[1] - ymin, ylen)
        it = (X2.min(),X2.max())
        vnodes = vN*nnodes
        sdim = (fxlon.shape[0],fxlon.shape[1],vnodes)
        print 'fire_grid_size = %dx%dx%d = %d' % (sdim + (np.prod(sdim),))
        t_1 = time()
        xx, yy, zz = make_fire_mesh(fxlon, fxlat, it, sdim[2])
        t_2 = time()
        print 'grid_created = %dx%dx%d = %d' % (zz.shape + (np.prod(zz.shape),))
    print 'elapsed time: %ss.' % str(abs(t_2-t_1))
    # Computing the 2D fire arrival time, F
    print '>> Computing the 2D fire arrival time, F...'
    sys.stdout.flush()
    F = frontier(clf, xx, yy, zz, plot_decision=plot_decision, plot_poly=plot_poly, using_weights=using_weights)
    print '>> Creating final results...'
    sys.stdout.flush()
    # Plotting the separating hyperplane of the SVM classification with the support vectors
    if plot_supports:
        try:
            if using_weights:
                supp_ind = np.sort(clf.get_sv_indices())-1
                supp_vec = X[supp_ind]
            else:
                supp_ind = clf.support_
                supp_vec = clf.support_vectors_
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D Separating Hyperplane of an SVM")
            # plotting the separating hyperplane
            ax.plot_wireframe(F[0], F[1], F[2], color='orange', alpha=.5)
            # computing the indices of the non-support vectors
            rr = np.array(range(len(y)))
            ms = np.isin(rr,supp_ind)
            nsupp = rr[~ms]
            # plotting non-support vectors (smaller)
            ax.scatter(X0[nsupp], X1[nsupp], X2[nsupp], c=y[nsupp], cmap=cm_GR, s=.5, vmin=y.min(), vmax=y.max(), alpha=.1)
            # plotting support vectors (bigger)
            ax.scatter(supp_vec[:, 0], supp_vec[:, 1], supp_vec[:, 2], c=y[supp_ind], cmap=cm_GR, s=20, edgecolors='k', alpha=.2)
            ax.set_xlim(xx.min(),xx.max())
            ax.set_ylim(yy.min(),yy.max())
            ax.set_zlim(zz.min(),zz.max())
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('support.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Plot the normalized fire arrival time resulting from the SVM classification
    if plot_result:
        try:
            Fx, Fy, Fz = np.array(F[0]), np.array(F[1]), np.array(F[2])
            with np.errstate(invalid='ignore'):
                Fz[Fz > X2.max()] = np.nan
            if notnan:
                Fz[np.isnan(Fz)] = X2.max()
                Fz = np.minimum(Fz, X2.max())
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Fire arrival time normalized")
            # plotting fire arrival time
            p = ax.plot_surface(Fx, Fy, Fz, cmap=cm_Rds,
                                linewidth=0, antialiased=False)
            ax.set_xlim(xx.min(),xx.max())
            ax.set_ylim(yy.min(),yy.max())
            ax.set_zlim(zz.min(),zz.max())
            cbar = fig.colorbar(p)
            cbar.set_label('Fire arrival time normalized', labelpad=20, rotation=270)
            ax.set_xlabel("Longitude normalized")
            ax.set_ylabel("Latitude normalized")
            ax.set_zlabel("Time normalized")
            plt.savefig('tign_g.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    # Translate the result back into the initial data scale
    if norm:
        f0 = F[0] * xlen + xmin
        f1 = F[1] * ylen + ymin
        f2 = F[2] * zlen + zmin
        FF = [f0,f1,f2]
    else:
        # if the data was not normalized, the result is already in the data scale
        FF = [F[0],F[1],F[2]]

    # Cap values larger than the maximum detection time at the maximum value
    oX0, oX1, oX2 = oX[:, 0], oX[:, 1], oX[:, 2]
    FFx, FFy, FFz = FF[0], FF[1], FF[2]

    with np.errstate(invalid='ignore'):
        FFz[FFz > oX2.max()] = np.nan

    if notnan:
        FFz[np.isnan(FFz)] = oX2.max()
        FFz = np.minimum(FFz, oX2.max())
    FF = [FFx,FFy,FFz]
    # Plot the fire arrival time resulting from the SVM classification
    if plot_result:
        try:
            # Plotting the result
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            fig.suptitle("Plotting the 3D graph function of an SVM")
            FFx, FFy, FFz = np.array(FF[0]), np.array(FF[1]), np.array(FF[2])
            # plotting original data
            ax.scatter(oX0, oX1, oX2, c=oy, cmap=cm_GR, s=2, vmin=y.min(), vmax=y.max())
            # plotting fire arrival time
            ax.plot_wireframe(FFx, FFy, FFz, color='orange', alpha=.5)
            ax.set_xlabel("Longitude")
            ax.set_ylabel("Latitude")
            ax.set_zlabel("Time (days)")
            plt.savefig('result.png')
        except Exception as e:
            print 'Warning: something went wrong when plotting...'
            print e
    print '>> SUCCESS <<'
    t_final = time()
    print 'TOTAL elapsed time: %ss.' % str(abs(t_final-t_init))
    plt.close()

    return FF
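
# Example sketch of the fire_grid option (hypothetical grids; FF[0], FF[1] are in
# the original coordinates and FF[2] is the fire arrival time in days; passing an
# array for C requires the libsvm_weights package):
#   fxlon, fxlat = np.meshgrid(np.linspace(-113,-112,100), np.linspace(40,41,100))
#   FF = SVM3(X, y, C=c, kgam=5., fire_grid=(fxlon, fxlat))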

if __name__ == "__main__":
    # Experiment to run
    exp = 1
    search = False

    # Defining ground and fire detections
    def exp1():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0]]
        Xf = [[0, 0, 1], [1, 1, 0], [2, 2, 1], [2, 0, 1], [0, 2, 1]]
        C = np.concatenate((10.*np.ones(len(Xg)),100.*np.ones(len(Xf))))
        kgam = 1.
        return Xg, Xf, C, kgam
    def exp2():
        Xg = [[0, 0, 0], [2, 2, 0], [2, 0, 0], [0, 2, 0],
              [4, 2, 0], [4, 0, 0], [2, 1, .5], [0, 1, .5],
              [4, 1, .5], [2, 0, .5], [2, 2, .5]]
        Xf = [[0, 0, 1], [1, 1, 0.25], [2, 2, 1], [2, 0, 1], [0, 2, 1], [3, 1, 0.25], [4, 2, 1], [4, 0, 1]]
        C = np.concatenate((np.array([50.,50.,50.,50.,50.,50.,
                                      1000.,100.,100.,100.,100.]), 100.*np.ones(len(Xf))))
        kgam = 5.
        return Xg, Xf, C, kgam

    # Creating the options
    options = {1 : exp1, 2 : exp2}

    # Defining the option depending on the experiment
    Xg, Xf, C, kgam = options[exp]()

    # Creating the data necessary to run the SVM3 function
    X = np.concatenate((Xg, Xf))
    y = np.concatenate((-np.ones(len(Xg)), np.ones(len(Xf))))

    # Running the SVM classification
    SVM3(X,y,C=C,kgam=kgam,search=search)