Add abhijeet.k@samsung.com to AUTHORS list.
[chromium-blink-merge.git] / components / dom_distiller / core / page_features.h
blob236796b542aecb860e3cb03311872183e50aec4f
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_
8 #include <vector>
10 #include "base/values.h"
11 #include "url/gurl.h"
13 namespace dom_distiller {
15 // The length of the derived features vector.
// This is only a declaration; the definition (and the actual value) lives in
// another translation unit.
// NOTE(review): presumably this equals the size of the vectors returned by the
// CalculateDerivedFeatures* functions declared in this header -- confirm
// against the definition. It is declared as a mutable int rather than a
// constant; verify whether anything is expected to modify it.
16 extern int kDerivedFeaturesCount;
18 // The distillable page detector is a model trained on a list of numeric
19 // features derived from the core, more complex features of a webpage (like
20 // the body's .textContent). This derives the numeric features for a set of
21 // core features.
23 // Note: It is crucial that these features are derived in the same way and are
24 // in the same order as in the training pipeline. See //heuristics/distillable
25 // in the external DomDistillerJs repo.
//
// Parameters (meanings inferred from the names; confirm against the definition
// and the training pipeline before relying on them):
//   isOGArticle - presumably whether the page declares itself an Open Graph
//                 "article".
//   url         - the URL of the page.
//   numElements - presumably the number of elements in the page.
//   numAnchors  - presumably the number of anchor elements.
//   numForms    - presumably the number of form elements.
//   innerText   - presumably the body's .innerText.
//   textContent - presumably the body's .textContent.
//   innerHTML   - presumably the body's .innerHTML.
//
// Returns the derived numeric features as a vector of doubles.
26 std::vector<double> CalculateDerivedFeatures(bool isOGArticle,
27 const GURL& url,
28 double numElements,
29 double numAnchors,
30 double numForms,
31 const std::string& innerText,
32 const std::string& textContent,
33 const std::string& innerHTML);
35 // Calculates the derived features from the JSON value as returned by the
36 // JavaScript core feature extraction.
//
// NOTE(review): the parameter name suggests |stringified_json| is a string
// value holding serialized JSON rather than an already-parsed dictionary --
// confirm against the definition. The behavior for a null pointer or for
// malformed input is not visible from this header.
//
// Returns the derived numeric features as a vector of doubles.
37 std::vector<double> CalculateDerivedFeaturesFromJSON(
38 const base::Value* stringified_json);
40 } // namespace dom_distiller
42 #endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_