1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_
10 #include "base/values.h"
13 namespace dom_distiller
{
// Derives the numeric feature vector consumed by the distillable page
// detector. The detector is a model trained on a list of numeric features,
// each of which is computed from the more complex "core" features of a
// webpage (for example, the body's .textContent). This function computes
// those numeric features for one set of core features.
//
// The core features are supplied directly as arguments:
//   isOGArticle  - boolean core feature (presumably whether the page is
//                  marked as an OpenGraph article; confirm against the
//                  feature extraction script).
//   innerText, textContent, innerHTML
//                - string core features (presumably the like-named DOM
//                  properties of the page body; confirm against the caller).
//
// Note: the features must be derived in exactly the same way, and returned in
// exactly the same order, as in the training pipeline. See
// //heuristics/distillable in the external DomDistillerJs repo.
std::vector<double> CalculateDerivedFeatures(bool isOGArticle,
                                             const std::string& innerText,
                                             const std::string& textContent,
                                             const std::string& innerHTML);
32 // Calculates the derived features from the JSON value as returned by the
33 // javascript core feature extraction.
34 std::vector
<double> CalculateDerivedFeaturesFromJSON(const base::Value
* json
);
36 } // namespace dom_distiller
38 #endif // COMPONENTS_DOM_DISTILLER_CORE_PAGE_FEATURES_H_