Roll src/third_party/WebKit 3aea697:d9c6159 (svn 201973:201974)
[chromium-blink-merge.git] / components / dom_distiller / core / distillable_page_detector.h
blob fbe36bf88194503e28f26ae0406f49cfa8ef91e9
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLABLE_PAGE_DETECTOR_H_
6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLABLE_PAGE_DETECTOR_H_
8 #include <vector>
10 #include "base/memory/scoped_ptr.h"
11 #include "components/dom_distiller/core/proto/adaboost.pb.h"
13 namespace dom_distiller {
15 // DistillablePageDetector provides methods to identify whether or not a page is
16 // likely to be distillable based on a vector of derived features (see
17 // dom_distiller::CalculateDerivedFeatures). It uses a simple AdaBoost-trained
18 // model.
19 class DistillablePageDetector {
20 public:
// Returns a pointer to a const detector. Being static, it can be called
// without an existing instance.
// NOTE(review): which model backs the default detector, and who owns the
// returned object / how long it lives, is not visible in this header --
// confirm against the .cc before caching or deleting the pointer.
21 static const DistillablePageDetector* GetDefault();
// Constructs a detector from an AdaBoost model. |proto| is passed as a
// scoped_ptr by value, so the caller gives up ownership of the model.
// NOTE(review): presumably the model ends up in |proto_|; whether a null
// |proto| is tolerated cannot be determined from the declaration.
22 explicit DistillablePageDetector(scoped_ptr<AdaBoostProto> proto);
23 ~DistillablePageDetector();
25 // Returns true if the model classifies the vector of features as a
26 // distillable page.
// NOTE(review): presumably equivalent to comparing Score(features) against
// GetThreshold(); the exact comparison lives in the .cc -- confirm there.
27 bool Classify(const std::vector<double>& features) const;
// Returns a numeric score for |features|. Declared const, so it does not
// modify the detector's non-mutable state.
// NOTE(review): presumably the raw model output that Classify() thresholds;
// the expected length and ordering of |features| are not specified in this
// header -- confirm against the model and the feature-derivation code.
29 double Score(const std::vector<double>& features) const;
// Returns the detector's threshold value.
// NOTE(review): presumably the value of |threshold_|, i.e. the decision
// boundary applied to Score() -- confirm in the .cc.
30 double GetThreshold() const;
31 private:
// The AdaBoost model, held through an owning scoped_ptr.
32 scoped_ptr<AdaBoostProto> proto_;
// NOTE(review): presumably the decision threshold derived from |proto_| at
// construction time; how it is computed is not shown here -- TODO confirm.
33 double threshold_;
// Macro defined outside this file; by its name it disables the copy
// constructor and copy assignment for this class.
34 DISALLOW_COPY_AND_ASSIGN(DistillablePageDetector);
// NOTE(review): the class's closing "};" does not appear in this copy of the
// file; the next line closes the namespace. Verify against the upstream
// header.
38 } // namespace dom_distiller
40 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLABLE_PAGE_DETECTOR_H_