1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
// NOTE(review): no `syntax` statement is visible in this view. protoc assumes
// proto2 when none is given, which matches the `optional` labels used by the
// messages in this file -- confirm whether one should be declared explicitly.
package dom_distiller.proto;

// Generate code against the lite runtime.
option optimize_for = LITE_RUNTIME;

// Java code generation: target package and the name of the outer class that
// wraps the generated message classes.
option java_package = "com.dom_distiller.proto";
option java_outer_classname = "DomDistillerProtos";
// Holds the distilled content of a page.
message DistilledContent {
  // The content as an HTML string.
  optional string html = 1;
}
// Pagination links associated with a page.
// NOTE(review): the values are plain strings; presumably URLs -- confirm
// against the code that fills this message.
message PaginationInfo {
  // Link to the next page.
  optional string next_page = 1;

  // Link to the previous page.
  optional string prev_page = 2;

  // Link to the canonical page.
  optional string canonical_page = 3;
}
// Article-level metadata.
// NOTE(review): the *_time fields are strings and this file does not state
// their format -- confirm with the producer before parsing them.
message MarkupArticle {
  optional string published_time = 1;
  optional string modified_time = 2;
  optional string expiration_time = 3;

  // Section the article belongs to.
  optional string section = 4;

  // Authors of the article, one entry per author.
  repeated string authors = 5;
}
// Metadata describing a single image. This is the element type of the
// `repeated MarkupImage images` field declared elsewhere in this file.
message MarkupImage {
  optional string url = 1;
  optional string secure_url = 2;

  // NOTE(review): presumably a MIME type -- confirm against the producer.
  optional string type = 3;
  optional string caption = 4;

  // Image dimensions. Units are not stated in this file (presumably pixels --
  // TODO confirm).
  optional int32 width = 5;
  optional int32 height = 6;
}
// Page-level metadata. All scalar fields are free-form strings.
message MarkupInfo {
  optional string title = 1;
  optional string type = 2;
  optional string url = 3;
  optional string description = 4;
  optional string publisher = 5;
  optional string copyright = 6;
  optional string author = 7;

  // Article-specific metadata.
  optional MarkupArticle article = 8;

  // Images associated with the page, one entry per image.
  repeated MarkupImage images = 9;
}
// A single named timing measurement. This is the element type of the
// `repeated TimingEntry other_times` field declared elsewhere in this file.
message TimingEntry {
  // Label identifying what was measured.
  optional string name = 1;

  // Measured time. Units are not stated in this file -- TODO confirm.
  optional double time = 2;
}
// Timing breakdown of a distillation run. Units are not stated in this file
// -- TODO confirm with the code that fills these fields.
message TimingInfo {
  optional double markup_parsing_time = 1;
  optional double document_construction_time = 2;
  optional double article_processing_time = 3;
  optional double formatting_time = 4;
  optional double total_time = 5;

  // A place to hold arbitrary breakdowns of time. The perf scoring/server
  // should display these entries with appropriate names.
  repeated TimingEntry other_times = 6;
}
// Debug output attached to a result.
message DebugInfo {
  // Log text.
  optional string log = 1;
}
// Statistics attached to a result.
message StatisticsInfo {
  // Number of words counted.
  // NOTE(review): which text is counted is not stated in this file.
  optional int32 word_count = 1;
}
// Top-level result message: bundles the title, the distilled content and
// the associated pagination, markup, timing, debug and statistics data.
message DomDistillerResult {
  optional string title = 1;
  optional DistilledContent distilled_content = 2;
  optional PaginationInfo pagination_info = 3;

  // Field number 4 is not used by any field declared here.
  // NOTE(review): presumably it belonged to a removed field; do not reuse it
  // without confirming.
  optional MarkupInfo markup_info = 5;
  optional TimingInfo timing_info = 6;
  optional DebugInfo debug_info = 7;
  optional StatisticsInfo statistics_info = 8;

  // NOTE(review): the allowed values are not stated in this file (presumably
  // values such as "ltr" / "rtl" -- confirm).
  optional string text_direction = 9;

  // Represents an image found in the content of a page.
  message ContentImage {
    optional string url = 1;
  }

  // Images found in the content, one entry per image.
  repeated ContentImage content_images = 10;
}
// Options that control a distillation run.
message DomDistillerOptions {
  // Whether to extract only the text (or to include the containing html).
  optional bool extract_text_only = 1;

  // How much debug output to dump to window.console.
  // (1): Text Node data for each stage of processing
  // (2): (1) and some node visibility information
  // (3): (2) and extracted paging information
  // NOTE(review): behavior for other values (including unset) is not
  // described here; presumably no debug output -- confirm.
  optional int32 debug_level = 2;

  // The original domain of the page if it's a file://, used for detecting
  // next/prev page links, which expects the same domain for both the current
  // page and the paging links.
  optional string original_domain = 3;
}