Roll src/third_party/WebKit 3aea697:d9c6159 (svn 201973:201974)
[chromium-blink-merge.git] / pdf / pdfium / pdfium_page.cc
blob93e764564f34d57a7f10e8ef9261c62438b2d91b
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "pdf/pdfium/pdfium_page.h"
7 #include <math.h>
9 #include "base/logging.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/values.h"
14 #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
15 #include "pdf/pdfium/pdfium_engine.h"
namespace {

// Used when doing hit detection. Passed as both the x and the y tolerance to
// FPDFText_GetCharIndexAtPos(). A typed constant rather than a macro so that
// it obeys scoping rules and has a definite type.
const double kTolerance = 20.0;

// Dictionary Value key names for returning the accessible page content as JSON.

// Keys of the per-page dictionary.
const char kPageWidth[] = "width";
const char kPageHeight[] = "height";
const char kPageTextBox[] = "textBox";

// Keys of each text box dictionary.
const char kTextBoxLeft[] = "left";
const char kTextBoxTop[] = "top";
const char kTextBoxWidth[] = "width";
const char kTextBoxHeight[] = "height";
const char kTextBoxFontSize[] = "fontSize";
const char kTextBoxNodes[] = "textNodes";

// Keys of each text node dictionary.
const char kTextNodeType[] = "type";
const char kTextNodeText[] = "text";
const char kTextNodeURL[] = "url";

// Values stored under |kTextNodeType|.
const char kTextNodeTypeText[] = "text";
const char kTextNodeTypeURL[] = "url";

// Prefix of the URL generated for in-document links; the destination page
// index is appended to it.
const char kDocLinkURLPrefix[] = "#page";

}  // namespace
41 namespace chrome_pdf {
43 PDFiumPage::PDFiumPage(PDFiumEngine* engine,
44 int i,
45 const pp::Rect& r,
46 bool available)
47 : engine_(engine),
48 page_(NULL),
49 text_page_(NULL),
50 index_(i),
51 loading_count_(0),
52 rect_(r),
53 calculated_links_(false),
54 available_(available) {
57 PDFiumPage::~PDFiumPage() {
58 DCHECK_EQ(0, loading_count_);
61 void PDFiumPage::Unload() {
62 // Do not unload while in the middle of a load.
63 if (loading_count_)
64 return;
66 if (text_page_) {
67 FPDFText_ClosePage(text_page_);
68 text_page_ = NULL;
71 if (page_) {
72 if (engine_->form()) {
73 FORM_OnBeforeClosePage(page_, engine_->form());
75 FPDF_ClosePage(page_);
76 page_ = NULL;
80 FPDF_PAGE PDFiumPage::GetPage() {
81 ScopedUnsupportedFeature scoped_unsupported_feature(engine_);
82 if (!available_)
83 return NULL;
84 if (!page_) {
85 ScopedLoadCounter scoped_load(this);
86 page_ = FPDF_LoadPage(engine_->doc(), index_);
87 if (page_ && engine_->form()) {
88 FORM_OnAfterLoadPage(page_, engine_->form());
91 return page_;
94 FPDF_PAGE PDFiumPage::GetPrintPage() {
95 ScopedUnsupportedFeature scoped_unsupported_feature(engine_);
96 if (!available_)
97 return NULL;
98 if (!page_) {
99 ScopedLoadCounter scoped_load(this);
100 page_ = FPDF_LoadPage(engine_->doc(), index_);
102 return page_;
105 void PDFiumPage::ClosePrintPage() {
106 // Do not close |page_| while in the middle of a load.
107 if (loading_count_)
108 return;
110 if (page_) {
111 FPDF_ClosePage(page_);
112 page_ = NULL;
116 FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
117 if (!available_)
118 return NULL;
119 if (!text_page_) {
120 ScopedLoadCounter scoped_load(this);
121 text_page_ = FPDFText_LoadPage(GetPage());
123 return text_page_;
126 base::Value* PDFiumPage::GetAccessibleContentAsValue(int rotation) {
127 base::DictionaryValue* node = new base::DictionaryValue();
129 if (!available_)
130 return node;
132 double width = FPDF_GetPageWidth(GetPage());
133 double height = FPDF_GetPageHeight(GetPage());
135 base::ListValue* text = new base::ListValue();
136 int box_count = FPDFText_CountRects(GetTextPage(), 0, GetCharCount());
137 for (int i = 0; i < box_count; i++) {
138 double left, top, right, bottom;
139 FPDFText_GetRect(GetTextPage(), i, &left, &top, &right, &bottom);
140 text->Append(
141 GetTextBoxAsValue(height, left, top, right, bottom, rotation));
144 node->SetDouble(kPageWidth, width);
145 node->SetDouble(kPageHeight, height);
146 node->Set(kPageTextBox, text); // Takes ownership of |text|
148 return node;
151 base::Value* PDFiumPage::GetTextBoxAsValue(double page_height,
152 double left, double top,
153 double right, double bottom,
154 int rotation) {
155 base::string16 text_utf16;
156 int char_count =
157 FPDFText_GetBoundedText(GetTextPage(), left, top, right, bottom, NULL, 0);
158 if (char_count > 0) {
159 unsigned short* data = reinterpret_cast<unsigned short*>(
160 base::WriteInto(&text_utf16, char_count + 1));
161 FPDFText_GetBoundedText(GetTextPage(),
162 left, top, right, bottom,
163 data, char_count);
165 std::string text_utf8 = base::UTF16ToUTF8(text_utf16);
167 FPDF_LINK link = FPDFLink_GetLinkAtPoint(GetPage(), left, top);
168 Area area;
169 std::vector<LinkTarget> targets;
170 if (link) {
171 targets.push_back(LinkTarget());
172 area = GetLinkTarget(link, &targets[0]);
173 } else {
174 pp::Rect rect(
175 PageToScreen(pp::Point(), 1.0, left, top, right, bottom, rotation));
176 GetLinks(rect, &targets);
177 area = targets.size() == 0 ? TEXT_AREA : WEBLINK_AREA;
180 int char_index = FPDFText_GetCharIndexAtPos(GetTextPage(), left, top,
181 kTolerance, kTolerance);
182 double font_size = FPDFText_GetFontSize(GetTextPage(), char_index);
184 base::DictionaryValue* node = new base::DictionaryValue();
185 node->SetDouble(kTextBoxLeft, left);
186 node->SetDouble(kTextBoxTop, page_height - top);
187 node->SetDouble(kTextBoxWidth, right - left);
188 node->SetDouble(kTextBoxHeight, top - bottom);
189 node->SetDouble(kTextBoxFontSize, font_size);
191 base::ListValue* text_nodes = new base::ListValue();
193 if (area == DOCLINK_AREA) {
194 std::string url = kDocLinkURLPrefix + base::IntToString(targets[0].page);
195 text_nodes->Append(CreateURLNode(text_utf8, url));
196 } else if (area == WEBLINK_AREA && link) {
197 text_nodes->Append(CreateURLNode(text_utf8, targets[0].url));
198 } else if (area == WEBLINK_AREA && !link) {
199 size_t start = 0;
200 for (size_t i = 0; i < targets.size(); ++i) {
201 // If there is an extra NULL character at end, find() will not return any
202 // matches. There should not be any though.
203 if (!targets[i].url.empty())
204 DCHECK(targets[i].url[targets[i].url.size() - 1] != '\0');
206 // PDFium may change the case of generated links.
207 std::string lowerCaseURL = base::ToLowerASCII(targets[i].url);
208 std::string lowerCaseText = base::ToLowerASCII(text_utf8);
209 size_t pos = lowerCaseText.find(lowerCaseURL, start);
210 size_t length = targets[i].url.size();
211 if (pos == std::string::npos) {
212 // Check if the link is a "mailto:" URL
213 if (lowerCaseURL.compare(0, 7, "mailto:") == 0) {
214 pos = lowerCaseText.find(lowerCaseURL.substr(7), start);
215 length -= 7;
218 if (pos == std::string::npos) {
219 // No match has been found. This should never happen.
220 continue;
224 std::string before_text = text_utf8.substr(start, pos - start);
225 if (before_text.size() > 0)
226 text_nodes->Append(CreateTextNode(before_text));
227 std::string link_text = text_utf8.substr(pos, length);
228 text_nodes->Append(CreateURLNode(link_text, targets[i].url));
230 start = pos + length;
232 std::string before_text = text_utf8.substr(start);
233 if (before_text.size() > 0)
234 text_nodes->Append(CreateTextNode(before_text));
235 } else {
236 text_nodes->Append(CreateTextNode(text_utf8));
239 node->Set(kTextBoxNodes, text_nodes); // Takes ownership of |text_nodes|.
240 return node;
243 base::Value* PDFiumPage::CreateTextNode(std::string text) {
244 base::DictionaryValue* node = new base::DictionaryValue();
245 node->SetString(kTextNodeType, kTextNodeTypeText);
246 node->SetString(kTextNodeText, text);
247 return node;
250 base::Value* PDFiumPage::CreateURLNode(std::string text, std::string url) {
251 base::DictionaryValue* node = new base::DictionaryValue();
252 node->SetString(kTextNodeType, kTextNodeTypeURL);
253 node->SetString(kTextNodeText, text);
254 node->SetString(kTextNodeURL, url);
255 return node;
258 PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point,
259 int rotation,
260 int* char_index,
261 int* form_type,
262 LinkTarget* target) {
263 if (!available_)
264 return NONSELECTABLE_AREA;
265 pp::Point point2 = point - rect_.point();
266 double new_x;
267 double new_y;
268 FPDF_DeviceToPage(GetPage(), 0, 0, rect_.width(), rect_.height(),
269 rotation, point2.x(), point2.y(), &new_x, &new_y);
271 int rv = FPDFText_GetCharIndexAtPos(
272 GetTextPage(), new_x, new_y, kTolerance, kTolerance);
273 *char_index = rv;
275 FPDF_LINK link = FPDFLink_GetLinkAtPoint(GetPage(), new_x, new_y);
276 int control =
277 FPDPage_HasFormFieldAtPoint(engine_->form(), GetPage(), new_x, new_y);
279 // If there is a control and link at the same point, figure out their z-order
280 // to determine which is on top.
281 if (link && control > FPDF_FORMFIELD_UNKNOWN) {
282 int control_z_order = FPDFPage_FormFieldZOrderAtPoint(
283 engine_->form(), GetPage(), new_x, new_y);
284 int link_z_order = FPDFLink_GetLinkZOrderAtPoint(GetPage(), new_x, new_y);
285 DCHECK_NE(control_z_order, link_z_order);
286 if (control_z_order > link_z_order) {
287 *form_type = control;
288 return PDFiumPage::NONSELECTABLE_AREA;
291 // We don't handle all possible link types of the PDF. For example,
292 // launch actions, cross-document links, etc.
293 // In that case, GetLinkTarget() will return NONSELECTABLE_AREA
294 // and we should proceed with area detection.
295 PDFiumPage::Area area = GetLinkTarget(link, target);
296 if (area != PDFiumPage::NONSELECTABLE_AREA)
297 return area;
298 } else if (link) {
299 // We don't handle all possible link types of the PDF. For example,
300 // launch actions, cross-document links, etc.
301 // See identical block above.
302 PDFiumPage::Area area = GetLinkTarget(link, target);
303 if (area != PDFiumPage::NONSELECTABLE_AREA)
304 return area;
305 } else if (control > FPDF_FORMFIELD_UNKNOWN) {
306 *form_type = control;
307 return PDFiumPage::NONSELECTABLE_AREA;
310 if (rv < 0)
311 return NONSELECTABLE_AREA;
313 return GetLink(*char_index, target) != -1 ? WEBLINK_AREA : TEXT_AREA;
316 base::char16 PDFiumPage::GetCharAtIndex(int index) {
317 if (!available_)
318 return L'\0';
319 return static_cast<base::char16>(FPDFText_GetUnicode(GetTextPage(), index));
322 int PDFiumPage::GetCharCount() {
323 if (!available_)
324 return 0;
325 return FPDFText_CountChars(GetTextPage());
328 PDFiumPage::Area PDFiumPage::GetLinkTarget(
329 FPDF_LINK link, PDFiumPage::LinkTarget* target) {
330 FPDF_DEST dest = FPDFLink_GetDest(engine_->doc(), link);
331 if (dest != NULL)
332 return GetDestinationTarget(dest, target);
334 FPDF_ACTION action = FPDFLink_GetAction(link);
335 if (action) {
336 switch (FPDFAction_GetType(action)) {
337 case PDFACTION_GOTO: {
338 FPDF_DEST dest = FPDFAction_GetDest(engine_->doc(), action);
339 if (dest)
340 return GetDestinationTarget(dest, target);
341 // TODO(gene): We don't fully support all types of the in-document
342 // links. Need to implement that. There is a bug to track that:
343 // http://code.google.com/p/chromium/issues/detail?id=55776
344 } break;
345 case PDFACTION_URI: {
346 if (target) {
347 size_t buffer_size =
348 FPDFAction_GetURIPath(engine_->doc(), action, NULL, 0);
349 if (buffer_size > 0) {
350 PDFiumAPIStringBufferAdapter<std::string> api_string_adapter(
351 &target->url, buffer_size, true);
352 void* data = api_string_adapter.GetData();
353 size_t bytes_written = FPDFAction_GetURIPath(
354 engine_->doc(), action, data, buffer_size);
355 api_string_adapter.Close(bytes_written);
358 return WEBLINK_AREA;
359 } break;
360 // TODO(gene): We don't support PDFACTION_REMOTEGOTO and PDFACTION_LAUNCH
361 // at the moment.
365 return NONSELECTABLE_AREA;
368 PDFiumPage::Area PDFiumPage::GetDestinationTarget(
369 FPDF_DEST destination, PDFiumPage::LinkTarget* target) {
370 int page_index = FPDFDest_GetPageIndex(engine_->doc(), destination);
371 if (target) {
372 target->page = page_index;
374 return DOCLINK_AREA;
377 int PDFiumPage::GetLink(int char_index, PDFiumPage::LinkTarget* target) {
378 if (!available_)
379 return -1;
381 CalculateLinks();
383 // Get the bounding box of the rect again, since it might have moved because
384 // of the tolerance above.
385 double left, right, bottom, top;
386 FPDFText_GetCharBox(GetTextPage(), char_index, &left, &right, &bottom, &top);
388 pp::Point origin(
389 PageToScreen(pp::Point(), 1.0, left, top, right, bottom, 0).point());
390 for (size_t i = 0; i < links_.size(); ++i) {
391 for (size_t j = 0; j < links_[i].rects.size(); ++j) {
392 if (links_[i].rects[j].Contains(origin)) {
393 if (target)
394 target->url = links_[i].url;
395 return i;
399 return -1;
402 std::vector<int> PDFiumPage::GetLinks(pp::Rect text_area,
403 std::vector<LinkTarget>* targets) {
404 if (!available_)
405 return std::vector<int>();
407 CalculateLinks();
409 std::vector<int> links;
411 for (size_t i = 0; i < links_.size(); ++i) {
412 for (size_t j = 0; j < links_[i].rects.size(); ++j) {
413 if (links_[i].rects[j].Intersects(text_area)) {
414 if (targets) {
415 LinkTarget target;
416 target.url = links_[i].url;
417 targets->push_back(target);
419 links.push_back(i);
423 return links;
426 void PDFiumPage::CalculateLinks() {
427 if (calculated_links_)
428 return;
430 calculated_links_ = true;
431 FPDF_PAGELINK links = FPDFLink_LoadWebLinks(GetTextPage());
432 int count = FPDFLink_CountWebLinks(links);
433 for (int i = 0; i < count; ++i) {
434 base::string16 url;
435 int url_length = FPDFLink_GetURL(links, i, NULL, 0);
436 if (url_length > 0) {
437 PDFiumAPIStringBufferAdapter<base::string16> api_string_adapter(
438 &url, url_length, true);
439 unsigned short* data =
440 reinterpret_cast<unsigned short*>(api_string_adapter.GetData());
441 int actual_length = FPDFLink_GetURL(links, i, data, url_length);
442 api_string_adapter.Close(actual_length);
444 Link link;
445 link.url = base::UTF16ToUTF8(url);
447 // If the link cannot be converted to a pp::Var, then it is not possible to
448 // pass it to JS. In this case, ignore the link like other PDF viewers.
449 // See http://crbug.com/312882 for an example.
450 pp::Var link_var(link.url);
451 if (!link_var.is_string())
452 continue;
454 // Make sure all the characters in the URL are valid per RFC 1738.
455 // http://crbug.com/340326 has a sample bad PDF.
456 // GURL does not work correctly, e.g. it just strips \t \r \n.
457 bool is_invalid_url = false;
458 for (size_t j = 0; j < link.url.length(); ++j) {
459 // Control characters are not allowed.
460 // 0x7F is also a control character.
461 // 0x80 and above are not in US-ASCII.
462 if (link.url[j] < ' ' || link.url[j] >= '\x7F') {
463 is_invalid_url = true;
464 break;
467 if (is_invalid_url)
468 continue;
470 int rect_count = FPDFLink_CountRects(links, i);
471 for (int j = 0; j < rect_count; ++j) {
472 double left, top, right, bottom;
473 FPDFLink_GetRect(links, i, j, &left, &top, &right, &bottom);
474 link.rects.push_back(
475 PageToScreen(pp::Point(), 1.0, left, top, right, bottom, 0));
477 links_.push_back(link);
479 FPDFLink_CloseWebLinks(links);
482 pp::Rect PDFiumPage::PageToScreen(const pp::Point& offset,
483 double zoom,
484 double left,
485 double top,
486 double right,
487 double bottom,
488 int rotation) {
489 if (!available_)
490 return pp::Rect();
492 int new_left, new_top, new_right, new_bottom;
493 FPDF_PageToDevice(
494 page_,
495 static_cast<int>((rect_.x() - offset.x()) * zoom),
496 static_cast<int>((rect_.y() - offset.y()) * zoom),
497 static_cast<int>(ceil(rect_.width() * zoom)),
498 static_cast<int>(ceil(rect_.height() * zoom)),
499 rotation, left, top, &new_left, &new_top);
500 FPDF_PageToDevice(
501 page_,
502 static_cast<int>((rect_.x() - offset.x()) * zoom),
503 static_cast<int>((rect_.y() - offset.y()) * zoom),
504 static_cast<int>(ceil(rect_.width() * zoom)),
505 static_cast<int>(ceil(rect_.height() * zoom)),
506 rotation, right, bottom, &new_right, &new_bottom);
508 // If the PDF is rotated, the horizontal/vertical coordinates could be
509 // flipped. See
510 // http://www.netl.doe.gov/publications/proceedings/03/ubc/presentations/Goeckner-pres.pdf
511 if (new_right < new_left)
512 std::swap(new_right, new_left);
513 if (new_bottom < new_top)
514 std::swap(new_bottom, new_top);
516 return pp::Rect(
517 new_left, new_top, new_right - new_left + 1, new_bottom - new_top + 1);
520 PDFiumPage::ScopedLoadCounter::ScopedLoadCounter(PDFiumPage* page)
521 : page_(page) {
522 page_->loading_count_++;
525 PDFiumPage::ScopedLoadCounter::~ScopedLoadCounter() {
526 page_->loading_count_--;
529 PDFiumPage::Link::Link() {
532 PDFiumPage::Link::~Link() {
535 } // namespace chrome_pdf