Generate file attachment transactions for explicit Remarkup attachments on common...
[phabricator.git] / src / applications / files / document / PhabricatorJupyterDocumentEngine.php
blob20474b047b69386eb998046758e31fb21c507e52
1 <?php
3 final class PhabricatorJupyterDocumentEngine
4 extends PhabricatorDocumentEngine {
6 const ENGINEKEY = 'jupyter';
/**
 * Get the "view as" label for this engine.
 *
 * @param PhabricatorDocumentRef $ref Document reference; not consulted.
 * @return string The fixed label text, passed through pht().
 */
public function getViewAsLabel(PhabricatorDocumentRef $ref) {
  $label = pht('View as Jupyter Notebook');

  return $label;
}
/**
 * Get the icon identifier used for this engine.
 *
 * @param PhabricatorDocumentRef $ref Document reference; not consulted.
 * @return string Always 'fa-sun-o'.
 */
protected function getDocumentIconIcon(PhabricatorDocumentRef $ref) {
  $icon = 'fa-sun-o';

  return $icon;
}
/**
 * Get the text describing an in-progress render of this document type.
 *
 * @param PhabricatorDocumentRef $ref Document reference; not consulted.
 * @return string The fixed message text, passed through pht().
 */
protected function getDocumentRenderingText(PhabricatorDocumentRef $ref) {
  $text = pht('Rendering Jupyter Notebook...');

  return $text;
}
/**
 * Report whether this engine wants asynchronous rendering.
 *
 * @param PhabricatorDocumentRef $ref Document reference; not consulted.
 * @return bool Always true.
 */
public function shouldRenderAsync(PhabricatorDocumentRef $ref) {
  return true;
}
/**
 * Score how well this engine matches a document, based on its name.
 *
 * @param PhabricatorDocumentRef $ref Document reference whose name is
 *   inspected.
 * @return int 2000 when the name ends in ".ipynb" (case-insensitive),
 *   otherwise 500.
 */
protected function getContentScore(PhabricatorDocumentRef $ref) {
  $has_notebook_extension = preg_match('/\\.ipynb\z/i', $ref->getName());

  return $has_notebook_extension ? 2000 : 500;
}
/**
 * Decide whether this engine can render the given document.
 *
 * The decision is delegated entirely to the reference's isProbablyJSON()
 * check.
 *
 * @param PhabricatorDocumentRef $ref Document reference to test.
 * @return mixed Whatever isProbablyJSON() returns for the reference.
 */
protected function canRenderDocumentType(PhabricatorDocumentRef $ref) {
  $looks_like_json = $ref->isProbablyJSON();

  return $looks_like_json;
}
/**
 * Report whether this engine can diff a pair of documents.
 *
 * @param PhabricatorDocumentRef|null $uref First document; not consulted.
 * @param PhabricatorDocumentRef|null $vref Second document; not consulted.
 * @return bool Always true.
 */
public function canDiffDocuments(
  PhabricatorDocumentRef $uref = null,
  PhabricatorDocumentRef $vref = null) {

  return true;
}
/**
 * Build the block lists for up to two documents.
 *
 * Each non-null reference is split into blocks with newDiffBlocks(); a
 * missing reference contributes an empty list. If anything throws an
 * Exception, it is logged with phlog() and its message is attached to the
 * result instead of propagating to the caller.
 *
 * @param PhabricatorDocumentRef|null $uref First document, if any.
 * @param PhabricatorDocumentRef|null $vref Second document, if any.
 * @return PhabricatorDocumentEngineBlocks Block container, possibly holding
 *   only an error message.
 */
public function newEngineBlocks(
  PhabricatorDocumentRef $uref = null,
  PhabricatorDocumentRef $vref = null) {

  $result = new PhabricatorDocumentEngineBlocks();

  try {
    $u_list = $uref ? $this->newDiffBlocks($uref) : array();
    $v_list = $vref ? $this->newDiffBlocks($vref) : array();

    // Only attach the lists once both sides were built successfully.
    $result->addBlockList($uref, $u_list);
    $result->addBlockList($vref, $v_list);
  } catch (Exception $ex) {
    phlog($ex);
    $result->addMessage($ex->getMessage());
  }

  return $result;
}
/**
 * Build the side-by-side views for a pair of blocks.
 *
 * When both cells share the same "cell_type", two types get specialized
 * rendering:
 *
 *   - "markdown": the two sources are compared with
 *     PhutilProseDifferenceEngine, and each side shows the unchanged parts
 *     plus its own removed ("-") or added ("+") parts.
 *   - "code/line": intraline segments are computed from the "raw" text of
 *     each line and applied to each side's "display" markup.
 *
 * Every other combination is delegated to the parent implementation.
 *
 * @param PhabricatorDocumentRef $uref Reference for the first document.
 * @param PhabricatorDocumentEngineBlock $ublock Block from the first
 *   document.
 * @param PhabricatorDocumentRef $vref Reference for the second document.
 * @param PhabricatorDocumentEngineBlock $vblock Block from the second
 *   document.
 * @return mixed A PhabricatorDocumentEngineBlockDiff for the specialized
 *   cases, otherwise whatever the parent implementation returns.
 */
public function newBlockDiffViews(
  PhabricatorDocumentRef $uref,
  PhabricatorDocumentEngineBlock $ublock,
  PhabricatorDocumentRef $vref,
  PhabricatorDocumentEngineBlock $vblock) {

  $old_cell = $ublock->getContent();
  $new_cell = $vblock->getContent();

  $old_type = idx($old_cell, 'cell_type');
  $new_type = idx($new_cell, 'cell_type');

  // Cells of differing types get no specialized comparison.
  if ($old_type !== $new_type) {
    return parent::newBlockDiffViews($uref, $ublock, $vref, $vblock);
  }

  switch ($old_type) {
    case 'markdown':
      $old_source = $this->readString($old_cell, 'source');
      $new_source = $this->readString($new_cell, 'source');

      $diff = id(new PhutilProseDifferenceEngine())
        ->getDiff($old_source, $new_source);

      // Each side keeps the common text plus only its own changes.
      $old_view = $this->newCellContainer(
        $this->newJupyterCell(
          null,
          $this->newProseDiffCell($diff, array('=', '-')),
          null));

      $new_view = $this->newCellContainer(
        $this->newJupyterCell(
          null,
          $this->newProseDiffCell($diff, array('=', '+')),
          null));

      return id(new PhabricatorDocumentEngineBlockDiff())
        ->setOldContent($old_view)
        ->addOldClass('old')
        ->setNewContent($new_view)
        ->addNewClass('new');
    case 'code/line':
      $old_raw = idx($old_cell, 'raw');
      $new_raw = idx($new_cell, 'raw');
      $old_display = idx($old_cell, 'display');
      $new_display = idx($new_cell, 'display');

      $intraline = ArcanistDiffUtils::generateIntralineDiff(
        $old_raw,
        $new_raw);

      // Copy the segments for each side into fresh, sequentially indexed
      // lists.
      $old_segments = array();
      foreach ($intraline[0] as $segment) {
        $old_segments[] = $segment;
      }

      $new_segments = array();
      foreach ($intraline[1] as $segment) {
        $new_segments[] = $segment;
      }

      $old_markup = PhabricatorDifferenceEngine::applyIntralineDiff(
        $old_display,
        $old_segments);

      $new_markup = PhabricatorDifferenceEngine::applyIntralineDiff(
        $new_display,
        $new_segments);

      list($old_label, $old_body) = $this->newCodeLineCell(
        $old_cell,
        $old_markup);
      list($new_label, $new_body) = $this->newCodeLineCell(
        $new_cell,
        $new_markup);

      $classes = array(
        'jupyter-cell-flush',
      );

      $old_view = $this->newCellContainer(
        $this->newJupyterCell($old_label, $old_body, $classes));
      $new_view = $this->newCellContainer(
        $this->newJupyterCell($new_label, $new_body, $classes));

      return id(new PhabricatorDocumentEngineBlockDiff())
        ->setOldContent($old_view)
        ->addOldClass('old')
        ->setNewContent($new_view)
        ->addNewClass('new');
  }

  return parent::newBlockDiffViews($uref, $ublock, $vref, $vblock);
}
/**
 * Render a single block as a standalone notebook fragment.
 *
 * The block's cell is rendered with renderJupyterCell() and then wrapped by
 * newCellContainer().
 *
 * @param PhabricatorDocumentRef $ref Document reference; not consulted.
 * @param PhabricatorDocumentEngineBlock $block Block whose content is the
 *   cell to render.
 * @return mixed The container tag produced by newCellContainer().
 */
public function newBlockContentView(
  PhabricatorDocumentRef $ref,
  PhabricatorDocumentEngineBlock $block) {

  $row = $this->renderJupyterCell(
    $this->getViewer(),
    $block->getContent());

  return $this->newCellContainer($row);
}
/**
 * Wrap rendered cell content in the notebook table and its outer div.
 *
 * The content is placed in a <table class="jupyter-notebook">, which is in
 * turn placed in a <div> carrying both the "document-engine-jupyter" and
 * "document-engine-diff" classes.
 *
 * @param mixed $cell_content Content to place inside the table.
 * @return mixed The outer div tag.
 */
private function newCellContainer($cell_content) {
  $table_attributes = array(
    'class' => 'jupyter-notebook',
  );

  $container_attributes = array(
    'class' => 'document-engine-jupyter document-engine-diff',
  );

  return phutil_tag(
    'div',
    $container_attributes,
    phutil_tag('table', $table_attributes, $cell_content));
}
/**
 * Render one side of a prose diff as a markdown cell.
 *
 * Only parts whose type appears in the mask are kept. Removed ("-") and
 * added ("+") parts are wrapped in a <span class="bright">; unchanged ("=")
 * parts are emitted as plain text. Parts of any other type are dropped.
 *
 * @param PhutilProseDiff $diff Diff whose parts are rendered.
 * @param array $mask List of part types to include, e.g. array('=', '-').
 * @return array Two-element list: a null label, then a
 *   <div class="jupyter-cell-markdown"> holding the rendered parts.
 */
private function newProseDiffCell(PhutilProseDiff $diff, array $mask) {
  $allowed = array_fuse($mask);

  $pieces = array();
  foreach ($diff->getParts() as $part) {
    $type = $part['type'];
    $text = $part['text'];

    if (!isset($allowed[$type])) {
      continue;
    }

    switch ($type) {
      case '-':
      case '+':
        // Both kinds of change are emphasized the same way.
        $pieces[] = phutil_tag(
          'span',
          array(
            'class' => 'bright',
          ),
          $text);
        break;
      case '=':
        $pieces[] = $text;
        break;
    }
  }

  $body = phutil_tag(
    'div',
    array(
      'class' => 'jupyter-cell-markdown',
    ),
    $pieces);

  return array(null, $body);
}
/**
 * Split a document into diffable blocks, one per (split) cell.
 *
 * Cells come from newCells() in diff mode. Each block is keyed by its
 * 1-based position and carries a difference hash computed with the
 * 'document-engine.content-digest' named key.
 *
 * @param PhabricatorDocumentRef $ref Document to load and split.
 * @return array List of PhabricatorDocumentEngineBlock objects.
 */
private function newDiffBlocks(PhabricatorDocumentRef $ref) {
  // NOTE: The viewer is fetched but not read anywhere below.
  $viewer = $this->getViewer();

  $cells = $this->newCells($ref->loadData(), true);

  $blocks = array();
  foreach ($cells as $cell) {
    // For source lines and markdown, hashing just the text is enough; we do
    // not need to hash all of the cell metadata. Anything else is hashed in
    // its serialized form.
    switch (idx($cell, 'cell_type')) {
      case 'code/line':
        $digest_input = $cell['raw'];
        break;
      case 'markdown':
        $digest_input = $this->readString($cell, 'source');
        break;
      default:
        $digest_input = serialize($cell);
        break;
    }

    $digest = PhabricatorHash::digestWithNamedKey(
      $digest_input,
      'document-engine.content-digest');

    // Block keys are sequential, starting at 1.
    $block_key = count($blocks) + 1;

    $blocks[] = id(new PhabricatorDocumentEngineBlock())
      ->setBlockKey($block_key)
      ->setDifferenceHash($digest)
      ->setContent($cell);
  }

  return $blocks;
}
/**
 * Render a whole document as a notebook.
 *
 * Cells are extracted with newCells() in non-diff mode. If extraction
 * throws an Exception, its message is returned via newMessage() instead.
 * Otherwise each cell becomes one row of a <table class="jupyter-notebook">
 * inside a <div class="document-engine-jupyter">.
 *
 * @param PhabricatorDocumentRef $ref Document to load and render.
 * @return mixed The container div, or the result of newMessage() on
 *   failure.
 */
protected function newDocumentContent(PhabricatorDocumentRef $ref) {
  $viewer = $this->getViewer();
  $raw = $ref->loadData();

  try {
    $cells = $this->newCells($raw, false);
  } catch (Exception $ex) {
    return $this->newMessage($ex->getMessage());
  }

  $rows = array();
  foreach ($cells as $cell) {
    $rows[] = $this->renderJupyterCell($viewer, $cell);
  }

  $table = phutil_tag(
    'table',
    array(
      'class' => 'jupyter-notebook',
    ),
    $rows);

  return phutil_tag(
    'div',
    array(
      'class' => 'document-engine-jupyter',
    ),
    $table);
}
/**
 * Decode notebook JSON and extract its list of cells.
 *
 * The document must decode to an array, carry an "nbformat" equal to the
 * integer 4, and have a non-empty "cells" list in which every entry is
 * itself an array. Any violation is reported by throwing an Exception with
 * a human-readable message.
 *
 * In diff mode the cells are split into finer-grained pieces:
 *
 *   - "markdown" cells are split on runs of blank lines, one cell per chunk;
 *   - "code" cells become one "code/line" cell per source line, followed by
 *     one "code/output" cell per output;
 *   - all other cells pass through unchanged.
 *
 * @param string $content Raw document content, expected to be JSON.
 * @param bool $for_diff True to split cells for a diff view; false to
 *   return the decoded cells as-is.
 * @return array List of cell arrays.
 * @throws Exception If the document is not a usable version 4 notebook.
 */
private function newCells($content, $for_diff) {
  try {
    $data = phutil_json_decode($content);
  } catch (PhutilJSONParserException $ex) {
    throw new Exception(
      pht(
        'This is not a valid JSON document and can not be rendered as '.
        'a Jupyter notebook: %s.',
        $ex->getMessage()));
  }

  if (!is_array($data)) {
    throw new Exception(
      pht(
        'This document does not encode a valid JSON object and can not '.
        'be rendered as a Jupyter notebook.'));
  }

  // Treat an absent, empty, or non-scalar "nbformat" as missing. Checking
  // the type first means strlen() never sees null or an array.
  $nbformat = idx($data, 'nbformat');
  if (!is_scalar($nbformat) || !strlen((string)$nbformat)) {
    throw new Exception(
      pht(
        'This document is missing an "nbformat" field. Jupyter notebooks '.
        'must have this field.'));
  }

  if ($nbformat !== 4) {
    throw new Exception(
      pht(
        'This Jupyter notebook uses an unsupported version of the file '.
        'format (found version %s, expected version 4).',
        $nbformat));
  }

  $cells = idx($data, 'cells');
  if (!is_array($cells)) {
    throw new Exception(
      pht(
        'This Jupyter notebook does not specify a list of "cells".'));
  }

  if (!$cells) {
    throw new Exception(
      pht(
        'This Jupyter notebook does not specify any notebook cells.'));
  }

  // The cell renderers in this class declare their cell parameter as
  // "array", so reject malformed cells here with an ordinary Exception
  // rather than letting them reach those methods.
  foreach ($cells as $cell) {
    if (!is_array($cell)) {
      throw new Exception(
        pht(
          'This Jupyter notebook contains a cell which is not a JSON '.
          'object.'));
    }
  }

  if (!$for_diff) {
    return $cells;
  }

  // If we're extracting cells to build a diff view, split code cells into
  // individual lines and individual outputs. We want users to be able to
  // add inline comments to each line and each output block.

  $results = array();
  foreach ($cells as $cell) {
    $cell_type = idx($cell, 'cell_type');
    if ($cell_type === 'markdown') {
      $source = $this->readString($cell, 'source');

      // Attempt to split contiguous blocks of markdown into smaller
      // pieces.
      $chunks = preg_split(
        '/\n\n+/',
        $source);

      foreach ($chunks as $chunk) {
        $result = $cell;
        $result['source'] = array($chunk);
        $results[] = $result;
      }

      continue;
    }

    if ($cell_type !== 'code') {
      $results[] = $cell;
      continue;
    }

    $label = $this->newCellLabel($cell);

    // Re-index the lines so the positional access below is safe even if
    // the decoded "source" does not have sequential integer keys.
    $lines = array_values($this->readStringList($cell, 'source'));
    $content = $this->highlightLines($lines);

    $count = count($lines);
    for ($ii = 0; $ii < $count; $ii++) {
      $is_head = ($ii === 0);
      $is_last = ($ii === ($count - 1));

      // Only the first line of a code cell carries the cell label.
      if ($is_head) {
        $line_label = $label;
      } else {
        $line_label = null;
      }

      $results[] = array(
        'cell_type' => 'code/line',
        'label' => $line_label,
        'raw' => $lines[$ii],
        'display' => idx($content, $ii),
        'head' => $is_head,
        'last' => $is_last,
      );
    }

    $output_list = idx($cell, 'outputs');
    if (is_array($output_list)) {
      foreach ($output_list as $output) {
        $results[] = array(
          'cell_type' => 'code/output',
          'output' => $output,
        );
      }
    }
  }

  return $results;
}
/**
 * Render one cell as a notebook table row.
 *
 * The label and content come from renderJupyterCellContent(). Cells of type
 * "code/line" additionally get the "jupyter-cell-flush" class on their
 * content cell.
 *
 * @param PhabricatorUser $viewer Viewer, forwarded to the content renderer.
 * @param array $cell Cell data.
 * @return mixed The <tr> tag built by newJupyterCell().
 */
private function renderJupyterCell(
  PhabricatorUser $viewer,
  array $cell) {

  list($label, $content) = $this->renderJupyterCellContent($viewer, $cell);

  $is_code_line = (idx($cell, 'cell_type') == 'code/line');
  $classes = $is_code_line ? 'jupyter-cell-flush' : null;

  return $this->newJupyterCell($label, $content, $classes);
}
/**
 * Build a notebook table row from a label and content.
 *
 * The row has two cells: a <td class="jupyter-label"> holding the label,
 * and a content <td> carrying the given classes.
 *
 * Callers in this class pass the classes as null, as a single string, or as
 * a list of class names. A list is joined with spaces here so the "class"
 * attribute value is always a plain string.
 *
 * @param mixed $label Label content, or null for no label.
 * @param mixed $content Cell content.
 * @param string|array|null $classes CSS class or list of classes for the
 *   content cell.
 * @return mixed The <tr> tag.
 */
private function newJupyterCell($label, $content, $classes) {
  if (is_array($classes)) {
    $classes = implode(' ', $classes);
  }

  $label_cell = phutil_tag(
    'td',
    array(
      'class' => 'jupyter-label',
    ),
    $label);

  $content_cell = phutil_tag(
    'td',
    array(
      'class' => $classes,
    ),
    $content);

  return phutil_tag(
    'tr',
    array(),
    array(
      $label_cell,
      $content_cell,
    ));
}
/**
 * Dispatch a cell to the renderer for its "cell_type".
 *
 * Recognized types are "markdown", "code", "code/line" and "code/output".
 * Any other cell is shown as formatted JSON in a raw cell.
 *
 * @param PhabricatorUser $viewer Viewer; not consulted here.
 * @param array $cell Cell data.
 * @return array Two-element list of label and content.
 */
private function renderJupyterCellContent(
  PhabricatorUser $viewer,
  array $cell) {

  switch (idx($cell, 'cell_type')) {
    case 'markdown':
      return $this->newMarkdownCell($cell);
    case 'code':
      return $this->newCodeCell($cell);
    case 'code/line':
      return $this->newCodeLineCell($cell);
    case 'code/output':
      return $this->newCodeOutputCell($cell);
    default:
      // Unknown cell type: fall back to dumping the cell as JSON.
      $encoder = new PhutilJSON();
      return $this->newRawCell($encoder->encodeFormatted($cell));
  }
}
/**
 * Build an unlabeled raw cell.
 *
 * @param mixed $content Content to place in the cell.
 * @return array Two-element list: a null label, then a
 *   <div class="jupyter-cell-raw PhabricatorMonospaced"> with the content.
 */
private function newRawCell($content) {
  $body = phutil_tag(
    'div',
    array(
      'class' => 'jupyter-cell-raw PhabricatorMonospaced',
    ),
    $content);

  return array(null, $body);
}
/**
 * Build an unlabeled markdown cell from the cell's "source".
 *
 * @param array $cell Cell data.
 * @return array Two-element list: a null label, then a
 *   <div class="jupyter-cell-markdown"> with the highlighted source.
 */
private function newMarkdownCell(array $cell) {
  $lines = $this->readStringList($cell, 'source');

  // TODO: This should ideally highlight as Markdown, but the "md"
  // highlighter in Pygments is painfully slow and not terribly useful.
  $highlighted = $this->highlightLines($lines, 'txt');

  $body = phutil_tag(
    'div',
    array(
      'class' => 'jupyter-cell-markdown',
    ),
    $highlighted);

  return array(null, $body);
}
/**
 * Build a complete code cell: label, highlighted source, and outputs.
 *
 * @param array $cell Cell data; "source" and "outputs" are read from it.
 * @return array Two-element list: the label from newCellLabel(), then a
 *   list holding the code block div followed by the rendered outputs.
 */
private function newCodeCell(array $cell) {
  $label = $this->newCellLabel($cell);

  $source_lines = $this->readStringList($cell, 'source');
  $highlighted = $this->highlightLines($source_lines);

  // A missing or malformed "outputs" value simply yields no outputs.
  $rendered_outputs = array();
  $raw_outputs = idx($cell, 'outputs');
  if (is_array($raw_outputs)) {
    foreach ($raw_outputs as $raw_output) {
      $rendered_outputs[] = $this->newOutput($raw_output);
    }
  }

  $code_block = phutil_tag(
    'div',
    array(
      'class' =>
        'jupyter-cell-code jupyter-cell-code-block '.
        'PhabricatorMonospaced remarkup-code',
    ),
    array(
      $highlighted,
    ));

  return array(
    $label,
    array(
      $code_block,
      $rendered_outputs,
    ),
  );
}
/**
 * Build the cell for a single line of a code cell.
 *
 * The cell's "head" and "last" flags add the "jupyter-cell-code-head" and
 * "jupyter-cell-code-last" classes respectively.
 *
 * @param array $cell A "code/line" cell with "head", "last", "label" and
 *   "display" keys.
 * @param mixed $content Optional replacement content; when null, the cell's
 *   "display" value is used.
 * @return array Two-element list: the cell's label, then a list holding the
 *   line's div.
 */
private function newCodeLineCell(array $cell, $content = null) {
  $class_attribute =
    'PhabricatorMonospaced remarkup-code '.
    'jupyter-cell-code jupyter-cell-code-line';

  if ($cell['head']) {
    $class_attribute .= ' jupyter-cell-code-head';
  }

  if ($cell['last']) {
    $class_attribute .= ' jupyter-cell-code-last';
  }

  if ($content === null) {
    $content = $cell['display'];
  }

  $line = phutil_tag(
    'div',
    array(
      'class' => $class_attribute,
    ),
    array(
      $content,
    ));

  return array(
    $cell['label'],
    array(
      $line,
    ),
  );
}
/**
 * Build an unlabeled cell for a single output of a code cell.
 *
 * @param array $cell A "code/output" cell with an "output" key.
 * @return array Two-element list: a null label, then the output rendered by
 *   newOutput().
 */
private function newCodeOutputCell(array $cell) {
  $rendered = $this->newOutput($cell['output']);

  return array(null, $rendered);
}
/**
 * Render one output of a code cell.
 *
 * For "execute_result" and "display_data" outputs, the first available
 * representation is used, in this order: an image ("image/png",
 * "image/jpeg", "image/jpg", "image/gif") embedded as a data URI, then
 * "text/html", then "application/javascript", then "text/plain". If none is
 * present the output renders as an empty block. All other output types
 * (including "stream") render their "text".
 *
 * The parameter is deliberately untyped so that a malformed (non-array)
 * output reaches the guard below and renders as a placeholder.
 *
 * @param mixed $output Output data, normally an array.
 * @return mixed A <div> tag, or a placeholder string for invalid input.
 */
private function newOutput($output) {
  if (!is_array($output)) {
    return pht('<Invalid Output>');
  }

  $classes = array(
    'jupyter-output',
    'PhabricatorMonospaced',
  );

  $output_name = idx($output, 'name');
  switch ($output_name) {
    case 'stderr':
      $classes[] = 'jupyter-output-stderr';
      break;
  }

  // Start from "no content" so an output with no usable representation
  // still renders, instead of reading an unset variable.
  $content = null;

  $output_type = idx($output, 'output_type');
  switch ($output_type) {
    case 'execute_result':
    case 'display_data':
      // Normalize so the lookups below always operate on an array.
      $data = idx($output, 'data');
      if (!is_array($data)) {
        $data = array();
      }

      $image_formats = array(
        'image/png',
        'image/jpeg',
        'image/jpg',
        'image/gif',
      );

      foreach ($image_formats as $image_format) {
        if (!isset($data[$image_format])) {
          continue;
        }

        $raw_data = $this->readString($data, $image_format);

        $content = phutil_tag(
          'img',
          array(
            'src' => 'data:'.$image_format.';base64,'.$raw_data,
          ));

        // Leave both the format loop and the enclosing switch.
        break 2;
      }

      if (isset($data['text/html'])) {
        $content = $data['text/html'];
        $classes[] = 'jupyter-output-html';
        break;
      }

      if (isset($data['application/javascript'])) {
        $content = $data['application/javascript'];
        $classes[] = 'jupyter-output-html';
        break;
      }

      if (isset($data['text/plain'])) {
        $content = $data['text/plain'];
        break;
      }

      break;
    case 'stream':
    default:
      $content = $this->readString($output, 'text');
      break;
  }

  return phutil_tag(
    'div',
    array(
      'class' => implode(' ', $classes),
    ),
    $content);
}
/**
 * Build the "In [N]:" label for a cell.
 *
 * @param array $cell Cell data; "execution_count" is read from it.
 * @return string|null The label, or null when the execution count is
 *   missing or otherwise falsy.
 */
private function newCellLabel(array $cell) {
  $execution_count = idx($cell, 'execution_count');

  if (!$execution_count) {
    return null;
  }

  return 'In ['.$execution_count.']:';
}
/**
 * Syntax-highlight a list of source lines.
 *
 * When no language is forced, a first line of the form "%%<name>" selects
 * <name> as the language; that line is removed before highlighting and
 * put back at the front of the result, wrapped in a
 * <span class="language-tag">. Without such a line the language is "py".
 *
 * @param array $lines Source lines; they are joined without a separator
 *   before highlighting.
 * @param string|null $force_language Language to use, or null to detect it
 *   as described above.
 * @return array The highlighted content split back into lines.
 */
private function highlightLines(array $lines, $force_language = null) {
  $directive = null;
  $language = $force_language;

  if ($force_language === null) {
    $language = 'py';

    $matches = null;
    if (preg_match('/^%%(.*)$/', head($lines), $matches)) {
      // Set the directive line aside so it is not highlighted as code.
      $directive = array_shift($lines);
      $language = $matches[1];
    }
  }

  $highlighted = PhabricatorSyntaxHighlighter::highlightWithLanguage(
    $language,
    implode('', $lines));
  $highlighted = phutil_split_lines($highlighted);

  if ($directive !== null) {
    $tag = phutil_tag(
      'span',
      array(
        'class' => 'language-tag',
      ),
      $directive);

    array_unshift($highlighted, $tag);
  }

  return $highlighted;
}
/**
 * Report whether this engine should be suggested for a document.
 *
 * @param PhabricatorDocumentRef $ref Document reference; not consulted.
 * @return bool Always true.
 */
public function shouldSuggestEngine(PhabricatorDocumentRef $ref) {
  return true;
}
/**
 * Read a value as a single string.
 *
 * The value is first normalized with readStringList(), then its pieces are
 * joined without a separator.
 *
 * @param array $src Array to read from.
 * @param string $key Key to read.
 * @return string The joined string; empty when the value is missing or is
 *   neither a string nor an array.
 */
private function readString(array $src, $key) {
  $pieces = $this->readStringList($src, $key);

  return implode('', $pieces);
}
/**
 * Read a value as a list.
 *
 * An array value is returned unchanged, a string is wrapped in a
 * one-element list, and anything else (including a missing key) yields an
 * empty list.
 *
 * @param array $src Array to read from.
 * @param string $key Key to read.
 * @return array The normalized list.
 */
private function readStringList(array $src, $key) {
  $value = idx($src, $key);

  if (is_array($value)) {
    return $value;
  }

  if (is_string($value)) {
    return array($value);
  }

  return array();
}