/**
 * Document engine for Jupyter notebook documents.
 *
 * The engine is identified by the key "jupyter" (see ENGINEKEY).
 */
3 final class PhabricatorJupyterDocumentEngine
4 extends PhabricatorDocumentEngine
{
6 const ENGINEKEY
= 'jupyter';
/**
 * Get the "view as" label for this engine.
 *
 * @param PhabricatorDocumentRef $ref Document reference (not consulted).
 * @return string Label text, passed through pht().
 */
public function getViewAsLabel(PhabricatorDocumentRef $ref) {
  $label = pht('View as Jupyter Notebook');
  return $label;
}
12 protected function getDocumentIconIcon(PhabricatorDocumentRef
$ref) {
/**
 * Get the text shown while a document is being rendered by this engine.
 *
 * @param PhabricatorDocumentRef $ref Document reference (not consulted).
 * @return string Rendering text, passed through pht().
 */
protected function getDocumentRenderingText(PhabricatorDocumentRef $ref) {
  $text = pht('Rendering Jupyter Notebook...');
  return $text;
}
20 public function shouldRenderAsync(PhabricatorDocumentRef
$ref) {
/**
 * Compute this engine's content score for a document.
 *
 * The document's name is tested for a trailing ".ipynb" extension.
 *
 * @param PhabricatorDocumentRef $ref Document to score.
 */
24 protected function getContentScore(PhabricatorDocumentRef
$ref) {
25 $name = $ref->getName();
// Case-insensitive match; "\z" anchors at the very end of the name, so
// only a trailing ".ipynb" extension matches.
27 if (preg_match('/\\.ipynb\z/i', $name)) {
/**
 * Decide whether this engine can render the given document.
 *
 * The decision is delegated entirely to the reference's own
 * isProbablyJSON() check.
 *
 * @param PhabricatorDocumentRef $ref Document to test.
 * @return mixed Whatever isProbablyJSON() reports for the document.
 */
protected function canRenderDocumentType(PhabricatorDocumentRef $ref) {
  $looks_like_json = $ref->isProbablyJSON();
  return $looks_like_json;
}
38 public function canDiffDocuments(
39 PhabricatorDocumentRef
$uref = null,
40 PhabricatorDocumentRef
$vref = null) {
/**
 * Build the block lists used to diff two documents.
 *
 * Each side is converted to diff blocks with newDiffBlocks() and the
 * resulting lists are added to a PhabricatorDocumentEngineBlocks object,
 * keyed by their document refs.
 *
 * @param PhabricatorDocumentRef|null $uref Ref for the "u" side; defaults
 *   to null.
 * @param PhabricatorDocumentRef|null $vref Ref for the "v" side; defaults
 *   to null.
 */
44 public function newEngineBlocks(
45 PhabricatorDocumentRef
$uref = null,
46 PhabricatorDocumentRef
$vref = null) {
48 $blocks = new PhabricatorDocumentEngineBlocks();
52 $u_blocks = $this->newDiffBlocks($uref);
58 $v_blocks = $this->newDiffBlocks($vref);
63 $blocks->addBlockList($uref, $u_blocks);
64 $blocks->addBlockList($vref, $v_blocks);
65 } catch (Exception
$ex) {
// A failure while building blocks is recorded on the result as a message
// carrying the exception text.
67 $blocks->addMessage($ex->getMessage());
/**
 * Build the old/new views for a pair of blocks being diffed.
 *
 * When both cells have the same "cell_type", a specialized rendering is
 * produced: a prose diff over the cells' "source" text, or an intraline
 * diff over the "raw"/"display" values carried by "code/line" cells.
 * Otherwise rendering is delegated to the parent implementation.
 *
 * @param PhabricatorDocumentRef $uref Ref for the old side.
 * @param PhabricatorDocumentEngineBlock $ublock Block for the old side.
 * @param PhabricatorDocumentRef $vref Ref for the new side.
 * @param PhabricatorDocumentEngineBlock $vblock Block for the new side.
 */
73 public function newBlockDiffViews(
74 PhabricatorDocumentRef
$uref,
75 PhabricatorDocumentEngineBlock
$ublock,
76 PhabricatorDocumentRef
$vref,
77 PhabricatorDocumentEngineBlock
$vblock) {
79 $ucell = $ublock->getContent();
80 $vcell = $vblock->getContent();
82 $utype = idx($ucell, 'cell_type');
83 $vtype = idx($vcell, 'cell_type');
// Specialized diffs only make sense when both sides are the same kind of
// cell.
85 if ($utype === $vtype) {
// Prose path: diff the concatenated "source" text of both cells.
// NOTE(review): presumably selected for markdown cells -- confirm.
88 $usource = $this->readString($ucell, 'source');
89 $vsource = $this->readString($vcell, 'source');
91 $diff = id(new PhutilProseDifferenceEngine())
92 ->getDiff($usource, $vsource);
// The old side keeps unchanged ("=") and removed ("-") parts; the new side
// keeps unchanged ("=") and added ("+") parts.
94 $u_content = $this->newProseDiffCell($diff, array('=', '-'));
95 $v_content = $this->newProseDiffCell($diff, array('=', '+'));
// No label and no extra classes are passed for prose cells.
97 $u_content = $this->newJupyterCell(null, $u_content, null);
98 $v_content = $this->newJupyterCell(null, $v_content, null);
100 $u_content = $this->newCellContainer($u_content);
101 $v_content = $this->newCellContainer($v_content);
103 return id(new PhabricatorDocumentEngineBlockDiff())
104 ->setOldContent($u_content)
106 ->setNewContent($v_content)
107 ->addNewClass('new');
// Code line path: "raw" and "display" are the keys newCells() sets on
// "code/line" cells.
109 $usource = idx($ucell, 'raw');
110 $vsource = idx($vcell, 'raw');
111 $udisplay = idx($ucell, 'display');
112 $vdisplay = idx($vcell, 'display');
114 $intraline_segments = ArcanistDiffUtils
::generateIntralineDiff(
// Index 0 of the result is read for the old side, index 1 for the new
// side.
118 $u_segments = array();
119 foreach ($intraline_segments[0] as $u_segment) {
120 $u_segments[] = $u_segment;
123 $v_segments = array();
124 foreach ($intraline_segments[1] as $v_segment) {
125 $v_segments[] = $v_segment;
128 $usource = PhabricatorDifferenceEngine
::applyIntralineDiff(
132 $vsource = PhabricatorDifferenceEngine
::applyIntralineDiff(
// Passing the diffed markup as the second argument overrides the cell's
// own "display" value in newCodeLineCell().
136 list($u_label, $u_content) = $this->newCodeLineCell($ucell, $usource);
137 list($v_label, $v_content) = $this->newCodeLineCell($vcell, $vsource);
140 'jupyter-cell-flush',
143 $u_content = $this->newJupyterCell($u_label, $u_content, $classes);
144 $v_content = $this->newJupyterCell($v_label, $v_content, $classes);
146 $u_content = $this->newCellContainer($u_content);
147 $v_content = $this->newCellContainer($v_content);
149 return id(new PhabricatorDocumentEngineBlockDiff())
150 ->setOldContent($u_content)
152 ->setNewContent($v_content)
153 ->addNewClass('new');
// Fallback: let the parent class build the views.
157 return parent
::newBlockDiffViews($uref, $ublock, $vref, $vblock);
/**
 * Render the content view for a single block.
 *
 * The block's cell is rendered with renderJupyterCell() for the current
 * viewer, and the result is wrapped with newCellContainer().
 *
 * @param PhabricatorDocumentRef $ref Document reference (not consulted).
 * @param PhabricatorDocumentEngineBlock $block Block whose content is the
 *   cell to render.
 * @return mixed The value produced by newCellContainer().
 */
public function newBlockContentView(
  PhabricatorDocumentRef $ref,
  PhabricatorDocumentEngineBlock $block) {

  $rendered_cell = $this->renderJupyterCell(
    $this->getViewer(),
    $block->getContent());

  return $this->newCellContainer($rendered_cell);
}
/**
 * Wrap rendered cell content in the notebook markup used for diff views.
 *
 * Two tags are built: one with class "jupyter-notebook" and a container
 * with classes "document-engine-jupyter document-engine-diff".
 *
 * @param mixed $cell_content Rendered cell content to wrap.
 */
172 private function newCellContainer($cell_content) {
173 $notebook_table = phutil_tag(
176 'class' => 'jupyter-notebook',
180 $container = phutil_tag(
183 'class' => 'document-engine-jupyter document-engine-diff',
/**
 * Render one side of a prose diff as a markdown-styled cell.
 *
 * @param PhutilProseDiff $diff Diff whose parts are rendered.
 * @param array $mask List of part types to consider; callers in this class
 *   pass array('=', '-') for the old side and array('=', '+') for the new
 *   side.
 */
190 private function newProseDiffCell(PhutilProseDiff
$diff, array $mask) {
// Convert the list into a map so membership can be tested with isset().
191 $mask = array_fuse($mask);
194 foreach ($diff->getParts() as $part) {
195 $type = $part['type'];
196 $text = $part['text'];
// Parts whose type is not in the mask get separate handling.
// NOTE(review): presumably they are skipped -- confirm.
198 if (!isset($mask[$type])) {
204 $result[] = phutil_tag(
212 $result[] = phutil_tag(
230 'class' => 'jupyter-cell-markdown',
/**
 * Convert a document into a list of diffable blocks, one per cell.
 *
 * The document data is parsed with newCells() in diff mode (second argument
 * true). Each cell becomes a PhabricatorDocumentEngineBlock carrying a
 * difference hash derived from the cell.
 *
 * @param PhabricatorDocumentRef $ref Document to load and split.
 */
236 private function newDiffBlocks(PhabricatorDocumentRef
$ref) {
237 $viewer = $this->getViewer();
238 $content = $ref->loadData();
240 $cells = $this->newCells($content, true);
244 foreach ($cells as $cell) {
245 // When the cell is a source code line, we can hash just the raw
246 // input rather than all the cell metadata.
// The hash input depends on the cell type: the "raw" value, the joined
// "source" text, or the serialized cell as a whole.
248 switch (idx($cell, 'cell_type')) {
250 $hash_input = $cell['raw'];
253 $hash_input = $this->readString($cell, 'source');
256 $hash_input = serialize($cell);
260 $hash = PhabricatorHash
::digestWithNamedKey(
262 'document-engine.content-digest');
264 $blocks[] = id(new PhabricatorDocumentEngineBlock())
266 ->setDifferenceHash($hash)
/**
 * Render a whole document as a notebook.
 *
 * The document data is parsed with newCells() in non-diff mode (second
 * argument false) and every cell is rendered with renderJupyterCell(). If
 * parsing raises an exception, a message built from the exception text is
 * returned instead.
 *
 * @param PhabricatorDocumentRef $ref Document to load and render.
 */
275 protected function newDocumentContent(PhabricatorDocumentRef
$ref) {
276 $viewer = $this->getViewer();
277 $content = $ref->loadData();
280 $cells = $this->newCells($content, false);
281 } catch (Exception
$ex) {
// Show the parse failure to the user rather than propagating it.
282 return $this->newMessage($ex->getMessage());
286 foreach ($cells as $cell) {
287 $rows[] = $this->renderJupyterCell($viewer, $cell);
// Unlike newCellContainer(), the outer container here carries only the
// "document-engine-jupyter" class (no "document-engine-diff").
290 $notebook_table = phutil_tag(
293 'class' => 'jupyter-notebook',
297 $container = phutil_tag(
300 'class' => 'document-engine-jupyter',
/**
 * Parse raw notebook content into a list of cells.
 *
 * The content is decoded as JSON and checked for an "nbformat" field equal
 * to 4 and a "cells" list. Callers in this class wrap this call in
 * try/catch and surface the exception message, so validation failures are
 * expected to be raised as exceptions carrying the messages below.
 *
 * For diff views, markdown cells are split into smaller chunks and code
 * cells are expanded into "code/line" cells (with "label", "raw" and
 * "display" keys) and "code/output" cells.
 *
 * @param string $content Raw document data.
 * @param bool $for_diff True when building cells for a diff view.
 */
307 private function newCells($content, $for_diff) {
309 $data = phutil_json_decode($content);
310 } catch (PhutilJSONParserException
$ex) {
313 'This is not a valid JSON document and can not be rendered as '.
314 'a Jupyter notebook: %s.',
318 if (!is_array($data)) {
321 'This document does not encode a valid JSON object and can not '.
322 'be rendered as a Jupyter notebook.'));
325 $nbformat = idx($data, 'nbformat');
// NOTE(review): $nbformat comes straight from decoded JSON and may be null
// or an integer; strlen() on a non-string relies on implicit conversion,
// and passing null to strlen() is deprecated as of PHP 8.1 -- consider an
// explicit null check.
326 if (!strlen($nbformat)) {
329 'This document is missing an "nbformat" field. Jupyter notebooks '.
330 'must have this field.'));
// Strict comparison: only the integer 4 is accepted, so a string "4" or a
// float 4.0 would be reported as an unsupported version.
333 if ($nbformat !== 4) {
336 'This Jupyter notebook uses an unsupported version of the file '.
337 'format (found version %s, expected version 4).',
341 $cells = idx($data, 'cells');
342 if (!is_array($cells)) {
345 'This Jupyter notebook does not specify a list of "cells".'));
351 'This Jupyter notebook does not specify any notebook cells.'));
358 // If we're extracting cells to build a diff view, split code cells into
359 // individual lines and individual outputs. We want users to be able to
360 // add inline comments to each line and each output block.
363 foreach ($cells as $cell) {
364 $cell_type = idx($cell, 'cell_type');
365 if ($cell_type === 'markdown') {
366 $source = $this->readString($cell, 'source');
368 // Attempt to split contiguous blocks of markdown into smaller
371 $chunks = preg_split(
// Each chunk becomes its own result whose "source" is a one-element list.
375 foreach ($chunks as $chunk) {
377 $result['source'] = array($chunk);
378 $results[] = $result;
// Cells that are neither markdown nor code get separate handling.
384 if ($cell_type !== 'code') {
389 $label = $this->newCellLabel($cell);
391 $lines = $this->readStringList($cell, 'source');
392 $content = $this->highlightLines($lines);
// Emit one "code/line" cell per source line, pairing each raw line with
// the highlighted line at the same index.
394 $count = count($lines);
395 for ($ii = 0; $ii < $count; $ii++
) {
396 $is_head = ($ii === 0);
397 $is_last = ($ii === ($count - 1));
400 $line_label = $label;
406 'cell_type' => 'code/line',
407 'label' => $line_label,
408 'raw' => $lines[$ii],
409 'display' => idx($content, $ii),
// Each output of the code cell becomes its own "code/output" cell.
416 $output_list = idx($cell, 'outputs');
417 if (is_array($output_list)) {
418 foreach ($output_list as $output) {
420 'cell_type' => 'code/output',
/**
 * Render a cell as a labelled notebook cell.
 *
 * The label and content come from renderJupyterCellContent(); the result is
 * assembled with newJupyterCell(). Some cell types receive the extra class
 * "jupyter-cell-flush".
 *
 * @param PhabricatorUser $viewer Viewing user.
 */
431 private function renderJupyterCell(
432 PhabricatorUser
$viewer,
435 list($label, $content) = $this->renderJupyterCellContent($viewer, $cell);
438 switch (idx($cell, 'cell_type')) {
440 $classes = 'jupyter-cell-flush';
444 return $this->newJupyterCell(
/**
 * Assemble a notebook cell from a label and rendered content.
 *
 * @param mixed $label Label for the cell; callers in this class pass null
 *   for cells without a label.
 * @param mixed $content Rendered cell content.
 * @param mixed $classes Extra classes; callers in this class pass null, a
 *   class string, or a list of classes.
 */
450 private function newJupyterCell($label, $content, $classes) {
// The label is rendered in a tag with class "jupyter-label".
451 $label_cell = phutil_tag(
454 'class' => 'jupyter-label',
458 $content_cell = phutil_tag(
/**
 * Dispatch a cell to the renderer for its "cell_type".
 *
 * Dedicated renderers exist for markdown cells, code cells, code line cells
 * and code output cells. Any other cell is shown as its formatted JSON via
 * newRawCell(). renderJupyterCell() destructures the result as a
 * (label, content) pair.
 *
 * @param PhabricatorUser $viewer Viewing user.
 */
474 private function renderJupyterCellContent(
475 PhabricatorUser
$viewer,
478 $cell_type = idx($cell, 'cell_type');
479 switch ($cell_type) {
481 return $this->newMarkdownCell($cell);
483 return $this->newCodeCell($cell);
485 return $this->newCodeLineCell($cell);
487 return $this->newCodeOutputCell($cell);
// Unrecognized cell: fall back to showing the cell's raw JSON.
490 $json_content = id(new PhutilJSON())
491 ->encodeFormatted($cell);
493 return $this->newRawCell($json_content);
/**
 * Render raw content using the classes
 * "jupyter-cell-raw PhabricatorMonospaced".
 *
 * @param mixed $content Content to display; renderJupyterCellContent()
 *   passes formatted JSON text.
 */
496 private function newRawCell($content) {
502 'class' => 'jupyter-cell-raw PhabricatorMonospaced',
/**
 * Render a markdown cell.
 *
 * The cell's "source" lines are highlighted with the language forced to
 * "txt" and placed in a tag with class "jupyter-cell-markdown".
 *
 * @param array $cell Markdown cell.
 */
508 private function newMarkdownCell(array $cell) {
509 $content = $this->readStringList($cell, 'source');
511 // TODO: This should ideally highlight as Markdown, but the "md"
512 // highlighter in Pygments is painfully slow and not terribly useful.
513 $content = $this->highlightLines($content, 'txt');
520 'class' => 'jupyter-cell-markdown',
/**
 * Render a complete code cell: label, highlighted source and outputs.
 *
 * @param array $cell Code cell; "source" and "outputs" are read from it.
 */
526 private function newCodeCell(array $cell) {
527 $label = $this->newCellLabel($cell);
// No language is forced here, so highlightLines() selects it.
529 $content = $this->readStringList($cell, 'source');
530 $content = $this->highlightLines($content);
// Outputs are rendered only when "outputs" is actually a list.
533 $output_list = idx($cell, 'outputs');
534 if (is_array($output_list)) {
535 foreach ($output_list as $output) {
536 $outputs[] = $this->newOutput($output);
547 'jupyter-cell-code jupyter-cell-code-block '.
548 'PhabricatorMonospaced remarkup-code',
/**
 * Render a single line of a code cell.
 *
 * Callers in this class destructure the result as a (label, content) pair.
 *
 * @param array $cell A "code/line" cell.
 * @param mixed $content Optional replacement markup for the line; when
 *   null, the cell's own "display" value is used.
 */
558 private function newCodeLineCell(array $cell, $content = null) {
560 $classes[] = 'PhabricatorMonospaced';
561 $classes[] = 'remarkup-code';
562 $classes[] = 'jupyter-cell-code';
563 $classes[] = 'jupyter-cell-code-line';
// Additional classes are added for some lines.
// NOTE(review): presumably the first and last line of a cell -- confirm.
566 $classes[] = 'jupyter-cell-code-head';
570 $classes[] = 'jupyter-cell-code-last';
573 $classes = implode(' ', $classes);
// Default to the highlighted markup stored on the cell.
575 if ($content === null) {
576 $content = $cell['display'];
/**
 * Render a "code/output" cell by rendering its "output" entry with
 * newOutput().
 *
 * @param array $cell Cell with an "output" key.
 */
594 private function newCodeOutputCell(array $cell) {
597 $this->newOutput($cell['output']),
/**
 * Render one output entry of a code cell.
 *
 * The "data" payload of the output is inspected for image formats (embedded
 * as base64 "data:" URIs), then "text/html", "application/javascript" and
 * "text/plain". The "text" field of the output is also used as content in
 * one branch.
 *
 * @param array $output Output entry from a cell's "outputs" list.
 */
601 private function newOutput(array $output) {
// NOTE(review): the "array" type declaration already rejects non-array
// arguments before the body runs, so this guard cannot be reached with a
// non-array value. Callers passing a malformed output would get a type
// error instead of this placeholder -- confirm which behavior is intended.
602 if (!is_array($output)) {
603 return pht('<Invalid Output>');
608 'PhabricatorMonospaced',
611 $output_name = idx($output, 'name');
612 switch ($output_name) {
614 $classes[] = 'jupyter-output-stderr';
618 $output_type = idx($output, 'output_type');
619 switch ($output_type) {
620 case 'execute_result':
622 $data = idx($output, 'data');
624 $image_formats = array(
631 foreach ($image_formats as $image_format) {
632 if (!isset($data[$image_format])) {
636 $raw_data = $this->readString($data, $image_format);
638 $content = phutil_tag(
// The format name doubles as the media type of the data URI.
641 'src' => 'data:'.$image_format.';base64,'.$raw_data,
647 if (isset($data['text/html'])) {
648 $content = $data['text/html'];
649 $classes[] = 'jupyter-output-html';
// NOTE(review): JavaScript payloads reuse the "jupyter-output-html" class
// -- confirm this is intentional.
653 if (isset($data['application/javascript'])) {
654 $content = $data['application/javascript'];
655 $classes[] = 'jupyter-output-html';
659 if (isset($data['text/plain'])) {
660 $content = $data['text/plain'];
667 $content = $this->readString($output, 'text');
674 'class' => implode(' ', $classes),
/**
 * Build the label for a code cell from its "execution_count".
 *
 * @param array $cell Code cell.
 */
679 private function newCellLabel(array $cell) {
680 $execution_count = idx($cell, 'execution_count');
// A missing, null or zero count is falsy and does not take this branch.
681 if ($execution_count) {
682 $label = 'In ['.$execution_count.']:';
/**
 * Syntax-highlight a list of source lines and return highlighted lines.
 *
 * @param array $lines Source lines; they are joined with no separator
 *   before highlighting.
 * @param string|null $force_language Language to highlight with. When null,
 *   the first line is inspected for a leading "%%" directive.
 */
690 private function highlightLines(array $lines, $force_language = null) {
691 if ($force_language === null) {
692 $head = head($lines);
// A first line starting with "%%" is removed from the input before
// highlighting and kept in $restore.
694 if (preg_match('/^%%(.*)$/', $head, $matches)) {
695 $restore = array_shift($lines);
703 $lang = $force_language;
706 $content = PhabricatorSyntaxHighlighter
::highlightWithLanguage(
708 implode('', $lines));
// Split the highlighted markup back into lines.
709 $content = phutil_split_lines($content);
// If a directive line was removed above, put a "language-tag" element back
// at the front of the output.
// NOTE(review): presumably so output lines stay aligned with the input
// lines -- confirm.
711 if ($restore !== null) {
712 $language_tag = phutil_tag(
715 'class' => 'language-tag',
719 array_unshift($content, $language_tag);
725 public function shouldSuggestEngine(PhabricatorDocumentRef
$ref) {
/**
 * Read a value from a structure as a single string.
 *
 * The value at `$key` is fetched as a list with readStringList() and its
 * pieces are concatenated with no separator.
 *
 * @param array $src Structure to read from.
 * @param string $key Key to read.
 * @return string Concatenated value.
 */
private function readString(array $src, $key) {
  $pieces = $this->readStringList($src, $key);
  $joined = implode('', $pieces);
  return $joined;
}
/**
 * Read a value from a structure as a list.
 *
 * The value at `$key` may already be a list, or may be a single string, in
 * which case it is wrapped in a one-element list.
 *
 * @param array $src Structure to read from.
 * @param string $key Key to read.
 */
734 private function readStringList(array $src, $key) {
735 $list = idx($src, $key);
737 if (is_array($list)) {
739 } else if (is_string($list)) {
740 $list = array($list);