diff --git a/src/applications/diviner/markup/DivinerRemarkupRuleSymbol.php b/src/applications/diviner/markup/DivinerRemarkupRuleSymbol.php index ca395d85c5..1acbaea8d5 100644 --- a/src/applications/diviner/markup/DivinerRemarkupRuleSymbol.php +++ b/src/applications/diviner/markup/DivinerRemarkupRuleSymbol.php @@ -1,163 +1,163 @@ [^:]+?):)?'. '(?P[^}|]+?)'. '(?:[|](?P[^}]+))?'. '}/', array($this, 'markupSymbol'), $text); } public function markupSymbol($matches) { - if ($this->isFlatText($matches[0])) { + if (!$this->isFlatText($matches[0])) { return $matches[0]; } $type = (string)idx($matches, 'type'); $name = (string)$matches['name']; $title = (string)idx($matches, 'title'); // Collapse sequences of whitespace into a single space. $type = preg_replace('/\s+/', ' ', trim($type)); $name = preg_replace('/\s+/', ' ', trim($name)); $title = preg_replace('/\s+/', ' ', trim($title)); $ref = array(); if (strpos($type, '@') !== false) { list($type, $book) = explode('@', $type, 2); $ref['type'] = trim($type); $ref['book'] = trim($book); } else { $ref['type'] = $type; } if (strpos($name, '@') !== false) { list($name, $context) = explode('@', $name, 2); $ref['name'] = trim($name); $ref['context'] = trim($context); } else { $ref['name'] = $name; } $ref['title'] = nonempty($title, $name); foreach ($ref as $key => $value) { if ($value === '') { unset($ref[$key]); } } $engine = $this->getEngine(); $token = $engine->storeText(''); $key = self::KEY_RULE_ATOM_REF; $data = $engine->getTextMetadata($key, array()); $data[$token] = $ref; $engine->setTextMetadata($key, $data); return $token; } public function didMarkupText() { $engine = $this->getEngine(); $key = self::KEY_RULE_ATOM_REF; $data = $engine->getTextMetadata($key, array()); $renderer = $engine->getConfig('diviner.renderer'); foreach ($data as $token => $ref_dict) { $ref = DivinerAtomRef::newFromDictionary($ref_dict); $title = $ref->getTitle(); $href = null; if ($renderer) { // Here, we're generating documentation. If possible, we want to find // the real atom ref so we can render the correct default title and // render invalid links in an alternate style. $ref = $renderer->normalizeAtomRef($ref); if ($ref) { $title = nonempty($ref->getTitle(), $ref->getName()); $href = $renderer->getHrefForAtomRef($ref); } } else { // Here, we're generating comment text or something like that. Just // link to Diviner and let it sort things out. $href = id(new PhutilURI('/diviner/find/')) ->setQueryParams( array( 'book' => $ref->getBook(), 'name' => $ref->getName(), 'type' => $ref->getType(), 'context' => $ref->getContext(), 'jump' => true, )); } // TODO: This probably is not the best place to do this. Move it somewhere // better when it becomes more clear where it should actually go. if ($ref) { switch ($ref->getType()) { case 'function': case 'method': $title = $title.'()'; break; } } if ($this->getEngine()->isTextMode()) { if ($href) { $link = $title.' <'.PhabricatorEnv::getProductionURI($href).'>'; } else { $link = $title; } } else if ($href) { $link = $this->newTag( 'a', array( 'class' => 'atom-ref', 'href' => $href, ), $title); } else { $link = $this->newTag( 'span', array( 'class' => 'atom-ref-invalid', ), $title); } $engine->overwriteStoredText($token, $link); } } } diff --git a/src/infrastructure/markup/PhabricatorMarkupEngine.php b/src/infrastructure/markup/PhabricatorMarkupEngine.php index e50a19c566..fac08cbbce 100644 --- a/src/infrastructure/markup/PhabricatorMarkupEngine.php +++ b/src/infrastructure/markup/PhabricatorMarkupEngine.php @@ -1,603 +1,603 @@ <?php /** * Manages markup engine selection, configuration, application, caching and * pipelining. * * @{class:PhabricatorMarkupEngine} can be used to render objects which * implement @{interface:PhabricatorMarkupInterface} in a batched, cache-aware * way. For example, if you have a list of comments written in remarkup (and * the objects implement the correct interface) you can render them by first * building an engine and adding the fields with @{method:addObject}. * * $field = 'field:body'; // Field you want to render. Each object exposes * // one or more fields of markup. * * $engine = new PhabricatorMarkupEngine(); * foreach ($comments as $comment) { * $engine->addObject($comment, $field); * } * * Now, call @{method:process} to perform the actual cache/rendering * step. This is a heavyweight call which does batched data access and * transforms the markup into output. * * $engine->process(); * * Finally, do something with the results: * * $results = array(); * foreach ($comments as $comment) { * $results[] = $engine->getOutput($comment, $field); * } * * If you have a single object to render, you can use the convenience method * @{method:renderOneObject}. * * @task markup Markup Pipeline * @task engine Engine Construction */ final class PhabricatorMarkupEngine { private $objects = array(); private $viewer; - private $version = 10; + private $version = 11; /* -( Markup Pipeline )---------------------------------------------------- */ /** * Convenience method for pushing a single object through the markup * pipeline. * * @param PhabricatorMarkupInterface The object to render. * @param string The field to render. * @param PhabricatorUser User viewing the markup. * @return string Marked up output. * @task markup */ public static function renderOneObject( PhabricatorMarkupInterface $object, $field, PhabricatorUser $viewer) { return id(new PhabricatorMarkupEngine()) ->setViewer($viewer) ->addObject($object, $field) ->process() ->getOutput($object, $field); } /** * Queue an object for markup generation when @{method:process} is * called. You can retrieve the output later with @{method:getOutput}. * * @param PhabricatorMarkupInterface The object to render. * @param string The field to render. * @return this * @task markup */ public function addObject(PhabricatorMarkupInterface $object, $field) { $key = $this->getMarkupFieldKey($object, $field); $this->objects[$key] = array( 'object' => $object, 'field' => $field, ); return $this; } /** * Process objects queued with @{method:addObject}. You can then retrieve * the output with @{method:getOutput}. * * @return this * @task markup */ public function process() { $keys = array(); foreach ($this->objects as $key => $info) { if (!isset($info['markup'])) { $keys[] = $key; } } if (!$keys) { return; } $objects = array_select_keys($this->objects, $keys); // Build all the markup engines. We need an engine for each field whether // we have a cache or not, since we still need to postprocess the cache. $engines = array(); foreach ($objects as $key => $info) { $engines[$key] = $info['object']->newMarkupEngine($info['field']); $engines[$key]->setConfig('viewer', $this->viewer); } // Load or build the preprocessor caches. $blocks = $this->loadPreprocessorCaches($engines, $objects); $blocks = mpull($blocks, 'getCacheData'); $this->engineCaches = $blocks; // Finalize the output. foreach ($objects as $key => $info) { $engine = $engines[$key]; $field = $info['field']; $object = $info['object']; $output = $engine->postprocessText($blocks[$key]); $output = $object->didMarkupText($field, $output, $engine); $this->objects[$key]['output'] = $output; } return $this; } /** * Get the output of markup processing for a field queued with * @{method:addObject}. Before you can call this method, you must call * @{method:process}. * * @param PhabricatorMarkupInterface The object to retrieve. * @param string The field to retrieve. * @return string Processed output. * @task markup */ public function getOutput(PhabricatorMarkupInterface $object, $field) { $key = $this->getMarkupFieldKey($object, $field); $this->requireKeyProcessed($key); return $this->objects[$key]['output']; } /** * Retrieve engine metadata for a given field. * * @param PhabricatorMarkupInterface The object to retrieve. * @param string The field to retrieve. * @param string The engine metadata field to retrieve. * @param wild Optional default value. * @task markup */ public function getEngineMetadata( PhabricatorMarkupInterface $object, $field, $metadata_key, $default = null) { $key = $this->getMarkupFieldKey($object, $field); $this->requireKeyProcessed($key); return idx($this->engineCaches[$key]['metadata'], $metadata_key, $default); } /** * @task markup */ private function requireKeyProcessed($key) { if (empty($this->objects[$key])) { throw new Exception( "Call addObject() before using results (key = '{$key}')."); } if (!isset($this->objects[$key]['output'])) { throw new Exception( 'Call process() before using results.'); } } /** * @task markup */ private function getMarkupFieldKey( PhabricatorMarkupInterface $object, $field) { static $custom; if ($custom === null) { $custom = array_merge( self::loadCustomInlineRules(), self::loadCustomBlockRules()); $custom = mpull($custom, 'getRuleVersion', null); ksort($custom); $custom = PhabricatorHash::digestForIndex(serialize($custom)); } return $object->getMarkupFieldKey($field).'@'.$this->version.'@'.$custom; } /** * @task markup */ private function loadPreprocessorCaches(array $engines, array $objects) { $blocks = array(); $use_cache = array(); foreach ($objects as $key => $info) { if ($info['object']->shouldUseMarkupCache($info['field'])) { $use_cache[$key] = true; } } if ($use_cache) { try { $blocks = id(new PhabricatorMarkupCache())->loadAllWhere( 'cacheKey IN (%Ls)', array_keys($use_cache)); $blocks = mpull($blocks, null, 'getCacheKey'); } catch (Exception $ex) { phlog($ex); } } foreach ($objects as $key => $info) { // False check in case MySQL doesn't support unicode characters // in the string (T1191), resulting in unserialize returning false. if (isset($blocks[$key]) && $blocks[$key]->getCacheData() !== false) { // If we already have a preprocessing cache, we don't need to rebuild // it. continue; } $text = $info['object']->getMarkupText($info['field']); $data = $engines[$key]->preprocessText($text); // NOTE: This is just debugging information to help sort out cache issues. // If one machine is misconfigured and poisoning caches you can use this // field to hunt it down. $metadata = array( 'host' => php_uname('n'), ); $blocks[$key] = id(new PhabricatorMarkupCache()) ->setCacheKey($key) ->setCacheData($data) ->setMetadata($metadata); if (isset($use_cache[$key])) { // This is just filling a cache and always safe, even on a read pathway. $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); $blocks[$key]->replace(); unset($unguarded); } } return $blocks; } /** * Set the viewing user. Used to implement object permissions. * * @param PhabricatorUser The viewing user. * @return this * @task markup */ public function setViewer(PhabricatorUser $viewer) { $this->viewer = $viewer; return $this; } /* -( Engine Construction )------------------------------------------------ */ /** * @task engine */ public static function newManiphestMarkupEngine() { return self::newMarkupEngine(array( )); } /** * @task engine */ public static function newPhrictionMarkupEngine() { return self::newMarkupEngine(array( 'header.generate-toc' => true, )); } /** * @task engine */ public static function newPhameMarkupEngine() { return self::newMarkupEngine(array( 'macros' => false, )); } /** * @task engine */ public static function newFeedMarkupEngine() { return self::newMarkupEngine( array( 'macros' => false, 'youtube' => false, )); } /** * @task engine */ public static function newDifferentialMarkupEngine(array $options = array()) { return self::newMarkupEngine(array( 'differential.diff' => idx($options, 'differential.diff'), )); } /** * @task engine */ public static function newDiffusionMarkupEngine(array $options = array()) { return self::newMarkupEngine(array( 'header.generate-toc' => true, )); } /** * @task engine */ public static function getEngine($ruleset = 'default') { static $engines = array(); if (isset($engines[$ruleset])) { return $engines[$ruleset]; } $engine = null; switch ($ruleset) { case 'default': $engine = self::newMarkupEngine(array()); break; case 'nolinebreaks': $engine = self::newMarkupEngine(array()); $engine->setConfig('preserve-linebreaks', false); break; case 'diffusion-readme': $engine = self::newMarkupEngine(array()); $engine->setConfig('preserve-linebreaks', false); $engine->setConfig('header.generate-toc', true); break; case 'diviner': $engine = self::newMarkupEngine(array()); $engine->setConfig('preserve-linebreaks', false); // $engine->setConfig('diviner.renderer', new DivinerDefaultRenderer()); $engine->setConfig('header.generate-toc', true); break; case 'extract': // Engine used for reference/edge extraction. Turn off anything which // is slow and doesn't change reference extraction. $engine = self::newMarkupEngine(array()); $engine->setConfig('pygments.enabled', false); break; default: throw new Exception("Unknown engine ruleset: {$ruleset}!"); } $engines[$ruleset] = $engine; return $engine; } /** * @task engine */ private static function getMarkupEngineDefaultConfiguration() { return array( 'pygments' => PhabricatorEnv::getEnvConfig('pygments.enabled'), 'youtube' => PhabricatorEnv::getEnvConfig( 'remarkup.enable-embedded-youtube'), 'differential.diff' => null, 'header.generate-toc' => false, 'macros' => true, 'uri.allowed-protocols' => PhabricatorEnv::getEnvConfig( 'uri.allowed-protocols'), 'syntax-highlighter.engine' => PhabricatorEnv::getEnvConfig( 'syntax-highlighter.engine'), 'preserve-linebreaks' => true, ); } /** * @task engine */ public static function newMarkupEngine(array $options) { $options += self::getMarkupEngineDefaultConfiguration(); $engine = new PhutilRemarkupEngine(); $engine->setConfig('preserve-linebreaks', $options['preserve-linebreaks']); $engine->setConfig('pygments.enabled', $options['pygments']); $engine->setConfig( 'uri.allowed-protocols', $options['uri.allowed-protocols']); $engine->setConfig('differential.diff', $options['differential.diff']); $engine->setConfig('header.generate-toc', $options['header.generate-toc']); $engine->setConfig( 'syntax-highlighter.engine', $options['syntax-highlighter.engine']); $rules = array(); $rules[] = new PhutilRemarkupRuleEscapeRemarkup(); $rules[] = new PhutilRemarkupRuleMonospace(); $rules[] = new PhutilRemarkupRuleDocumentLink(); if ($options['youtube']) { $rules[] = new PhabricatorRemarkupRuleYoutube(); } $applications = PhabricatorApplication::getAllInstalledApplications(); foreach ($applications as $application) { foreach ($application->getRemarkupRules() as $rule) { $rules[] = $rule; } } $rules[] = new PhutilRemarkupRuleHyperlink(); if ($options['macros']) { $rules[] = new PhabricatorRemarkupRuleImageMacro(); $rules[] = new PhabricatorRemarkupRuleMeme(); } $rules[] = new PhutilRemarkupRuleBold(); $rules[] = new PhutilRemarkupRuleItalic(); $rules[] = new PhutilRemarkupRuleDel(); $rules[] = new PhutilRemarkupRuleUnderline(); foreach (self::loadCustomInlineRules() as $rule) { $rules[] = $rule; } $blocks = array(); $blocks[] = new PhutilRemarkupEngineRemarkupQuotesBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupReplyBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupLiteralBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupHeaderBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupHorizontalRuleBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupListBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupCodeBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupNoteBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupTableBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupSimpleTableBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupInterpreterRule(); $blocks[] = new PhutilRemarkupEngineRemarkupDefaultBlockRule(); foreach (self::loadCustomBlockRules() as $rule) { $blocks[] = $rule; } foreach ($blocks as $block) { $block->setMarkupRules($rules); } $engine->setBlockRules($blocks); return $engine; } public static function extractPHIDsFromMentions( PhabricatorUser $viewer, array $content_blocks) { $mentions = array(); $engine = self::newDifferentialMarkupEngine(); $engine->setConfig('viewer', $viewer); foreach ($content_blocks as $content_block) { $engine->markupText($content_block); $phids = $engine->getTextMetadata( PhabricatorRemarkupRuleMention::KEY_MENTIONED, array()); $mentions += $phids; } return $mentions; } public static function extractFilePHIDsFromEmbeddedFiles( PhabricatorUser $viewer, array $content_blocks) { $files = array(); $engine = self::newDifferentialMarkupEngine(); $engine->setConfig('viewer', $viewer); foreach ($content_blocks as $content_block) { $engine->markupText($content_block); $ids = $engine->getTextMetadata( PhabricatorRemarkupRuleEmbedFile::KEY_EMBED_FILE_PHIDS, array()); $files += $ids; } return $files; } /** * Produce a corpus summary, in a way that shortens the underlying text * without truncating it somewhere awkward. * * TODO: We could do a better job of this. * * @param string Remarkup corpus to summarize. * @return string Summarized corpus. */ public static function summarize($corpus) { // Major goals here are: // - Don't split in the middle of a character (utf-8). // - Don't split in the middle of, e.g., **bold** text, since // we end up with hanging '**' in the summary. // - Try not to pick an image macro, header, embedded file, etc. // - Hopefully don't return too much text. We don't explicitly limit // this right now. $blocks = preg_split("/\n *\n\s*/", trim($corpus)); $best = null; foreach ($blocks as $block) { // This is a test for normal spaces in the block, i.e. a heuristic to // distinguish standard paragraphs from things like image macros. It may // not work well for non-latin text. We prefer to summarize with a // paragraph of normal words over an image macro, if possible. $has_space = preg_match('/\w\s\w/', $block); // This is a test to find embedded images and headers. We prefer to // summarize with a normal paragraph over a header or an embedded object, // if possible. $has_embed = preg_match('/^[{=]/', $block); if ($has_space && !$has_embed) { // This seems like a good summary, so return it. return $block; } if (!$best) { // This is the first block we found; if everything is garbage just // use the first block. $best = $block; } } return $best; } private static function loadCustomInlineRules() { return id(new PhutilSymbolLoader()) ->setAncestorClass('PhabricatorRemarkupCustomInlineRule') ->loadObjects(); } private static function loadCustomBlockRules() { return id(new PhutilSymbolLoader()) ->setAncestorClass('PhabricatorRemarkupCustomBlockRule') ->loadObjects(); } }