diff --git a/src/applications/diffusion/conduit/ConduitAPI_diffusion_readmequery_Method.php b/src/applications/diffusion/conduit/ConduitAPI_diffusion_readmequery_Method.php index 23f18b544d..b6d781e3ef 100644 --- a/src/applications/diffusion/conduit/ConduitAPI_diffusion_readmequery_Method.php +++ b/src/applications/diffusion/conduit/ConduitAPI_diffusion_readmequery_Method.php @@ -1,117 +1,128 @@ 'required array ', ); } protected function getResult(ConduitAPIRequest $request) { $drequest = $this->getDiffusionRequest(); $path_dicts = $request->getValue('paths', array()); $paths = array(); foreach ($path_dicts as $dict) { $paths[] = DiffusionRepositoryPath::newFromDictionary($dict); } $readme = ''; foreach ($paths as $result_path) { $file_type = $result_path->getFileType(); if (($file_type != ArcanistDiffChangeType::FILE_NORMAL) && ($file_type != ArcanistDiffChangeType::FILE_TEXT)) { // Skip directories, etc. continue; } $path = $result_path->getPath(); if (preg_match('/^readme(|\.txt|\.remarkup|\.rainbow|\.md)$/i', $path)) { $readme = $result_path; break; } } if (!$readme) { return ''; } $readme_request = DiffusionRequest::newFromDictionary( array( 'user' => $request->getUser(), 'repository' => $drequest->getRepository(), 'commit' => $drequest->getStableCommitName(), 'path' => $readme->getFullPath(), )); $file_content = DiffusionFileContent::newFromConduit( DiffusionQuery::callConduitWithDiffusionRequest( $request->getUser(), $readme_request, 'diffusion.filecontentquery', array( 'commit' => $drequest->getStableCommitName(), 'path' => $readme->getFullPath(), 'needsBlame' => false, ))); $readme_content = $file_content->getCorpus(); if (preg_match('/\\.txt$/', $readme->getPath())) { $readme_content = phutil_escape_html_newlines($readme_content); $class = null; } else if (preg_match('/\\.rainbow$/', $readme->getPath())) { $highlighter = new PhutilRainbowSyntaxHighlighter(); $readme_content = $highlighter ->getHighlightFuture($readme_content) ->resolve(); $readme_content 
= phutil_escape_html_newlines($readme_content); require_celerity_resource('syntax-highlighting-css'); $class = 'remarkup-code'; } else { - // Markup extensionless files as remarkup so we get links and such. - $engine = PhabricatorMarkupEngine::newDiffusionMarkupEngine(); - $engine->setConfig('viewer', $request->getUser()); - $readme_content = $engine->markupText($readme_content); + // TODO: This is sketchy, but make sure we hit the markup cache. + $markup_object = id(new PhabricatorMarkupOneOff()) + ->setEngineRuleset('diffusion-readme') + ->setContent($readme_content); + $markup_field = 'default'; + + $readme_content = id(new PhabricatorMarkupEngine()) + ->setViewer($request->getUser()) + ->addObject($markup_object, $markup_field) + ->process() + ->getOutput($markup_object, $markup_field); + + $engine = $markup_object->newMarkupEngine($markup_field); $toc = PhutilRemarkupEngineRemarkupHeaderBlockRule::renderTableOfContents( $engine); if ($toc) { - $toc = phutil_tag_div('phabricator-remarkup-toc', array( - phutil_tag_div( - 'phabricator-remarkup-toc-header', - pht('Table of Contents')), - $toc, - )); + $toc = phutil_tag_div( + 'phabricator-remarkup-toc', + array( + phutil_tag_div( + 'phabricator-remarkup-toc-header', + pht('Table of Contents')), + $toc, + )); $readme_content = array($toc, $readme_content); } $class = 'phabricator-remarkup'; } $readme_content = phutil_tag( 'div', array( 'class' => $class, ), $readme_content); return $readme_content; } } diff --git a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php index ae2f652bef..432bde057c 100644 --- a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php +++ b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php @@ -1,386 +1,393 @@ -- * * ...routinely takes several hundred milliseconds, and equivalent requests * often take longer in Mercurial. 
* * Unfortunately, this operation is fundamental to rendering a repository for * the web, and essentially everything else that's slow can be reduced to this * plus some trivial work afterward. Making this fast is desirable and powerful, * and allows us to make other things fast by expressing them in terms of this * query. * * Because the query is fundamentally a graph query, it isn't easy to express * in a reasonable way in MySQL, and we can't do round trips to the server to * walk the graph without incurring huge performance penalties. * * However, the total amount of data in the graph is relatively small. By * caching it in chunks and keeping it in APC, we can reasonably load and walk * the graph in PHP quickly. * * For more context, see T2683. * * Structure of the Cache * ====================== * * The cache divides commits into buckets (see @{method:getBucketSize}). To * walk the graph, we pull a commit's bucket. The bucket is a map from commit * IDs to a list of parents and changed paths, separated by `null`. For * example, a bucket might look like this: * * array( * 1 => array(0, null, 17, 18), * 2 => array(1, null, 4), * // ... * ) * * This means that commit ID 1 has parent commit 0 (a special value meaning * no parents) and affected path IDs 17 and 18. Commit ID 2 has parent commit 1, * and affected path 4. * * This data structure attempts to balance compactness, ease of construction, * simplicity of cache semantics, and lookup performance. In the average case, * it appears to do a reasonable job at this. * * @task query Querying the Graph Cache * @task cache Cache Internals */ final class PhabricatorRepositoryGraphCache { /* -( Querying the Graph Cache )------------------------------------------- */ /** * Search the graph cache for the most recent modification to a path. * * @param int The commit ID to search ancestors of. * @param int The path ID to search for changes to.
* @param float Maximum number of seconds to spend trying to satisfy this * query using the graph cache. By default, `0.5` (500ms). * @return mixed Commit ID, or `null` if no ancestors exist, or `false` if * the graph cache was unable to determine the answer. * @task query */ public function loadLastModifiedCommitID($commit_id, $path_id, $time = 0.5) { $commit_id = (int)$commit_id; $path_id = (int)$path_id; $bucket_data = null; $data_key = null; $seen = array(); $t_start = microtime(true); $iterations = 0; while (true) { $bucket_key = $this->getBucketKey($commit_id); if (($data_key != $bucket_key) || $bucket_data === null) { $bucket_data = $this->getBucketData($bucket_key); $data_key = $bucket_key; } if (empty($bucket_data[$commit_id])) { // Rebuild the cache bucket, since the commit might be a very recent // one that we'll pick up by rebuilding. $bucket_data = $this->getBucketData($bucket_key, $bucket_data); if (empty($bucket_data[$commit_id])) { // A rebuild didn't help. This can occur legitimately if the commit // is new and hasn't parsed yet. return false; } // Otherwise, the rebuild gave us the data, so we can keep going. } // Sanity check so we can survive and recover from bad data. if (isset($seen[$commit_id])) { phlog(pht('Unexpected infinite loop in RepositoryGraphCache!')); return false; } else { $seen[$commit_id] = true; } // `$data` is a list: the commit's parent IDs, followed by `null`, // followed by the modified paths in ascending order. We figure out the // first parent first, then check if the path was touched. If the path // was touched, this is the commit we're after. If not, walk backward // in the tree. $items = $bucket_data[$commit_id]; $size = count($items); // Walk past the parent information. $parent_id = null; for ($ii = 0; ; ++$ii) { if ($items[$ii] === null) { break; } if ($parent_id === null) { $parent_id = $items[$ii]; } } // Look for a modification to the path. 
for (; $ii < $size; ++$ii) { $item = $items[$ii]; if ($item > $path_id) { break; } if ($item === $path_id) { return $commit_id; } } if ($parent_id) { $commit_id = $parent_id; // Periodically check if we've spent too long looking for a result // in the cache, and return so we can fall back to a VCS operation. This // keeps us from having a degenerate worst case if, e.g., the cache // is cold and we need to inspect a very large number of blocks // to satisfy the query. if (((++$iterations) % 64) === 0) { $t_end = microtime(true); if (($t_end - $t_start) > $time) { return false; } } continue; } // If we have an explicit 0, that means this commit really has no parents. // Usually, it is the first commit in the repository. if ($parent_id === 0) { return null; } // If we didn't find a parent, the parent data isn't available. We fail // to find an answer in the cache and fall back to querying the VCS. return false; } } /* -( Cache Internals )---------------------------------------------------- */ /** * Get the bucket key for a given commit ID. * * @param int Commit ID. * @return int Bucket key. * @task cache */ private function getBucketKey($commit_id) { return (int)floor($commit_id / $this->getBucketSize()); } /** * Get the cache key for a given bucket key (from @{method:getBucketKey}). * * @param int Bucket key. * @return string Cache key. * @task cache */ private function getBucketCacheKey($bucket_key) { static $prefix; if ($prefix === null) { $self = get_class($this); $size = $this->getBucketSize(); $prefix = "{$self}:{$size}:2:"; } return $prefix.$bucket_key; } /** * Get the number of items per bucket. * * @return int Number of items to store per bucket. * @task cache */ private function getBucketSize() { return 4096; } /** * Retrieve or build a graph cache bucket from the cache. * * Normally, this operates as a readthrough cache call. It can also be used * to force a cache update by passing the existing data to `$rebuild_data`. 
*
   * @param int Bucket key, from @{method:getBucketKey}.
   * @param mixed Current data, to force a cache rebuild of this bucket.
   * @return array Data from the cache.
   * @task cache
   */
  private function getBucketData($bucket_key, $rebuild_data = null) {
    $key = $this->getBucketCacheKey($bucket_key);

    // TODO: The two cache tiers are driven by hand here rather than through a
    // cache stack: the second tier is written with serialized strings, and
    // this cache is appendable, so it is not a pure readthrough cache.
    $l1_cache = PhabricatorCaches::getRepositoryGraphL1Cache();
    $l2_cache = PhabricatorCaches::getRepositoryGraphL2Cache();

    // Only consult the caches when the caller is not forcing a rebuild.
    if ($rebuild_data === null) {
      $l1_hit = $l1_cache->getKey($key);
      if ($l1_hit) {
        return $l1_hit;
      }

      $l2_raw = $l2_cache->getKey($key);
      $l2_hit = $l2_raw ? @unserialize($l2_raw) : null;
      if ($l2_hit) {
        // Second-tier hit after a first-tier miss: backfill the first tier.
        $l1_cache->setKey($key, $l2_hit);
        return $l2_hit;
      }
    }

    // Anything that is not an array (including "no data") rebuilds from empty.
    $existing = is_array($rebuild_data) ? $rebuild_data : array();
    $rebuilt = $this->rebuildBucket($bucket_key, $existing);

    // If the rebuild changed nothing, skip the cache writes entirely.
    if ($rebuilt === $existing) {
      return $rebuilt;
    }

    $l2_cache->setKey($key, serialize($rebuilt));
    $l1_cache->setKey($key, $rebuilt);

    return $rebuilt;
  }


  /**
   * Rebuild a cache bucket, amending existing data if available.
   *
   * @param int Bucket key, from @{method:getBucketKey}.
   * @param array Existing bucket data.
   * @return array Rebuilt bucket data.
* @task cache */ private function rebuildBucket($bucket_key, array $current_data) { $bucket_min = ($bucket_key * $this->getBucketSize()); $bucket_max = ($bucket_min + $this->getBucketSize()) - 1; // We need to reload all of the commits in the bucket because there is // no guarantee that they'll get parsed in order, so we can fill large // commit IDs before small ones. Later on, we'll ignore the commits we // already know about. $table_commit = new PhabricatorRepositoryCommit(); $table_repository = new PhabricatorRepository(); $conn_r = $table_commit->establishConnection('r'); // Find all the Git and Mercurial commits in the block which have completed // change import. We can't fill the cache accurately for commits which have // not completed change import, so just pretend we don't know about them. // In these cases, we will will ultimately fall back to VCS queries. $commit_rows = queryfx_all( $conn_r, 'SELECT c.id FROM %T c JOIN %T r ON c.repositoryID = r.id AND r.versionControlSystem IN (%Ls) WHERE c.id BETWEEN %d AND %d AND (c.importStatus & %d) = %d', $table_commit->getTableName(), $table_repository->getTableName(), array( PhabricatorRepositoryType::REPOSITORY_TYPE_GIT, PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL, ), $bucket_min, $bucket_max, PhabricatorRepositoryCommit::IMPORTED_CHANGE, PhabricatorRepositoryCommit::IMPORTED_CHANGE); // If we don't have any data, just return the existing data. if (!$commit_rows) { return $current_data; } // Remove the commits we already have data for. We don't need to rebuild // these. If there's nothing left, return the existing data. $commit_ids = ipull($commit_rows, 'id', 'id'); $commit_ids = array_diff_key($commit_ids, $current_data); if (!$commit_ids) { return $current_data; } // Find all the path changes for the new commits. 
$path_changes = queryfx_all( $conn_r, 'SELECT commitID, pathID FROM %T WHERE commitID IN (%Ld) AND (isDirect = 1 OR changeType = %d)', PhabricatorRepository::TABLE_PATHCHANGE, $commit_ids, DifferentialChangeType::TYPE_CHILD); $path_changes = igroup($path_changes, 'commitID'); // Find all the parents for the new commits. $parents = queryfx_all( $conn_r, 'SELECT childCommitID, parentCommitID FROM %T WHERE childCommitID IN (%Ld) ORDER BY id ASC', PhabricatorRepository::TABLE_PARENTS, $commit_ids); $parents = igroup($parents, 'childCommitID'); // Build the actual data for the cache. foreach ($commit_ids as $commit_id) { $parent_ids = array(); - if (isset($parents[$commit_id])) { + if (!empty($parents[$commit_id])) { foreach ($parents[$commit_id] as $row) { $parent_ids[] = (int)$row['parentCommitID']; } + } else { + // We expect all rows to have parents (commits with no parents get + // an explicit "0" placeholder). If we're in an older repository, the + // parent information might not have been populated yet. Decline to fill + // the cache if we don't have the parent information, since the fill + // will be incorrect. + continue; } if (isset($path_changes[$commit_id])) { $path_ids = $path_changes[$commit_id]; foreach ($path_ids as $key => $path_id) { $path_ids[$key] = (int)$path_id['pathID']; } sort($path_ids); } else { $path_ids = array(); } $value = $parent_ids; $value[] = null; foreach ($path_ids as $path_id) { $value[] = $path_id; } $current_data[$commit_id] = $value; } return $current_data; } } diff --git a/src/infrastructure/markup/PhabricatorMarkupEngine.php b/src/infrastructure/markup/PhabricatorMarkupEngine.php index 64f3a03924..bedbc089da 100644 --- a/src/infrastructure/markup/PhabricatorMarkupEngine.php +++ b/src/infrastructure/markup/PhabricatorMarkupEngine.php @@ -1,598 +1,603 @@ addObject($comment, $field); * } * * Now, call @{method:process} to perform the actual cache/rendering * step. 
This is a heavyweight call which does batched data access and
 * transforms the markup into output.
 *
 *   $engine->process();
 *
 * Finally, do something with the results:
 *
 *   $results = array();
 *   foreach ($comments as $comment) {
 *     $results[] = $engine->getOutput($comment, $field);
 *   }
 *
 * If you have a single object to render, you can use the convenience method
 * @{method:renderOneObject}.
 *
 * @task markup Markup Pipeline
 * @task engine Engine Construction
 */
final class PhabricatorMarkupEngine {

  // Queued render requests, keyed by markup field key. Each entry holds the
  // 'object' and 'field' passed to addObject(); process() adds 'output'.
  private $objects = array();

  // Viewing user, handed to each per-field engine as its 'viewer' config.
  private $viewer;

  // Folded into every markup field key (see getMarkupFieldKey()).
  private $version = 8;

  // Preprocessor cache data keyed by markup field key. Assigned by process()
  // and read by getEngineMetadata(); declared explicitly so it is never an
  // implicit dynamic property.
  private $engineCaches = array();


/* -( Markup Pipeline )---------------------------------------------------- */


  /**
   * Convenience method for pushing a single object through the markup
   * pipeline.
   *
   * @param PhabricatorMarkupInterface The object to render.
   * @param string The field to render.
   * @param PhabricatorUser User viewing the markup.
   * @return string Marked up output.
   * @task markup
   */
  public static function renderOneObject(
    PhabricatorMarkupInterface $object,
    $field,
    PhabricatorUser $viewer) {
    return id(new PhabricatorMarkupEngine())
      ->setViewer($viewer)
      ->addObject($object, $field)
      ->process()
      ->getOutput($object, $field);
  }


  /**
   * Queue an object for markup generation when @{method:process} is
   * called. You can retrieve the output later with @{method:getOutput}.
   *
   * Queueing the same object and field again replaces the earlier entry,
   * since both map to the same markup field key.
   *
   * @param PhabricatorMarkupInterface The object to render.
   * @param string The field to render.
   * @return this
   * @task markup
   */
  public function addObject(PhabricatorMarkupInterface $object, $field) {
    $key = $this->getMarkupFieldKey($object, $field);
    $this->objects[$key] = array(
      'object' => $object,
      'field'  => $field,
    );

    return $this;
  }


  /**
   * Process objects queued with @{method:addObject}. You can then retrieve
   * the output with @{method:getOutput}.
*
   * @return this
   * @task markup
   */
  public function process() {
    $keys = array();
    foreach ($this->objects as $key => $info) {
      // NOTE(review): the visible code only ever stores 'object', 'field' and
      // 'output' on these entries, so this check appears to select every
      // queued object on every call -- confirm whether 'output' was intended.
      if (!isset($info['markup'])) {
        $keys[] = $key;
      }
    }

    if (!$keys) {
      // Nothing is queued. Return the engine anyway so the documented fluent
      // contract holds and chained calls do not end up invoked on null.
      return $this;
    }

    $objects = array_select_keys($this->objects, $keys);

    // Build all the markup engines. We need an engine for each field whether
    // we have a cache or not, since we still need to postprocess the cache.
    $engines = array();
    foreach ($objects as $key => $info) {
      $engines[$key] = $info['object']->newMarkupEngine($info['field']);
      $engines[$key]->setConfig('viewer', $this->viewer);
    }

    // Load or build the preprocessor caches.
    $blocks = $this->loadPreprocessorCaches($engines, $objects);
    $blocks = mpull($blocks, 'getCacheData');

    // Kept so getEngineMetadata() can read per-field metadata afterwards.
    $this->engineCaches = $blocks;

    // Finalize the output.
    foreach ($objects as $key => $info) {
      $engine = $engines[$key];
      $field = $info['field'];
      $object = $info['object'];

      $output = $engine->postprocessText($blocks[$key]);
      $output = $object->didMarkupText($field, $output, $engine);
      $this->objects[$key]['output'] = $output;
    }

    return $this;
  }


  /**
   * Get the output of markup processing for a field queued with
   * @{method:addObject}. Before you can call this method, you must call
   * @{method:process}.
   *
   * Throws an Exception (via requireKeyProcessed()) if the field was never
   * queued or has not been processed yet.
   *
   * @param PhabricatorMarkupInterface The object to retrieve.
   * @param string The field to retrieve.
   * @return string Processed output.
   * @task markup
   */
  public function getOutput(PhabricatorMarkupInterface $object, $field) {
    $key = $this->getMarkupFieldKey($object, $field);
    $this->requireKeyProcessed($key);

    return $this->objects[$key]['output'];
  }


  /**
   * Retrieve engine metadata for a given field.
   *
   * @param PhabricatorMarkupInterface The object to retrieve.
   * @param string The field to retrieve.
   * @param string The engine metadata field to retrieve.
   * @param wild Optional default value.
* @task markup */ public function getEngineMetadata( PhabricatorMarkupInterface $object, $field, $metadata_key, $default = null) { $key = $this->getMarkupFieldKey($object, $field); $this->requireKeyProcessed($key); return idx($this->engineCaches[$key]['metadata'], $metadata_key, $default); } /** * @task markup */ private function requireKeyProcessed($key) { if (empty($this->objects[$key])) { throw new Exception( "Call addObject() before using results (key = '{$key}')."); } if (!isset($this->objects[$key]['output'])) { throw new Exception( "Call process() before using results."); } } /** * @task markup */ private function getMarkupFieldKey( PhabricatorMarkupInterface $object, $field) { static $custom; if ($custom === null) { $custom = array_merge( self::loadCustomInlineRules(), self::loadCustomBlockRules()); $custom = mpull($custom, 'getRuleVersion', null); ksort($custom); $custom = PhabricatorHash::digestForIndex(serialize($custom)); } return $object->getMarkupFieldKey($field).'@'.$this->version.'@'.$custom; } /** * @task markup */ private function loadPreprocessorCaches(array $engines, array $objects) { $blocks = array(); $use_cache = array(); foreach ($objects as $key => $info) { if ($info['object']->shouldUseMarkupCache($info['field'])) { $use_cache[$key] = true; } } if ($use_cache) { try { $blocks = id(new PhabricatorMarkupCache())->loadAllWhere( 'cacheKey IN (%Ls)', array_keys($use_cache)); $blocks = mpull($blocks, null, 'getCacheKey'); } catch (Exception $ex) { phlog($ex); } } foreach ($objects as $key => $info) { // False check in case MySQL doesn't support unicode characters // in the string (T1191), resulting in unserialize returning false. if (isset($blocks[$key]) && $blocks[$key]->getCacheData() !== false) { // If we already have a preprocessing cache, we don't need to rebuild // it. 
continue; } $text = $info['object']->getMarkupText($info['field']); $data = $engines[$key]->preprocessText($text); // NOTE: This is just debugging information to help sort out cache issues. // If one machine is misconfigured and poisoning caches you can use this // field to hunt it down. $metadata = array( 'host' => php_uname('n'), ); $blocks[$key] = id(new PhabricatorMarkupCache()) ->setCacheKey($key) ->setCacheData($data) ->setMetadata($metadata); if (isset($use_cache[$key])) { // This is just filling a cache and always safe, even on a read pathway. $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); $blocks[$key]->replace(); unset($unguarded); } } return $blocks; } /** * Set the viewing user. Used to implement object permissions. * * @param PhabricatorUser The viewing user. * @return this * @task markup */ public function setViewer(PhabricatorUser $viewer) { $this->viewer = $viewer; return $this; } /* -( Engine Construction )------------------------------------------------ */ /** * @task engine */ public static function newManiphestMarkupEngine() { return self::newMarkupEngine(array( )); } /** * @task engine */ public static function newPhrictionMarkupEngine() { return self::newMarkupEngine(array( 'header.generate-toc' => true, )); } /** * @task engine */ public static function newPhameMarkupEngine() { return self::newMarkupEngine(array( 'macros' => false, )); } /** * @task engine */ public static function newFeedMarkupEngine() { return self::newMarkupEngine( array( 'macros' => false, 'youtube' => false, )); } /** * @task engine */ public static function newDifferentialMarkupEngine(array $options = array()) { return self::newMarkupEngine(array( 'differential.diff' => idx($options, 'differential.diff'), )); } /** * @task engine */ public static function newDiffusionMarkupEngine(array $options = array()) { return self::newMarkupEngine(array( 'header.generate-toc' => true, )); } /** * @task engine */ public static function getEngine($ruleset = 'default') 
{ static $engines = array(); if (isset($engines[$ruleset])) { return $engines[$ruleset]; } $engine = null; switch ($ruleset) { case 'default': $engine = self::newMarkupEngine(array()); break; case 'nolinebreaks': $engine = self::newMarkupEngine(array()); $engine->setConfig('preserve-linebreaks', false); break; + case 'diffusion-readme': + $engine = self::newMarkupEngine(array()); + $engine->setConfig('preserve-linebreaks', false); + $engine->setConfig('header.generate-toc', true); + break; case 'diviner': $engine = self::newMarkupEngine(array()); $engine->setConfig('preserve-linebreaks', false); // $engine->setConfig('diviner.renderer', new DivinerDefaultRenderer()); $engine->setConfig('header.generate-toc', true); break; case 'extract': // Engine used for reference/edge extraction. Turn off anything which // is slow and doesn't change reference extraction. $engine = self::newMarkupEngine(array()); $engine->setConfig('pygments.enabled', false); break; default: throw new Exception("Unknown engine ruleset: {$ruleset}!"); } $engines[$ruleset] = $engine; return $engine; } /** * @task engine */ private static function getMarkupEngineDefaultConfiguration() { return array( 'pygments' => PhabricatorEnv::getEnvConfig('pygments.enabled'), 'youtube' => PhabricatorEnv::getEnvConfig( 'remarkup.enable-embedded-youtube'), 'differential.diff' => null, 'header.generate-toc' => false, 'macros' => true, 'uri.allowed-protocols' => PhabricatorEnv::getEnvConfig( 'uri.allowed-protocols'), 'syntax-highlighter.engine' => PhabricatorEnv::getEnvConfig( 'syntax-highlighter.engine'), 'preserve-linebreaks' => true, ); } /** * @task engine */ public static function newMarkupEngine(array $options) { $options += self::getMarkupEngineDefaultConfiguration(); $engine = new PhutilRemarkupEngine(); $engine->setConfig('preserve-linebreaks', $options['preserve-linebreaks']); $engine->setConfig('pygments.enabled', $options['pygments']); $engine->setConfig( 'uri.allowed-protocols', 
$options['uri.allowed-protocols']); $engine->setConfig('differential.diff', $options['differential.diff']); $engine->setConfig('header.generate-toc', $options['header.generate-toc']); $engine->setConfig( 'syntax-highlighter.engine', $options['syntax-highlighter.engine']); $rules = array(); $rules[] = new PhutilRemarkupRuleEscapeRemarkup(); $rules[] = new PhutilRemarkupRuleMonospace(); $rules[] = new PhutilRemarkupRuleDocumentLink(); if ($options['youtube']) { $rules[] = new PhabricatorRemarkupRuleYoutube(); } $applications = PhabricatorApplication::getAllInstalledApplications(); foreach ($applications as $application) { foreach ($application->getRemarkupRules() as $rule) { $rules[] = $rule; } } $rules[] = new PhutilRemarkupRuleHyperlink(); if ($options['macros']) { $rules[] = new PhabricatorRemarkupRuleImageMacro(); $rules[] = new PhabricatorRemarkupRuleMeme(); } $rules[] = new PhutilRemarkupRuleBold(); $rules[] = new PhutilRemarkupRuleItalic(); $rules[] = new PhutilRemarkupRuleDel(); $rules[] = new PhutilRemarkupRuleUnderline(); foreach (self::loadCustomInlineRules() as $rule) { $rules[] = $rule; } $blocks = array(); $blocks[] = new PhutilRemarkupEngineRemarkupQuotesBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupReplyBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupLiteralBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupHeaderBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupHorizontalRuleBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupListBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupCodeBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupNoteBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupTableBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupSimpleTableBlockRule(); $blocks[] = new PhutilRemarkupEngineRemarkupInterpreterRule(); $blocks[] = new PhutilRemarkupEngineRemarkupDefaultBlockRule(); foreach (self::loadCustomBlockRules() as $rule) { $blocks[] = $rule; } foreach ($blocks as $block) { 
$block->setMarkupRules($rules); } $engine->setBlockRules($blocks); return $engine; } public static function extractPHIDsFromMentions( PhabricatorUser $viewer, array $content_blocks) { $mentions = array(); $engine = self::newDifferentialMarkupEngine(); $engine->setConfig('viewer', $viewer); foreach ($content_blocks as $content_block) { $engine->markupText($content_block); $phids = $engine->getTextMetadata( PhabricatorRemarkupRuleMention::KEY_MENTIONED, array()); $mentions += $phids; } return $mentions; } public static function extractFilePHIDsFromEmbeddedFiles( PhabricatorUser $viewer, array $content_blocks) { $files = array(); $engine = self::newDifferentialMarkupEngine(); $engine->setConfig('viewer', $viewer); foreach ($content_blocks as $content_block) { $engine->markupText($content_block); $ids = $engine->getTextMetadata( PhabricatorRemarkupRuleEmbedFile::KEY_EMBED_FILE_PHIDS, array()); $files += $ids; } return $files; } /** * Produce a corpus summary, in a way that shortens the underlying text * without truncating it somewhere awkward. * * TODO: We could do a better job of this. * * @param string Remarkup corpus to summarize. * @return string Summarized corpus. */ public static function summarize($corpus) { // Major goals here are: // - Don't split in the middle of a character (utf-8). // - Don't split in the middle of, e.g., **bold** text, since // we end up with hanging '**' in the summary. // - Try not to pick an image macro, header, embedded file, etc. // - Hopefully don't return too much text. We don't explicitly limit // this right now. $blocks = preg_split("/\n *\n\s*/", trim($corpus)); $best = null; foreach ($blocks as $block) { // This is a test for normal spaces in the block, i.e. a heuristic to // distinguish standard paragraphs from things like image macros. It may // not work well for non-latin text. We prefer to summarize with a // paragraph of normal words over an image macro, if possible. 
$has_space = preg_match('/\w\s\w/', $block); // This is a test to find embedded images and headers. We prefer to // summarize with a normal paragraph over a header or an embedded object, // if possible. $has_embed = preg_match('/^[{=]/', $block); if ($has_space && !$has_embed) { // This seems like a good summary, so return it. return $block; } if (!$best) { // This is the first block we found; if everything is garbage just // use the first block. $best = $block; } } return $best; } private static function loadCustomInlineRules() { return id(new PhutilSymbolLoader()) ->setAncestorClass('PhabricatorRemarkupCustomInlineRule') ->loadObjects(); } private static function loadCustomBlockRules() { return id(new PhutilSymbolLoader()) ->setAncestorClass('PhabricatorRemarkupCustomBlockRule') ->loadObjects(); } } diff --git a/src/infrastructure/markup/PhabricatorMarkupOneOff.php b/src/infrastructure/markup/PhabricatorMarkupOneOff.php index ec112e5c00..d3350bfd27 100644 --- a/src/infrastructure/markup/PhabricatorMarkupOneOff.php +++ b/src/infrastructure/markup/PhabricatorMarkupOneOff.php @@ -1,68 +1,80 @@ setContent($some_content), * 'default', * $viewer); * * This is less efficient than batching rendering, but appropriate for small * amounts of one-off text in form instructions. 
*/
final class PhabricatorMarkupOneOff implements PhabricatorMarkupInterface {

  private $content;
  private $preserveLinebreaks;
  private $engineRuleset;

  /**
   * Select a named engine ruleset to render with. When set, it takes
   * precedence over the linebreak setting in @{method:newMarkupEngine}.
   *
   * @param string Ruleset name understood by
   *               PhabricatorMarkupEngine::getEngine().
   * @return this
   */
  public function setEngineRuleset($engine_ruleset) {
    $this->engineRuleset = $engine_ruleset;
    return $this;
  }

  /**
   * @return string|null The explicitly selected ruleset, if any.
   */
  public function getEngineRuleset() {
    return $this->engineRuleset;
  }

  /**
   * Choose between the default engine (truthy) and the 'nolinebreaks' engine
   * (falsey). Only consulted when no explicit ruleset is set.
   *
   * @param bool Whether to preserve linebreaks.
   * @return this
   */
  public function setPreserveLinebreaks($preserve_linebreaks) {
    $this->preserveLinebreaks = $preserve_linebreaks;
    return $this;
  }

  /**
   * @param string Raw remarkup text to render.
   * @return this
   */
  public function setContent($content) {
    $this->content = $content;
    return $this;
  }

  /**
   * @return string Raw remarkup text.
   */
  public function getContent() {
    return $this->content;
  }

  /**
   * Build the key identifying this content in the markup pipeline.
   *
   * The key is derived from a digest of the content. When an explicit engine
   * ruleset is selected, the ruleset name is appended so that the same text
   * rendered under different rulesets does not share a key (and therefore a
   * cache entry) with output produced by a differently configured engine.
   * Keys for objects without an explicit ruleset are unchanged.
   *
   * NOTE(review): the linebreak setting also changes which engine is used but
   * is still not part of the key; left as-is to keep existing keys stable.
   *
   * @param string Field name (unused; one-off objects have a single field).
   * @return string Markup field key.
   */
  public function getMarkupFieldKey($field) {
    $key = PhabricatorHash::digestForIndex($this->getContent()).':oneoff';

    if ($this->engineRuleset) {
      $key .= ':'.$this->engineRuleset;
    }

    return $key;
  }

  /**
   * Pick the engine used to render this content: the explicit ruleset if one
   * is set, otherwise the default engine when linebreaks are preserved, and
   * the 'nolinebreaks' engine in all other cases.
   *
   * @param string Field name (unused).
   * @return wild Engine returned by PhabricatorMarkupEngine::getEngine().
   */
  public function newMarkupEngine($field) {
    if ($this->engineRuleset) {
      return PhabricatorMarkupEngine::getEngine($this->engineRuleset);
    } else if ($this->preserveLinebreaks) {
      return PhabricatorMarkupEngine::getEngine();
    } else {
      return PhabricatorMarkupEngine::getEngine('nolinebreaks');
    }
  }

  /**
   * @param string Field name (unused).
   * @return string Text to run through the markup engine.
   */
  public function getMarkupText($field) {
    return $this->getContent();
  }

  /**
   * Wrap rendered output in the standard remarkup container and pull in the
   * remarkup stylesheet.
   *
   * @param string Field name (unused).
   * @param wild Rendered output from the engine.
   * @param PhutilMarkupEngine Engine which produced the output (unused).
   * @return wild Output wrapped in a `phabricator-remarkup` div.
   */
  public function didMarkupText(
    $field,
    $output,
    PhutilMarkupEngine $engine) {

    require_celerity_resource('phabricator-remarkup-css');
    return phutil_tag(
      'div',
      array(
        'class' => 'phabricator-remarkup',
      ),
      $output);
  }

  /**
   * One-off content always goes through the markup cache.
   *
   * @param string Field name (unused).
   * @return bool Always true.
   */
  public function shouldUseMarkupCache($field) {
    return true;
  }

}