diff --git a/scripts/phutil_rebuild_map.php b/scripts/phutil_rebuild_map.php index bd9a6621..865f30dc 100755 --- a/scripts/phutil_rebuild_map.php +++ b/scripts/phutil_rebuild_map.php @@ -1,578 +1,588 @@ #!/usr/bin/env php setTagline('rebuild the library map file'); $args->setSynopsis(<<parseStandardArguments(); $args->parse( array( array( 'name' => 'quiet', 'help' => 'Do not write status messages to stderr.', ), array( 'name' => 'drop-cache', 'help' => 'Drop the symbol cache and rebuild the entire map from '. 'scratch.', ), array( 'name' => 'limit', 'param' => 'N', 'default' => 8, 'help' => 'Controls the number of symbol mapper subprocesses run '. 'at once. Defaults to 8.', ), array( 'name' => 'show', 'help' => 'Print symbol map to stdout instead of writing it to the '. 'map file.', ), array( 'name' => 'ugly', 'help' => 'Use faster but less readable serialization for --show.', ), array( 'name' => 'root', 'wildcard' => true, ) )); $root = $args->getArg('root'); if (count($root) !== 1) { throw new Exception("Provide exactly one library root!"); } $root = Filesystem::resolvePath(head($root)); $builder = new PhutilLibraryMapBuilder($root); $builder->setQuiet($args->getArg('quiet')); $builder->setSubprocessLimit($args->getArg('limit')); if ($args->getArg('drop-cache')) { $builder->dropSymbolCache(); } if ($args->getArg('show')) { $builder->setShowMap(true); $builder->setUgly($args->getArg('ugly')); } $builder->buildMap(); exit(0); /** * Build maps of libphutil libraries. libphutil uses the library map to locate * and load classes and functions in the library. 
* * @task map Mapping libphutil Libraries * @task path Path Management * @task symbol Symbol Analysis and Caching * @task source Source Management */ final class PhutilLibraryMapBuilder { private $root; private $quiet; private $subprocessLimit = 8; private $ugly; private $showMap; const LIBRARY_MAP_VERSION_KEY = '__library_version__'; const LIBRARY_MAP_VERSION = 2; const SYMBOL_CACHE_VERSION_KEY = '__symbol_cache_version__'; const SYMBOL_CACHE_VERSION = 2; /* -( Mapping libphutil Libraries )---------------------------------------- */ /** * Create a new map builder for a library. * * @param string Path to the library root. * * @task map */ public function __construct($root) { $this->root = $root; } /** * Control status output. Use --quiet to set this. * * @param bool If true, don't show status output. * @return this * * @task map */ public function setQuiet($quiet) { $this->quiet = $quiet; return $this; } /** * Control subprocess parallelism limit. Use --limit to set this. * * @param int Maximum number of subprocesses to run in parallel. * @return this * * @task map */ public function setSubprocessLimit($limit) { $this->subprocessLimit = $limit; return $this; } /** * Control whether the ugly (but fast) or pretty (but slower) JSON formatter * is used. * * @param bool If true, use the fastest formatter. * @return this * * @task map */ public function setUgly($ugly) { $this->ugly = $ugly; return $this; } /** * Control whether the map should be rebuilt, or just shown (printed to * stdout in JSON). * * @param bool If true, show map instead of updating. * @return this * * @task map */ public function setShowMap($show_map) { $this->showMap = $show_map; return $this; } /** * Build or rebuild the library map. * * @return this * * @task map */ public function buildMap() { // Identify all the ".php" source files in the library. $this->log("Finding source files...\n"); $source_map = $this->loadSourceFileMap(); $this->log("Found ".number_format(count($source_map))." 
files.\n"); // Load the symbol cache with existing parsed symbols. This allows us // to remap libraries quickly by analyzing only changed files. $this->log("Loading symbol cache...\n"); $symbol_cache = $this->loadSymbolCache(); // Build out the symbol analysis for all the files in the library. For // each file, check if it's in cache. If we miss in the cache, do a fresh // analysis. $symbol_map = array(); $futures = array(); foreach ($source_map as $file => $hash) { if (!empty($symbol_cache[$hash])) { $symbol_map[$file] = $symbol_cache[$hash]; continue; } $futures[$file] = $this->buildSymbolAnalysisFuture($file); } $this->log("Found ".number_format(count($symbol_map))." files in cache.\n"); // Run the analyzer on any files which need analysis. if ($futures) { $limit = $this->subprocessLimit; $count = number_format(count($futures)); $this->log("Analyzing {$count} files with {$limit} subprocesses...\n"); foreach (Futures($futures)->limit($limit) as $file => $future) { + $result = $future->resolveJSON(); + if (empty($result['error'])) { + $symbol_map[$file] = $result; + } else { + echo phutil_console_format( + "\n**SYNTAX ERROR!**\nFile: %s\nLine: %d\n\n%s\n", + Filesystem::readablePath($result['file']), + $result['line'], + $result['error']); + exit(1); + } $this->log("."); - $symbol_map[$file] = $future->resolveJSON(); } $this->log("\nDone.\n"); } // We're done building the cache, so write it out immediately. Note that // we've only retained entries for files we found, so this implicitly cleans // out old cache entries. $this->writeSymbolCache($symbol_map, $source_map); // Our map is up to date, so either show it on stdout or write it to disk. 
if ($this->showMap) { $this->log("Showing map...\n"); if ($this->ugly) { echo json_encode($symbol_map); } else { $json = new PhutilJSON(); echo $json->encodeFormatted($symbol_map); } } else { $this->log("Building library map...\n"); $library_map = $this->buildLibraryMap($symbol_map, $source_map); $this->log("Writing map...\n"); $this->writeLibraryMap($library_map); } $this->log("Done.\n"); return $this; } /** * Write a status message to the user, if not running in quiet mode. * * @param string Message to write. * @return this * * @task map */ private function log($message) { if (!$this->quiet) { @fwrite(STDERR, $message); } return $this; } /* -( Path Management )---------------------------------------------------- */ /** * Get the path to some file in the library. * * @param string A library-relative path. If omitted, returns the library * root path. * @return string An absolute path. * * @task path */ private function getPath($path = '') { return $this->root.'/'.$path; } /** * Get the path to the symbol cache file. * * @return string Absolute path to symbol cache. * * @task path */ private function getPathForSymbolCache() { return $this->getPath('.phutil_module_cache'); } /** * Get the path to the map file. * * @return string Absolute path to the library map. * * @task path */ private function getPathForLibraryMap() { return $this->getPath('__phutil_library_map__.php'); } /** * Get the path to the library init file. * * @return string Absolute path to the library init file * * @task path */ private function getPathForLibraryInit() { return $this->getPath('__phutil_library_init__.php'); } /* -( Symbol Analysis and Caching )---------------------------------------- */ /** * Load the library symbol cache, if it exists and is readable and valid. * * @return dict Map of content hashes to cache of output from * `phutil_symbols.php`. 
*
   * @task symbol
   */
  private function loadSymbolCache() {
    $cache_file = $this->getPathForSymbolCache();

    // A missing or unreadable cache file is not an error: we simply start
    // with an empty cache.
    try {
      $cache = Filesystem::readFile($cache_file);
    } catch (Exception $ex) {
      $cache = null;
    }

    $symbol_cache = array();
    if ($cache) {
      $symbol_cache = json_decode($cache, true);
      if (!is_array($symbol_cache)) {
        // The file did not decode to a map; treat it as an empty cache.
        $symbol_cache = array();
      }
    }

    $version = idx($symbol_cache, self::SYMBOL_CACHE_VERSION_KEY);
    if ($version != self::SYMBOL_CACHE_VERSION) {
      // Throw away caches from a different version of the library.
      $symbol_cache = array();
    }

    // The version marker is bookkeeping, not a cache entry, so strip it
    // before handing the map back to the caller.
    unset($symbol_cache[self::SYMBOL_CACHE_VERSION_KEY]);

    return $symbol_cache;
  }


  /**
   * Write a symbol map to disk cache. Entries are keyed by the content hash
   * of each file (looked up in the source map) rather than by path, and the
   * cache is stamped with the current symbol cache version.
   *
   * @param   dict  Symbol map of relative paths to symbols.
   * @param   dict  Source map (like @{method:loadSourceFileMap}).
   * @return  void
   *
   * @task symbol
   */
  private function writeSymbolCache(array $symbol_map, array $source_map) {
    $cache_file = $this->getPathForSymbolCache();

    $cache = array(
      self::SYMBOL_CACHE_VERSION_KEY => self::SYMBOL_CACHE_VERSION,
    );

    foreach ($symbol_map as $file => $symbols) {
      $cache[$source_map[$file]] = $symbols;
    }

    $json = json_encode($cache);
    Filesystem::writeFile($cache_file, $json);
  }


  /**
   * Drop the symbol cache, forcing a clean rebuild.
   *
   * @return this
   *
   * @task symbol
   */
  public function dropSymbolCache() {
    $this->log("Dropping symbol cache...\n");
    Filesystem::remove($this->getPathForSymbolCache());

    // Return the builder, as documented and as the other public
    // configuration methods of this class do.
    return $this;
  }


  /**
   * Build a future which returns a `phutil_symbols.php` analysis of a source
   * file. The analyzer is expected to live in the same directory as this
   * script and is invoked with `--ugly`.
   *
   * @param   string  Relative path to the source file to analyze.
   * @return  Future  Analysis future.
   *
   * @task symbol
   */
  private function buildSymbolAnalysisFuture($file) {
    $absolute_file = $this->getPath($file);
    $bin = dirname(__FILE__).'/phutil_symbols.php';

    return new ExecFuture('%s --ugly -- %s', $bin, $absolute_file);
  }


/* -(  Source Management  )-------------------------------------------------- */


  /**
   * Build a map of all source files in a library to hashes of their content.
* Returns an array like this: * * array( * 'src/parser/ExampleParser.php' => '60b725f10c9c85c70d97880dfe8191b3', * // ... * ); * * @return dict Map of library-relative paths to content hashes. * @task source */ private function loadSourceFileMap() { $root = $this->getPath(); $init = $this->getPathForLibraryInit(); if (!Filesystem::pathExists($init)) { throw new Exception("Provided path '{$root}' is not a phutil library."); } $files = id(new FileFinder($root)) ->withType('f') ->withSuffix('php') ->excludePath('*/.*') ->setGenerateChecksums(true) ->find(); $map = array(); foreach ($files as $file => $hash) { if (basename($file) == '__init__.php') { // TODO: Remove this once we kill __init__.php. This just makes the // script run faster until we do, so testing and development is less // annoying. continue; } $file = Filesystem::readablePath($file, $root); $file = ltrim($file, '/'); if (dirname($file) == '.') { // We don't permit normal source files at the root level, so just ignore // them; they're special library files. continue; } $map[$file] = $hash; } return $map; } /** * Convert the symbol analysis of all the source files in the library into * a library map. * * @param dict Symbol analysis of all source files. * @return dict Library map. * @task source */ private function buildLibraryMap(array $symbol_map) { $library_map = array( 'class' => array(), 'function' => array(), 'xmap' => array(), ); // Detect duplicate symbols within the library. foreach ($symbol_map as $file => $info) { foreach ($info['have'] as $type => $symbols) { foreach ($symbols as $symbol => $declaration) { $lib_type = ($type == 'interface') ? 'class' : $type; if (!empty($library_map[$lib_type][$symbol])) { $prior = $library_map[$lib_type][$symbol]; throw new Exception( "Definition of {$type} '{$symbol}' in file '{$file}' duplicates ". "prior definition in file '{$prior}'. You can not declare the ". 
"same symbol twice."); } $library_map[$lib_type][$symbol] = $file; } } $library_map['xmap'] += $info['xmap']; } // Simplify the common case (one parent) to make the file a little easier // to deal with. foreach ($library_map['xmap'] as $class => $extends) { if (count($extends) == 1) { $library_map['xmap'][$class] = reset($extends); } } // Sort the map so it is relatively stable across changes. foreach ($library_map as $key => $symbols) { ksort($symbols); $library_map[$key] = $symbols; } ksort($library_map); return $library_map; } /** * Write a finalized library map. * * @param dict Library map structure to write. * @return void * * @task source */ private function writeLibraryMap(array $library_map) { $map_file = $this->getPathForLibraryMap(); $version = self::LIBRARY_MAP_VERSION; $library_map = array( self::LIBRARY_MAP_VERSION_KEY => $version, ) + $library_map; $library_map = var_export($library_map, $return_string = true); $library_map = preg_replace('/\s+$/m', '', $library_map); $library_map = preg_replace('/array \(/', 'array(', $library_map); $at = '@'; $source_file = <<setTagline('identify symbols in a PHP source file'); $args->setSynopsis(<<parseStandardArguments(); $args->parse( array( array( 'name' => 'all', 'help' => 'Report all symbols, including builtins and declared '. 
'externals.', ), array( 'name' => 'ugly', 'help' => 'Do not prettify JSON output.', ), array( 'name' => 'path', 'wildcard' => true, 'help' => 'PHP Source file to analyze.', ), )); $paths = $args->getArg('path'); if (count($paths) !== 1) { throw new Exception("Specify exactly one path!"); } $path = Filesystem::resolvePath(head($paths)); $show_all = $args->getArg('all'); $source_code = Filesystem::readFile($path); -$tree = XHPASTTree::newFromData($source_code); + +try { + $tree = XHPASTTree::newFromData($source_code); +} catch (XHPASTSyntaxErrorException $ex) { + $result = array( + 'error' => $ex->getMessage(), + 'line' => $ex->getErrorLine(), + 'file' => $path, + ); + $json = new PhutilJSON(); + echo $json->encodeFormatted($result); + exit(0); +} + $root = $tree->getRootNode(); $root->buildSelectCache(); // -( Marked Externals )------------------------------------------------------ // Identify symbols marked with "@phutil-external-symbol", so we exclude them // from the dependency list. $externals = array(); $doc_parser = new PhutilDocblockParser(); foreach ($root->getTokens() as $token) { if ($token->getTypeName() == 'T_DOC_COMMENT') { list($block, $special) = $doc_parser->parse($token->getValue()); $ext_list = idx($special, 'phutil-external-symbol'); $ext_list = explode("\n", $ext_list); $ext_list = array_filter($ext_list); foreach ($ext_list as $ext_ref) { $matches = null; if (preg_match('/^\s*(\S+)\s+(\S+)/', $ext_ref, $matches)) { $externals[$matches[1]][$matches[2]] = true; } } } } // -( Declarations and Dependencies )----------------------------------------- // The first stage of analysis is to find all the symbols we declare in the // file (like functions and classes) and all the symbols we use in the file // (like calling functions and invoking classes). Later, we filter this list // to exclude builtins. $have = array(); // For symbols we declare. $need = array(); // For symbols we use. $xmap = array(); // For extended classes and implemented interfaces. 
// -( Functions )-------------------------------------------------------------


// Collect the named functions this file declares.

// Matches "function f() { ... }".
$declarations = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION');
foreach ($declarations as $declaration) {
  $name = $declaration->getChildByIndex(2);

  // An empty name node means an anonymous function, which has no symbol to
  // record in the index.
  if ($name->getTypeName() != 'n_EMPTY') {
    $have[] = array(
      'type'    => 'function',
      'symbol'  => $name,
    );
  }
}


// Collect the functions this file uses. We recognize:
//
//  - Explicit calls
//  - A string literal passed to call_user_func() or call_user_func_array()
//
// TODO: Possibly support these:
//
//  - String literal in ReflectionFunction().

// Callee node types we do not record here: variable callees can't be
// analyzed, and "C::f()" is picked up later with the static accesses.
$skipped_callee_types = array(
  'n_VARIABLE',
  'n_VARIABLE_VARIABLE',
  'n_CLASS_STATIC_ACCESS',
);

// Functions whose first argument names the function really being called.
$indirect_callers = array(
  'call_user_func',
  'call_user_func_array',
);

// Matches "f();".
$invocations = $root->selectDescendantsOfType('n_FUNCTION_CALL');
foreach ($invocations as $call) {
  $name = $call->getChildByIndex(0);
  if (in_array($name->getTypeName(), $skipped_callee_types)) {
    continue;
  }

  if (!in_array($name->getConcreteString(), $indirect_callers)) {
    // An ordinary direct call: the callee itself is the dependency.
    $need[] = array(
      'type'    => 'function',
      'symbol'  => $name,
    );
    continue;
  }

  $params = $call->getChildByIndex(1)->getChildren();
  if (!count($params)) {
    // A bare call_user_func() with no arguments; nothing to record.
    continue;
  }

  // Only a string literal first argument tells us which function is used.
  $symbol = array_shift($params);
  $symbol_value = $symbol->getStringLiteralValue();
  if ($symbol_value) {
    $need[] = array(
      'type'    => 'function',
      'name'    => $symbol_value,
      'symbol'  => $symbol,
    );
  }
}


// -( Classes )---------------------------------------------------------------


// Find classes declared by this file.


// This is "class X ... { ... }".
foreach ($root->selectDescendantsOfType('n_CLASS_DECLARATION') as $decl) {
  $have[] = array(
    'type'    => 'class',
    'symbol'  => $decl->getChildByIndex(1),
  );
}


// Collect the classes this file uses. We recognize:
//
//  - class ... extends X
//  - new X
//  - Static method call
//  - Static property access
//  - Use of class constant
//
// TODO: Possibly support these:
//
//  - typehints
//  - instanceof
//  - catch
//  - String literal in ReflectionClass().
//  - String literal in array literal in call_user_func()/call_user_func_array()

// Matches "class X ... { ... }".
foreach ($root->selectDescendantsOfType('n_CLASS_DECLARATION') as $decl) {
  $class_name = $decl->getChildByIndex(1)->getConcreteString();
  $parents = $decl->getChildByIndex(2)->selectDescendantsOfType('n_CLASS_NAME');
  foreach ($parents as $parent) {
    $need[] = array(
      'type'    => 'class',
      'symbol'  => $parent,
    );

    // Every 'extends' is also recorded in the extension map.
    $xmap[$class_name][] = $parent->getConcreteString();
  }
}

// Matches "new X()". Variable class names can't be resolved, so skip them.
$dynamic_types = array('n_VARIABLE', 'n_VARIABLE_VARIABLE');
foreach ($root->selectDescendantsOfType('n_NEW') as $instantiation) {
  $name = $instantiation->getChildByIndex(0);
  if (in_array($name->getTypeName(), $dynamic_types)) {
    continue;
  }
  $need[] = array(
    'type'    => 'class',
    'symbol'  => $name,
  );
}

// Matches all of "X::$y", "X::y()" and "X::CONST".
$magic_names = array(
  'static'  => true,
  'parent'  => true,
  'self'    => true,
);
foreach ($root->selectDescendantsOfType('n_CLASS_STATIC_ACCESS') as $access) {
  $name = $access->getChildByIndex(0);
  if ($name->getTypeName() != 'n_CLASS_NAME') {
    continue;
  }

  // "static", "parent" and "self" are keywords, not real class names.
  if (isset($magic_names[$name->getConcreteString()])) {
    continue;
  }

  $need[] = array(
    'type'    => 'class',
    'symbol'  => $name,
  );
}


// -( Interfaces )------------------------------------------------------------

// Find interfaces declared in this file.
// Matches "interface X ... { ... }".
foreach ($root->selectDescendantsOfType('n_INTERFACE_DECLARATION') as $decl) {
  $have[] = array(
    'type'    => 'interface',
    'symbol'  => $decl->getChildByIndex(1),
  );
}


// Collect the interfaces this file uses. We recognize:
//
//  - class ... implements X
//  - interface ... extends X


// Matches "class X ... { ... }".
foreach ($root->selectDescendantsOfType('n_CLASS_DECLARATION') as $decl) {
  $class_name = $decl->getChildByIndex(1)->getConcreteString();
  $implemented = $decl
    ->getChildByIndex(3)
    ->selectDescendantsOfType('n_CLASS_NAME');
  foreach ($implemented as $interface) {
    $need[] = array(
      'type'    => 'interface',
      'symbol'  => $interface,
    );

    // 'class ... implements' is also recorded in the extension map.
    $xmap[$class_name][] = $interface->getConcreteString();
  }
}


// Matches "interface X ... { ... }".
foreach ($root->selectDescendantsOfType('n_INTERFACE_DECLARATION') as $decl) {
  $interface_name = $decl->getChildByIndex(1)->getConcreteString();
  $parents = $decl->getChildByIndex(2)->selectDescendantsOfType('n_CLASS_NAME');
  foreach ($parents as $parent) {
    $need[] = array(
      'type'    => 'interface',
      'symbol'  => $parent,
    );

    // 'interface ... extends' is also recorded in the extension map.
    $xmap[$interface_name][] = $parent->getConcreteString();
  }
}


// -( Analysis )--------------------------------------------------------------


// Index each declared symbol by type and name, mapped to its offset.
$declared_symbols = array();
foreach ($have as $spec) {
  $node = $spec['symbol'];
  $name = $node->getConcreteString();
  $declared_symbols[$spec['type']][$name] = $node->getOffset();
}

// Reduce the raw list of uses to the symbols this file actually requires.
$required_symbols = array();
foreach ($need as $spec) {
  $type = $spec['type'];
  $node = $spec['symbol'];

  // An explicit 'name' (for example, from a call_user_func() string
  // literal) takes precedence over the node's own text.
  $name = idx($spec, 'name');
  if (!$name) {
    $name = $node->getConcreteString();
  }

  // Unless every symbol was requested, drop symbols marked as externals
  // and symbols which are builtins.
  $is_external = !empty($externals[$type][$name]);
  $is_builtin = !empty($builtins[$type][$name]);
  if (!$show_all && ($is_external || $is_builtin)) {
    continue;
  }

  // Only the first use of a symbol is reported, and symbols this file
  // declares itself are not requirements.
  $is_reported = !empty($required_symbols[$type][$name]);
  $is_declared = !empty($declared_symbols[$type][$name]);
  if ($is_reported || $is_declared) {
    continue;
  }

  $required_symbols[$type][$name] = $node->getOffset();
}

$result = array(
  'have'  => $declared_symbols,
  'need'  => $required_symbols,
  'xmap'  => $xmap,
);


// -( Output )----------------------------------------------------------------


if ($args->getArg('ugly')) {
  $output = json_encode($result);
} else {
  $formatter = new PhutilJSON();
  $output = $formatter->encodeFormatted($result);
}
echo $output;


// -( Library )---------------------------------------------------------------

/**
 * Build the map of symbols treated as builtins, from the classes,
 * interfaces and internal functions PHP reports at the time of the call.
 *
 * @return dict Map with 'class', 'function' and 'interface' keys, each a map
 *              of symbol names to true.
 */
function phutil_symbols_get_builtins() {
  $class_map = array_fill_keys(get_declared_classes(), true);
  $class_map += array(
    'PhutilBootloader' => true,
  );

  $interface_map = array_fill_keys(get_declared_interfaces(), true);

  $defined_functions = get_defined_functions();

  // Entries listed here win over the introspected list below, because the
  // union keeps the left-hand value for duplicate keys.
  $function_map = array(
    'empty' => true,
    'isset' => true,
    'die'   => true,

    // These are provided by libphutil but not visible in the map.

    'phutil_is_windows'   => true,
    'phutil_load_library' => true,
    'phutil_is_hiphop_runtime' => true,

    // HPHP/i defines these functions as 'internal', but they are NOT
    // builtins and do not exist in vanilla PHP. Make sure we don't mark
    // them as builtin since we need to add dependencies for them.
    'idx'   => false,
    'id'    => false,
  );
  $function_map += array_fill_keys($defined_functions['internal'], true);

  // Filtering drops the entries explicitly marked false above.
  $function_map = array_filter($function_map);

  return array(
    'class'     => $class_map,
    'function'  => $function_map,
    'interface' => $interface_map,
  );
}