diff --git a/scripts/crossref/generate_php_symbols.php b/scripts/symbols/generate_php_symbols.php similarity index 98% rename from scripts/crossref/generate_php_symbols.php rename to scripts/symbols/generate_php_symbols.php index 2a8de4738f..d648d0f010 100755 --- a/scripts/crossref/generate_php_symbols.php +++ b/scripts/symbols/generate_php_symbols.php @@ -1,80 +1,80 @@
#!/usr/bin/env php
// NOTE(review): the script preamble that belongs between the shebang line and
// "limit(8)" is not present in this text; the fragment is kept verbatim.
limit(8) as $file => $future) {
  // Build the tree for this file from $data[$file] and the resolved future.
  $tree = XHPASTTree::newFromDataAndResolvedExecFuture(
    $data[$file],
    $future->resolve());

  $root = $tree->getRootNode();

  // One output line per function declaration; child 2 is used as the name.
  $functions = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION');
  foreach ($functions as $function) {
    $name = $function->getChildByIndex(2);
    print_symbol($file, 'function', $name);
  }

  // One output line per class declaration; child 1 is used as the name.
  $classes = $root->selectDescendantsOfType('n_CLASS_DECLARATION');
  foreach ($classes as $class) {
    $class_name = $class->getChildByIndex(1);
    print_symbol($file, 'class', $class_name);
  }

  // One output line per interface declaration; child 1 is used as the name.
  $interfaces = $root->selectDescendantsOfType('n_INTERFACE_DECLARATION');
  foreach ($interfaces as $interface) {
    $interface_name = $interface->getChildByIndex(1);
    print_symbol($file, 'interface', $interface_name);
  }
}

/**
 * Print one symbol definition as a single space-delimited line containing the
 * symbol name, its type, the literal language "php", the line number and the
 * path of the defining file.
 *
 * The path is emitted with exactly one leading "/". Only leading "./" segments
 * and leading slashes are removed from $file, so dot-prefixed names such as
 * ".hidden/a.php" keep their leading dot.
 *
 * @param string $file  Path of the file the symbol was found in.
 * @param string $type  Symbol type ('function', 'class' or 'interface').
 * @param object $token Node providing getConcreteString() and getLineNumber().
 * @return void
 */
function print_symbol($file, $type, $token) {
  // ltrim($file, './') would treat './' as a set of characters and strip every
  // leading '.' and '/', turning "./.hidden/a.php" into "hidden/a.php".
  $path = preg_replace('@^(?:\./+)+@', '', $file);
  $path = '/'.ltrim($path, '/');

  $parts = array(
    $token->getConcreteString(),
    $type,
    'php',
    $token->getLineNumber(),
    $path,
  );
  echo implode(' ', $parts)."\n";
}
diff --git a/scripts/crossref/import_project_symbols.php b/scripts/symbols/import_project_symbols.php similarity index 99% rename from scripts/crossref/import_project_symbols.php rename to scripts/symbols/import_project_symbols.php index fde7fb9ebd..84e5cea608 100755 --- a/scripts/crossref/import_project_symbols.php +++ b/scripts/symbols/import_project_symbols.php @@ -1,153 +1,155 @@
#!/usr/bin/env php
// NOTE(review): the script preamble that belongs between the shebang line and
// "loadOneWhere(" is not present in this text; the fragment is kept verbatim.
loadOneWhere(
  'name = %s',
  $project_name);

if (!$project) {
  // TODO: Provide a less silly way to do this explicitly, or just do it right
  // here.
  echo "Project '{$project_name}' is unknown. Upload a diff to implicitly ".
"create it.\n";
  exit(1);
}

// Read the whole symbol list from stdin and split it into lines.
echo "Parsing input from stdin...\n";
$input = file_get_contents('php://stdin');
$input = trim($input);
$input = explode("\n", $input);

// $map[name][type][lang] holds the zero-based input index of the first
// definition seen, and is used to reject duplicates. $symbols collects the
// validated rows.
$map = array();
$symbols = array();
foreach ($input as $key => $line) {
  $line_no = $key + 1;

  // Each line must have five space-delimited fields; only the last one (the
  // path) may itself contain spaces.
  $matches = null;
  $ok = preg_match('/^([^ ]+) ([^ ]+) ([^ ]+) (\d+) (.*)$/', $line, $matches);
  if (!$ok) {
    throw new Exception(
      "Line #{$line_no} of input is invalid. Expected five space-delimited ".
      "fields: symbol name, symbol type, symbol language, line number, path. ".
      "For example:\n\n".
      "idx function php 13 /path/to/some/file.php\n\n".
      "Actual line was:\n\n".
      "{$line}");
  }
  list($all, $name, $type, $lang, $line_number, $path) = $matches;

  // A (name, type, language) triple may be defined only once in the input.
  if (isset($map[$name][$type][$lang])) {
    $previous = $map[$name][$type][$lang] + 1;
    throw new Exception(
      "Line #{$line_no} of input is invalid. It specifies a duplicate symbol ".
      "(same name, language, and type) which has already been defined ".
      "elsewhere. You must preprocess the symbol list to remove duplicates ".
      "and choose exactly one master definition for each symbol. This symbol ".
      "was previously defined on line #{$previous}.\n\n".
      "Line #{$line_no}:\n".
      $line."\n\n".
      "Line #{$previous}:\n".
      $input[$previous - 1]);
  } else {
    $map[$name][$type][$lang] = $key;
  }

  // Enforce the maximum byte length of each field.
  if (strlen($name) > 128) {
    throw new Exception(
      "Symbol name '{$name}' defined on line #{$line_no} is too long, maximum ".
      "symbol name length is 128 characters.");
  }

  if (strlen($type) > 12) {
    throw new Exception(
      "Symbol type '{$type}' defined on line #{$line_no} is too long, maximum ".
      "symbol type length is 12 characters.");
  }

  if (strlen($lang) > 32) {
    throw new Exception(
      "Symbol language '{$lang}' defined on line #{$line_no} is too long, ".
      "maximum symbol language length is 32 characters.");
  }

  // Paths must start with '/'. The first character has to be compared with
  // the string '/': comparing it with the integer 0 never tested for a slash
  // and, under PHP 8 comparison rules, rejected every valid path.
  if (!strlen($path) || $path[0] != '/') {
    throw new Exception(
      "Path '{$path}' defined on line #{$line_no} is invalid. Paths should ".
      "begin with '/' and specify a path from the root of the project, like ".
"'/src/utils/utils.php'.");
  }

  // The line passed every check; keep it for import.
  $entry = array(
    'name' => $name,
    'type' => $type,
    'lang' => $lang,
    'line' => $line_number,
    'path' => $path,
  );
  $symbols[] = $entry;
}

// Resolve every referenced path to an ID; the result is indexed by path below.
echo "Looking up path IDs...\n";
$all_paths = ipull($symbols, 'path');
$path_ids =
  PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths(
    $all_paths);

$symbol = new PhabricatorRepositorySymbol();
$conn_w = $symbol->establishConnection('w');

// Format one "(...)" value tuple per symbol for the bulk INSERT below.
echo "Preparing queries...\n";
$rows = array();
foreach ($symbols as $spec) {
  $rows[] = qsprintf(
    $conn_w,
    '(%d, %s, %s, %s, %d, %d)',
    $project->getID(),
    $spec['name'],
    $spec['type'],
    $spec['lang'],
    $spec['line'],
    $path_ids[$spec['path']]);
}

// Remove this project's existing symbols before loading the new set.
echo "Purging old symbols...\n";
queryfx(
  $conn_w,
  'DELETE FROM %T WHERE arcanistProjectID = %d',
  $symbol->getTableName(),
  $project->getID());

// Insert the prepared tuples in batches of 128 per statement.
$total = number_format(count($rows));
echo "Loading ".$total." symbols...\n";
foreach (array_chunk($rows, 128) as $batch) {
  $values = implode(', ', $batch);
  queryfx(
    $conn_w,
    'INSERT INTO %T (arcanistProjectID, symbolName, symbolType, symbolLanguage, lineNumber, pathID) VALUES %Q',
    $symbol->getTableName(),
    $values);
}

echo "Done.\n";
diff --git a/src/docs/userguide/diffusion_symbols.diviner b/src/docs/userguide/diffusion_symbols.diviner new file mode 100644 index 0000000000..d17f95560d --- /dev/null +++ b/src/docs/userguide/diffusion_symbols.diviner @@ -0,0 +1,84 @@ +@title Diffusion User Guide: Symbol Indexes +@group userguide + +Guide to configuring and using the symbol index. + += Overview = + +Phabricator can maintain a symbol index, which keeps track of where classes +and functions are defined in the codebase. Once you set up indexing, you can +use the index to do things like: + + - link symbol uses in Differential code reviews to their definitions + - allow you to search for symbols + - let the IRC bot answer questions like "Where is SomeClass?" + +NOTE: Symbol indexing is somewhat new, and has broader support for PHP than for +other languages. 
+ += Populating the Index = + +To populate the index, you need to write a script which identifies symbols in +your codebase and set up a cronjob which pipes its output to: + + ./scripts/symbols/import_project_symbols.php + +Phabricator includes a script which can identify symbols in PHP projects: + + ./scripts/symbols/generate_php_symbols.php + +If you want to identify symbols from another language, you need to write a +script which can export them (for example, maybe by parsing a ##ctags## file). + +The output format of the script should be one symbol per line: + + <name> <type> <lang> <line> <path> + +For example: + + ExampleClass class php 13 /src/classes/ExampleClass.php + +Your script should enumerate all the symbols in your project, and provide paths +from the project root (where ".arcconfig" is) beginning with a "/". If there are +any duplicate symbols, it should include logic to pick the "best" one -- symbol +names must be unique within a project, type and language. + +You can look at ##generate_php_symbols.php## for an example of how you might +write such a script, and run this command to see its output: + + $ cd phabricator/ + $ find . -type f -name '*.php' | ./scripts/symbols/generate_php_symbols.php + +To actually build the symbol index, pipe this data to the +##import_project_symbols.php## script, providing the project name: + + $ ./scripts/symbols/import_project_symbols.php yourproject < symbols_data + +Then just set up a cronjob to run that however often you like. + +You can test that the import worked by querying for symbols using the Conduit +method ##differential.findsymbols##. Some features (like that method, and the +IRC bot integration) will start working immediately. Others will require more +configuration. + += Configuring Differential Integration = + +To configure Differential integration, you need to tell Phabricator which +projects have symbol indexes you want to use, and which other projects they +should pull symbols from. 
To do this, go to +##Repositories -> Arcanist Projects -> Edit## as an administrator. You need to +fill out these fields: + + - **Repository**: Associate the project with a tracked repository. + - **Indexed Languages**: Fill in all the languages you've built indexes for. + - **Uses Symbols From**: If this project depends on other projects, add the + other projects whose symbols should be looked for here. For example, + Phabricator lists "Arcanist" and "libphutil" because it uses classes and + functions from these projects. + +Once you've configured a project, new revisions in that project will +automatically link symbols in Differential. + +NOTE: Because this feature depends on the syntax highlighter, it will work +better for some languages than others. It currently works fairly well for PHP, +but your mileage may vary for other languages.