diff --git a/scripts/symbols/generate_ctags_symbols.php b/scripts/symbols/generate_ctags_symbols.php index a665d6082a..bce4f1bf98 100755 --- a/scripts/symbols/generate_ctags_symbols.php +++ b/scripts/symbols/generate_ctags_symbols.php @@ -1,135 +1,140 @@ #!/usr/bin/env php limit(8) as $file => $future) { $tags = $future->resolve(); $tags = explode("\n", $tags[1]); foreach ($tags as $tag) { $parts = explode(";", $tag); // skip lines that we can not parse if (count($parts) < 2) { continue; } // split ctags information $tag_info = explode("\t", $parts[0]); // split exuberant ctags "extension fields" (additional information) $parts[1] = trim($parts[1], "\t \""); $extension_fields = explode("\t", $parts[1]); // skip lines that we can not parse if (count($tag_info) < 3 || count($extension_fields) < 2) { continue; } // default $context to empty $extension_fields[] = ''; list($token, $file_path, $line_num) = $tag_info; list($type, $language, $context) = $extension_fields; + // skip lines with tokens containing a space + if (strpos($token, ' ') !== false) { + continue; + } + // strip "language:" $language = substr($language, 9); // To keep consistent with "Separate with commas, for example: php, py" // in Arcanist Project edit form. $language = str_ireplace("python", "py", $language); // also, "normalize" c++ and c# $language = str_ireplace("c++", "cpp", $language); $language = str_ireplace("c#", "csharp", $language); // Ruby has "singleton method", for example $type = substr(str_replace(' ', '_', $type), 0, 12); // class:foo, struct:foo, union:foo, enum:foo, ... $context = last(explode(':', $context, 2)); $ignore = array( 'variable' => true, ); if (empty($ignore[$type])) { print_symbol($file_path, $line_num, $type, $token, $context, $language); } } } function ctags_get_parser_future($file_path) { $future = new ExecFuture('ctags -n --fields=Kls -o - %s', $file_path); return $future; } function ctags_check_executable() { $future = new ExecFuture('ctags --version'); $result = $future->resolve(); if (empty($result[1])) { return false; } return true; } function print_symbol($file, $line_num, $type, $token, $context, $language) { // get rid of relative path $file = explode('/', $file); if ($file[0] == '.' || $file[0] == "..") { array_shift($file); } $file = '/' . implode('/', $file); $parts = array( $context, $token, $type, strtolower($language), $line_num, $file, ); echo implode(' ', $parts)."\n"; } diff --git a/scripts/symbols/import_project_symbols.php b/scripts/symbols/import_project_symbols.php index b09fc6f377..0892e27d61 100755 --- a/scripts/symbols/import_project_symbols.php +++ b/scripts/symbols/import_project_symbols.php @@ -1,179 +1,192 @@ #!/usr/bin/env php setSynopsis(<<parseStandardArguments(); $args->parse( array( array( 'name' => 'no-purge', 'help' => 'Do not clear all symbols for this project before '. 'uploading new symbols. Useful for incremental updating.', ), + array( + 'name' => 'ignore-errors', + 'help' => 'If a line can\'t be parsed, ignore that line and '. + 'continue instead of exiting.', + ), array( 'name' => 'more', 'wildcard' => true, ), )); $more = $args->getArg('more'); if (count($more) !== 1) { $args->printHelpAndExit(); } $project_name = head($more); $project = id(new PhabricatorRepositoryArcanistProject())->loadOneWhere( 'name = %s', $project_name); if (!$project) { // TODO: Provide a less silly way to do this explicitly, or just do it right // here. echo "Project '{$project_name}' is unknown. Upload a diff to implicitly ". "create it.\n"; exit(1); } echo "Parsing input from stdin...\n"; $input = file_get_contents('php://stdin'); $input = trim($input); $input = explode("\n", $input); $symbols = array(); foreach ($input as $key => $line) { - $line_no = $key + 1; - $matches = null; - $ok = preg_match( - '/^((?P[^ ]+)? )?(?P[^ ]+) (?P[^ ]+) '. - '(?P[^ ]+) (?P\d+) (?P.*)$/', - $line, - $matches); - if (!$ok) { - throw new Exception( - "Line #{$line_no} of input is invalid. Expected five or six ". - "space-delimited fields: maybe symbol context, symbol name, symbol ". - "type, symbol language, line number, path. ". - "For example:\n\n". - "idx function php 13 /path/to/some/file.php\n\n". - "Actual line was:\n\n". - "{$line}"); - } - if (empty($matches['context'])) { - $matches['context'] = ''; - } - $context = $matches['context']; - $name = $matches['name']; - $type = $matches['type']; - $lang = $matches['lang']; - $line_number = $matches['line']; - $path = $matches['path']; - - if (strlen($context) > 128) { - throw new Exception( - "Symbol context '{$context}' defined on line #{$line_no} is too long, ". - "maximum symbol context length is 128 characters."); - } - - if (strlen($name) > 128) { - throw new Exception( - "Symbol name '{$name}' defined on line #{$line_no} is too long, maximum ". - "symbol name length is 128 characters."); - } - - if (strlen($type) > 12) { - throw new Exception( - "Symbol type '{$type}' defined on line #{$line_no} is too long, maximum ". - "symbol type length is 12 characters."); - } - - if (strlen($lang) > 32) { - throw new Exception( - "Symbol language '{$lang}' defined on line #{$line_no} is too long, ". - "maximum symbol language length is 32 characters."); + try { + $line_no = $key + 1; + $matches = null; + $ok = preg_match( + '/^((?P[^ ]+)? )?(?P[^ ]+) (?P[^ ]+) '. + '(?P[^ ]+) (?P\d+) (?P.*)$/', + $line, + $matches); + if (!$ok) { + throw new Exception( + "Line #{$line_no} of input is invalid. Expected five or six ". + "space-delimited fields: maybe symbol context, symbol name, symbol ". + "type, symbol language, line number, path. ". + "For example:\n\n". + "idx function php 13 /path/to/some/file.php\n\n". + "Actual line was:\n\n". + "{$line}"); + } + if (empty($matches['context'])) { + $matches['context'] = ''; + } + $context = $matches['context']; + $name = $matches['name']; + $type = $matches['type']; + $lang = $matches['lang']; + $line_number = $matches['line']; + $path = $matches['path']; + + if (strlen($context) > 128) { + throw new Exception( + "Symbol context '{$context}' defined on line #{$line_no} is too long, ". + "maximum symbol context length is 128 characters."); + } + + if (strlen($name) > 128) { + throw new Exception( + "Symbol name '{$name}' defined on line #{$line_no} is too long, ". + "maximum symbol name length is 128 characters."); + } + + if (strlen($type) > 12) { + throw new Exception( + "Symbol type '{$type}' defined on line #{$line_no} is too long, ". + "maximum symbol type length is 12 characters."); + } + + if (strlen($lang) > 32) { + throw new Exception( + "Symbol language '{$lang}' defined on line #{$line_no} is too long, ". + "maximum symbol language length is 32 characters."); + } + + if (!strlen($path) || $path[0] != 0) { + throw new Exception( + "Path '{$path}' defined on line #{$line_no} is invalid. Paths should ". + "begin with '/' and specify a path from the root of the project, like ". + "'/src/utils/utils.php'."); + } + + $symbols[] = array( + 'ctxt' => $context, + 'name' => $name, + 'type' => $type, + 'lang' => $lang, + 'line' => $line_number, + 'path' => $path, + ); + } catch (Exception $e) { + if ($args->getArg('ignore-errors')) { + continue; + } else { + throw $e; + } } - - if (!strlen($path) || $path[0] != 0) { - throw new Exception( - "Path '{$path}' defined on line #{$line_no} is invalid. Paths should be ". - "begin with '/' and specify a path from the root of the project, like ". - "'/src/utils/utils.php'."); - } - - $symbols[] = array( - 'ctxt' => $context, - 'name' => $name, - 'type' => $type, - 'lang' => $lang, - 'line' => $line_number, - 'path' => $path, - ); } echo "Looking up path IDs...\n"; $path_map = PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths( ipull($symbols, 'path')); $symbol = new PhabricatorRepositorySymbol(); $conn_w = $symbol->establishConnection('w'); echo "Preparing queries...\n"; $sql = array(); foreach ($symbols as $dict) { $sql[] = qsprintf( $conn_w, '(%d, %s, %s, %s, %s, %d, %d)', $project->getID(), $dict['ctxt'], $dict['name'], $dict['type'], $dict['lang'], $dict['line'], $path_map[$dict['path']]); } if (!$args->getArg('no-purge')) { echo "Purging old symbols...\n"; queryfx( $conn_w, 'DELETE FROM %T WHERE arcanistProjectID = %d', $symbol->getTableName(), $project->getID()); } echo "Loading ".number_format(count($sql))." symbols...\n"; foreach (array_chunk($sql, 128) as $chunk) { queryfx( $conn_w, 'INSERT INTO %T (arcanistProjectID, symbolContext, symbolName, symbolType, symbolLanguage, lineNumber, pathID) VALUES %Q', $symbol->getTableName(), implode(', ', $chunk)); } echo "Done.\n";