diff --git a/src/applications/repository/engine/__tests__/data/CHC.svn.tgz b/src/applications/repository/engine/__tests__/data/CHC.svn.tgz new file mode 100644 index 0000000000..93da53515b Binary files /dev/null and b/src/applications/repository/engine/__tests__/data/CHC.svn.tgz differ diff --git a/src/applications/repository/worker/__tests__/PhabricatorChangeParserTestCase.php b/src/applications/repository/worker/__tests__/PhabricatorChangeParserTestCase.php index 913fa4678f..1194c8922b 100644 --- a/src/applications/repository/worker/__tests__/PhabricatorChangeParserTestCase.php +++ b/src/applications/repository/worker/__tests__/PhabricatorChangeParserTestCase.php @@ -1,469 +1,665 @@ buildDiscoveredRepository('CHA'); $viewer = PhabricatorUser::getOmnipotentUser(); $commits = id(new DiffusionCommitQuery()) ->setViewer($viewer) ->withRepositoryIDs(array($repository->getID())) ->execute(); $this->expectChanges( $repository, $commits, array( // 8ebb73c add +x '8ebb73c3f127625ad090472f4f3bfc72804def54' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1389892449, ), array( '/file_moved', null, null, DifferentialChangeType::TYPE_CHANGE, DifferentialChangeType::FILE_NORMAL, 1, 1389892449, ), ), // ee9c790 add symlink 'ee9c7909e012da7d75e8e1293c7803a6e73ac26a' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1389892436, ), array( '/file_link', null, null, DifferentialChangeType::TYPE_ADD, DifferentialChangeType::FILE_SYMLINK, 1, 1389892436, ), ), // 7260ca4 add directory file '7260ca4b6cec35e755bb5365c4ccdd3f1977772e' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1389892408, ), array( '/dir', null, null, DifferentialChangeType::TYPE_ADD, DifferentialChangeType::FILE_DIRECTORY, 1, 1389892408, ), array( '/dir/subfile', null, null, DifferentialChangeType::TYPE_ADD, 
DifferentialChangeType::FILE_NORMAL, 1, 1389892408, ), ), // 1fe783c move a file '1fe783cf207c1e5f3e01650d2d9cb80b8a707f0e' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1389892388, ), array( '/file', null, null, DifferentialChangeType::TYPE_MOVE_AWAY, DifferentialChangeType::FILE_NORMAL, 1, 1389892388, ), array( '/file_moved', '/file', '1fe783cf207c1e5f3e01650d2d9cb80b8a707f0e', DifferentialChangeType::TYPE_MOVE_HERE, DifferentialChangeType::FILE_NORMAL, 1, 1389892388, ), ), // 376af8c copy a file '376af8cd8f5b96ec55b7d9a86ccc85b8df8fb833' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1389892377, ), array( '/file', null, null, DifferentialChangeType::TYPE_COPY_AWAY, DifferentialChangeType::FILE_NORMAL, 0, 1389892377, ), array( '/file_copy', '/file', '376af8cd8f5b96ec55b7d9a86ccc85b8df8fb833', DifferentialChangeType::TYPE_COPY_HERE, DifferentialChangeType::FILE_NORMAL, 1, 1389892377, ), ), // ece6ea6 changed a file 'ece6ea6c6836e8b11a103e21707b8f30e6840c94' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1389892352, ), array( '/file', null, null, DifferentialChangeType::TYPE_CHANGE, DifferentialChangeType::FILE_NORMAL, 1, 1389892352, ), ), // 513103f added a file '513103f65b8413dd2f1a1b5c1d4852a4a598540f' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, // This is the initial commit and technically created this // directory; arguably the parser should figure this out and // mark this as a direct change. 
0, 1389892330, ), array( '/file', null, null, DifferentialChangeType::TYPE_ADD, DifferentialChangeType::FILE_NORMAL, 1, 1389892330, ), ), )); } public function testMercurialParser() { $repository = $this->buildDiscoveredRepository('CHB'); $viewer = PhabricatorUser::getOmnipotentUser(); $commits = id(new DiffusionCommitQuery()) ->setViewer($viewer) ->withRepositoryIDs(array($repository->getID())) ->execute(); $this->expectChanges( $repository, $commits, array( '970357a2dc4264060e65d68e42240bb4e5984085' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1390249395, ), array( '/file_moved', null, null, DifferentialChangeType::TYPE_CHANGE, DifferentialChangeType::FILE_NORMAL, 1, 1390249395, ), ), 'fbb49af9788e5dbffbc05a060b680df1fd457be3' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1390249380, ), array( '/file_link', null, null, DifferentialChangeType::TYPE_ADD, // TODO: This is not correct, and should be FILE_SYMLINK. See // note in the parser about this. This is a known bug. 
DifferentialChangeType::FILE_NORMAL, 1, 1390249380, ), ), '0e8d3465944c7ed7a7c139da7edc652cf80dba69' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1390249342, ), array( '/dir', null, null, DifferentialChangeType::TYPE_ADD, DifferentialChangeType::FILE_DIRECTORY, 1, 1390249342, ), array( '/dir/subfile', null, null, DifferentialChangeType::TYPE_ADD, DifferentialChangeType::FILE_NORMAL, 1, 1390249342, ), ), '22c75131ff15c8a44d7a729c4542b7f4c8ed27f4' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1390249320, ), array( '/file', null, null, DifferentialChangeType::TYPE_MOVE_AWAY, DifferentialChangeType::FILE_NORMAL, 1, 1390249320, ), array( '/file_moved', '/file', '22c75131ff15c8a44d7a729c4542b7f4c8ed27f4', DifferentialChangeType::TYPE_MOVE_HERE, DifferentialChangeType::FILE_NORMAL, 1, 1390249320, ), ), 'd9d252df30cb7251ad3ea121eff30c7d2e36dd67' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1390249308, ), array( '/file', null, null, DifferentialChangeType::TYPE_COPY_AWAY, DifferentialChangeType::FILE_NORMAL, 0, 1390249308, ), array( '/file_copy', '/file', 'd9d252df30cb7251ad3ea121eff30c7d2e36dd67', DifferentialChangeType::TYPE_COPY_HERE, DifferentialChangeType::FILE_NORMAL, 1, 1390249308, ), ), '1fc0445d5e3d0f33e9dcbb68bbe419a847460d25' => array( array( '/', null, null, DifferentialChangeType::TYPE_CHILD, DifferentialChangeType::FILE_DIRECTORY, 0, 1390249294, ), array( '/file', null, null, DifferentialChangeType::TYPE_CHANGE, DifferentialChangeType::FILE_NORMAL, 1, 1390249294, ), ), '61518e196efb7f80700333cc0d00634c2578871a' => array( array( '/', null, null, DifferentialChangeType::TYPE_ADD, DifferentialChangeType::FILE_DIRECTORY, 1, 1390249286, ), array( '/file', null, null, DifferentialChangeType::TYPE_ADD, DifferentialChangeType::FILE_NORMAL, 1, 1390249286, ), ), )); } + 
public function testSubversionParser() { + $repository = $this->buildDiscoveredRepository('CHC'); + $viewer = PhabricatorUser::getOmnipotentUser(); + + $commits = id(new DiffusionCommitQuery()) + ->setViewer($viewer) + ->withRepositoryIDs(array($repository->getID())) + ->execute(); + + $this->expectChanges( + $repository, + $commits, + array( + '7' => array( + array( + '/', + null, + null, + DifferentialChangeType::TYPE_CHILD, + DifferentialChangeType::FILE_DIRECTORY, + 0, + 7, + ), + array( + '/file_moved', + null, + null, + DifferentialChangeType::TYPE_CHANGE, + DifferentialChangeType::FILE_NORMAL, + 1, + 7, + ), + ), + + '6' => array( + array( + '/', + null, + null, + DifferentialChangeType::TYPE_CHILD, + DifferentialChangeType::FILE_DIRECTORY, + 0, + 6, + ), + array( + '/file_link', + null, + null, + DifferentialChangeType::TYPE_ADD, + // TODO: This is not correct, and should be FILE_SYMLINK. + DifferentialChangeType::FILE_NORMAL, + 1, + 6, + ), + ), + + '5' => array( + array( + '/', + null, + null, + DifferentialChangeType::TYPE_CHILD, + DifferentialChangeType::FILE_DIRECTORY, + 0, + 5, + ), + array( + '/dir', + null, + null, + DifferentialChangeType::TYPE_ADD, + DifferentialChangeType::FILE_DIRECTORY, + 1, + 5, + ), + array( + '/dir/subfile', + null, + null, + DifferentialChangeType::TYPE_ADD, + DifferentialChangeType::FILE_NORMAL, + 1, + 5, + ), + ), + + '4' => array( + array( + '/', + null, + null, + DifferentialChangeType::TYPE_CHILD, + DifferentialChangeType::FILE_DIRECTORY, + 0, + 4, + ), + array( + '/file', + null, + null, + DifferentialChangeType::TYPE_MOVE_AWAY, + DifferentialChangeType::FILE_NORMAL, + 1, + 4, + ), + array( + '/file_moved', + '/file', + '2', + DifferentialChangeType::TYPE_MOVE_HERE, + DifferentialChangeType::FILE_NORMAL, + 1, + 4, + ), + ), + + '3' => array( + array( + '/', + null, + null, + DifferentialChangeType::TYPE_CHILD, + DifferentialChangeType::FILE_DIRECTORY, + 0, + 3, + ), + array( + '/file', + null, + null, + 
DifferentialChangeType::TYPE_COPY_AWAY, + DifferentialChangeType::FILE_NORMAL, + 0, + 3, + ), + array( + '/file_copy', + '/file', + '2', + DifferentialChangeType::TYPE_COPY_HERE, + DifferentialChangeType::FILE_NORMAL, + 1, + 3, + ), + ), + + '2' => array( + array( + '/', + null, + null, + DifferentialChangeType::TYPE_CHILD, + DifferentialChangeType::FILE_DIRECTORY, + 0, + 2, + ), + array( + '/file', + null, + null, + DifferentialChangeType::TYPE_CHANGE, + DifferentialChangeType::FILE_NORMAL, + 1, + 2, + ), + ), + + '1' => array( + array( + '/', + null, + null, + // The Git and Svn parsers don't recognize the first commit as + // creating "/", while the Mercurial parser does. All the parsers + // should probably behave like the Mercurial parser. + DifferentialChangeType::TYPE_CHILD, + DifferentialChangeType::FILE_DIRECTORY, + 0, + 1, + ), + array( + '/file', + null, + null, + DifferentialChangeType::TYPE_ADD, + DifferentialChangeType::FILE_NORMAL, + 1, + 1, + ), + ), + )); + } + private function expectChanges( PhabricatorRepository $repository, array $commits, array $expect) { switch ($repository->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $parser = 'PhabricatorRepositoryGitCommitChangeParserWorker'; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $parser = 'PhabricatorRepositoryMercurialCommitChangeParserWorker'; break; + case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: + $parser = 'PhabricatorRepositorySvnCommitChangeParserWorker'; + break; default: throw new Exception(pht('No support yet.')); } foreach ($commits as $commit) { $commit_identifier = $commit->getCommitIdentifier(); $expect_changes = idx($expect, $commit_identifier); if ($expect_changes === null) { $this->assertEqual( $commit_identifier, null, pht( 'No test entry for commit "%s" in repository "%s"!', $commit_identifier, $repository->getCallsign())); } $parser_object = newv($parser, array(array())); $changes = 
$parser_object->parseChangesForUnitTest($repository, $commit); $path_map = id(new DiffusionPathQuery()) ->withPathIDs(mpull($changes, 'getPathID')) ->execute(); $path_map = ipull($path_map, 'path'); $target_commits = array_filter(mpull($changes, 'getTargetCommitID')); if ($target_commits) { $commits = id(new DiffusionCommitQuery()) ->setViewer(PhabricatorUser::getOmnipotentUser()) ->withIDs($target_commits) ->execute(); $target_commits = mpull($commits, 'getCommitIdentifier', 'getID'); } $dicts = array(); foreach ($changes as $key => $change) { $target_path = idx($path_map, $change->getTargetPathID()); $target_commit = idx($target_commits, $change->getTargetCommitID()); $dicts[$key] = array( $path_map[(int)$change->getPathID()], $target_path, - $target_commit, + $target_commit ? (string)$target_commit : null, (int)$change->getChangeType(), (int)$change->getFileType(), (int)$change->getIsDirect(), (int)$change->getCommitSequence(), ); } $dicts = ipull($dicts, null, 0); $expect_changes = ipull($expect_changes, null, 0); ksort($dicts); ksort($expect_changes); $this->assertEqual($expect_changes, $dicts); } } } diff --git a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php index b09eefdda6..2aaaf17c33 100644 --- a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php +++ b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php @@ -1,791 +1,782 @@ getSubversionPathURI(); $svn_commit = $commit->getCommitIdentifier(); // Pull the top-level path changes out of "svn log". This is pretty // straightforward; just parse the XML log. 
- $log = $this->getSVNLogXMLObject($uri, $svn_commit); + $log = $this->getSVNLogXMLObject($repository, $uri, $svn_commit); $entry = $log->logentry[0]; if (!$entry->paths) { // TODO: Explicitly mark this commit as broken elsewhere? This isn't // supposed to happen but we have some cases like rE27 and rG935 in the // Facebook repositories where things got all clowned up. return; } $raw_paths = array(); foreach ($entry->paths->path as $path) { $name = trim((string)$path); $raw_paths[$name] = array( 'rawPath' => $name, 'rawTargetPath' => (string)$path['copyfrom-path'], 'rawChangeType' => (string)$path['action'], 'rawTargetCommit' => (string)$path['copyfrom-rev'], ); } $copied_or_moved_map = array(); $deleted_paths = array(); $add_paths = array(); foreach ($raw_paths as $path => $raw_info) { if ($raw_info['rawTargetPath']) { $copied_or_moved_map[$raw_info['rawTargetPath']][] = $raw_info; } switch ($raw_info['rawChangeType']) { case 'D': $deleted_paths[$path] = $raw_info; break; case 'A': case 'R': $add_paths[$path] = $raw_info; break; } } // If a path was deleted, we need to look in the repository history to // figure out where the former valid location for it is so we can figure out // if it was a directory or not, among other things. $lookup_here = array(); foreach ($raw_paths as $path => $raw_info) { if ($raw_info['rawChangeType'] != 'D') { continue; } // If a change copies a directory and then deletes something from it, // we need to look at the old location for information about the path, not // the new location. This workflow is pretty ridiculous -- so much so that // Trac gets it wrong. See Facebook rO6 for an example, if you happen to // work at Facebook. 
$parents = $this->expandAllParentPaths($path, $include_self = true); foreach ($parents as $parent) { if (isset($add_paths[$parent])) { $relative_path = substr($path, strlen($parent)); $lookup_here[$path] = array( 'rawPath' => $add_paths[$parent]['rawTargetPath'].$relative_path, 'rawCommit' => $add_paths[$parent]['rawTargetCommit'], ); continue 2; } } // Otherwise we can just look at the previous revision. $lookup_here[$path] = array( 'rawPath' => $path, 'rawCommit' => $svn_commit - 1, ); } $lookup = array(); foreach ($raw_paths as $path => $raw_info) { if ($raw_info['rawChangeType'] == 'D') { $lookup[$path] = $lookup_here[$path]; } else { // For everything that wasn't deleted, we can just look it up directly. $lookup[$path] = array( 'rawPath' => $path, 'rawCommit' => $svn_commit, ); } } $effects = array(); $path_file_types = $this->lookupPathFileTypes($repository, $lookup); foreach ($raw_paths as $path => $raw_info) { if ($raw_info['rawChangeType'] == 'D' && $path_file_types[$path] == DifferentialChangeType::FILE_DIRECTORY) { // Bad. Child paths aren't enumerated in "svn log" so we need // to go fishing. $list = $this->lookupRecursiveFileList( $repository, $lookup[$path]); foreach ($list as $deleted_path => $path_file_type) { $deleted_path = rtrim($path.'/'.$deleted_path, '/'); if (!empty($raw_paths[$deleted_path])) { // We somehow learned about this deletion explicitly? // TODO: Unclear how this is possible. 
continue; } $effect_type = DifferentialChangeType::TYPE_DELETE; $effect_target_path = null; if (isset($copied_or_moved_map[$deleted_path])) { $effect_target_path = $path; if (count($copied_or_moved_map[$deleted_path]) > 1) { $effect_type = DifferentialChangeType::TYPE_MULTICOPY; } else { $effect_type = DifferentialChangeType::TYPE_MOVE_AWAY; } } $effects[$deleted_path] = array( 'rawPath' => $deleted_path, 'rawTargetPath' => $effect_target_path, 'rawTargetCommit' => null, 'rawDirect' => true, 'changeType' => $effect_type, 'fileType' => $path_file_type, ); $deleted_paths[$deleted_path] = $effects[$deleted_path]; } } } $resolved_types = array(); $supplemental = array(); foreach ($raw_paths as $path => $raw_info) { if (isset($resolved_types[$path])) { $type = $resolved_types[$path]; } else { switch ($raw_info['rawChangeType']) { case 'D': if (isset($copied_or_moved_map[$path])) { if (count($copied_or_moved_map[$path]) > 1) { $type = DifferentialChangeType::TYPE_MULTICOPY; } else { $type = DifferentialChangeType::TYPE_MOVE_AWAY; } } else { $type = DifferentialChangeType::TYPE_DELETE; } break; case 'A': $copy_from = $raw_info['rawTargetPath']; $copy_rev = $raw_info['rawTargetCommit']; if (!strlen($copy_from)) { $type = DifferentialChangeType::TYPE_ADD; } else { if (isset($deleted_paths[$copy_from])) { $type = DifferentialChangeType::TYPE_MOVE_HERE; $other_type = DifferentialChangeType::TYPE_MOVE_AWAY; } else { $type = DifferentialChangeType::TYPE_COPY_HERE; $other_type = DifferentialChangeType::TYPE_COPY_AWAY; } $source_file_type = $this->lookupPathFileType( $repository, $copy_from, array( 'rawPath' => $copy_from, 'rawCommit' => $copy_rev, )); if ($source_file_type == DifferentialChangeType::FILE_DELETED) { throw new Exception( "Something is wrong; source of a copy must exist."); } if ($source_file_type != DifferentialChangeType::FILE_DIRECTORY) { if (isset($raw_paths[$copy_from]) || isset($effects[$copy_from])) { break; } $effects[$copy_from] = array( 'rawPath' => 
$copy_from, 'rawTargetPath' => null, 'rawTargetCommit' => null, 'rawDirect' => false, 'changeType' => $other_type, 'fileType' => $source_file_type, ); } else { // ULTRADISASTER. We've added a directory which was copied // or moved from somewhere else. This is the most complex and // ridiculous case. $list = $this->lookupRecursiveFileList( $repository, array( 'rawPath' => $copy_from, 'rawCommit' => $copy_rev, )); foreach ($list as $from_path => $from_file_type) { $full_from = rtrim($copy_from.'/'.$from_path, '/'); $full_to = rtrim($path.'/'.$from_path, '/'); if (empty($raw_paths[$full_to])) { $effects[$full_to] = array( 'rawPath' => $full_to, 'rawTargetPath' => $full_from, 'rawTargetCommit' => $copy_rev, 'rawDirect' => true, 'changeType' => $type, 'fileType' => $from_file_type, ); } else { // This means we picked the file up explicitly elsewhere. // If the file as modified, SVN will drop the copy // information. We need to restore it. $supplemental[$full_to]['rawTargetPath'] = $full_from; $supplemental[$full_to]['rawTargetCommit'] = $copy_rev; if ($raw_paths[$full_to]['rawChangeType'] == 'M') { $resolved_types[$full_to] = $type; } } if (empty($raw_paths[$full_from]) && empty($effects[$full_from])) { if ($other_type == DifferentialChangeType::TYPE_COPY_AWAY) { // Add an indirect effect for the copied file, if we // don't already have an entry for it (e.g., a separate // change). $effects[$full_from] = array( 'rawPath' => $full_from, 'rawTargetPath' => null, 'rawTargetCommit' => null, 'rawDirect' => false, 'changeType' => $other_type, 'fileType' => $from_file_type, ); } } } } } break; // This is "replaced", caused by "svn rm"-ing a file, putting another // in its place, and then "svn add"-ing it. We do not distinguish // between this and "M". 
case 'R': case 'M': if (isset($copied_or_moved_map[$path])) { $type = DifferentialChangeType::TYPE_COPY_AWAY; } else { $type = DifferentialChangeType::TYPE_CHANGE; } break; } } $resolved_types[$path] = $type; } foreach ($raw_paths as $path => $raw_info) { $raw_paths[$path]['changeType'] = $resolved_types[$path]; if (isset($supplemental[$path])) { foreach ($supplemental[$path] as $key => $value) { $raw_paths[$path][$key] = $value; } } } foreach ($raw_paths as $path => $raw_info) { $effects[$path] = array( 'rawPath' => $path, 'rawTargetPath' => $raw_info['rawTargetPath'], 'rawTargetCommit' => $raw_info['rawTargetCommit'], 'rawDirect' => true, 'changeType' => $raw_info['changeType'], 'fileType' => $path_file_types[$path], ); } $parents = array(); foreach ($effects as $path => $effect) { foreach ($this->expandAllParentPaths($path) as $parent_path) { $parents[$parent_path] = true; } } $parents = array_keys($parents); foreach ($parents as $parent) { if (isset($effects[$parent])) { continue; } $effects[$parent] = array( 'rawPath' => $parent, 'rawTargetPath' => null, 'rawTargetCommit' => null, 'rawDirect' => false, 'changeType' => DifferentialChangeType::TYPE_CHILD, 'fileType' => DifferentialChangeType::FILE_DIRECTORY, ); } $lookup_paths = array(); foreach ($effects as $effect) { $lookup_paths[$effect['rawPath']] = true; if ($effect['rawTargetPath']) { $lookup_paths[$effect['rawTargetPath']] = true; } } $lookup_paths = array_keys($lookup_paths); $lookup_commits = array(); foreach ($effects as $effect) { if ($effect['rawTargetCommit']) { $lookup_commits[$effect['rawTargetCommit']] = true; } } $lookup_commits = array_keys($lookup_commits); $path_map = $this->lookupOrCreatePaths($lookup_paths); $commit_map = $this->lookupSvnCommits($repository, $lookup_commits); - $this->writeChanges($repository, $commit, $effects, $path_map, $commit_map); $this->writeBrowse($repository, $commit, $effects, $path_map); - return array(); + return $this->buildChanges( + $repository, + $commit, + 
$effects, + $path_map, + $commit_map); } - private function writeChanges( + private function buildChanges( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, array $effects, array $path_map, array $commit_map) { - $conn_w = $repository->establishConnection('w'); - - $sql = array(); + $results = array(); foreach ($effects as $effect) { - $sql[] = qsprintf( - $conn_w, - '(%d, %d, %d, %nd, %nd, %d, %d, %d, %d)', - $repository->getID(), - $path_map[$effect['rawPath']], - $commit->getID(), - $effect['rawTargetPath'] - ? $path_map[$effect['rawTargetPath']] - : null, - $effect['rawTargetCommit'] - ? $commit_map[$effect['rawTargetCommit']] - : null, - $effect['changeType'], - $effect['fileType'], - $effect['rawDirect'] - ? 1 - : 0, - $commit->getCommitIdentifier()); - } + $path_id = $path_map[$effect['rawPath']]; - queryfx( - $conn_w, - 'DELETE FROM %T WHERE commitID = %d', - PhabricatorRepository::TABLE_PATHCHANGE, - $commit->getID()); - foreach (array_chunk($sql, 512) as $sql_chunk) { - queryfx( - $conn_w, - 'INSERT INTO %T - (repositoryID, pathID, commitID, targetPathID, targetCommitID, - changeType, fileType, isDirect, commitSequence) - VALUES %Q', - PhabricatorRepository::TABLE_PATHCHANGE, - implode(', ', $sql_chunk)); + $target_path_id = null; + if ($effect['rawTargetPath']) { + $target_path_id = $path_map[$effect['rawTargetPath']]; + } + + $target_commit_id = null; + if ($effect['rawTargetCommit']) { + $target_commit_id = $commit_map[$effect['rawTargetCommit']]; + } + + $result = id(new PhabricatorRepositoryParsedChange()) + ->setPathID($path_id) + ->setTargetPathID($target_path_id) + ->setTargetCommitID($target_commit_id) + ->setChangeType($effect['changeType']) + ->setFileType($effect['fileType']) + ->setIsDirect($effect['rawDirect']) + ->setCommitSequence($commit->getCommitIdentifier()); + + $results[] = $result; } + + return $results; } private function writeBrowse( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, 
array $effects, array $path_map) { $conn_w = $repository->establishConnection('w'); $sql = array(); foreach ($effects as $effect) { $type = $effect['changeType']; if (!$effect['rawDirect']) { if ($type == DifferentialChangeType::TYPE_COPY_AWAY) { // Don't write COPY_AWAY to the filesystem table if it isn't a direct // event. continue; } if ($type == DifferentialChangeType::TYPE_CHILD) { // Don't write CHILD to the filesystem table. Although doing these // writes has the nice property of letting you see when a directory's // contents were last changed, it explodes the table tremendously // and makes Diffusion far slower. continue; } } if ($effect['rawPath'] == '/') { // Don't write any events on '/' to the filesystem table; in // particular, it doesn't have a meaningful parentID. continue; } $existed = !DifferentialChangeType::isDeleteChangeType($type); $sql[] = qsprintf( $conn_w, '(%d, %d, %d, %d, %d, %d)', $repository->getID(), $path_map[$this->getParentPath($effect['rawPath'])], $commit->getCommitIdentifier(), $path_map[$effect['rawPath']], $existed ? 
1 : 0,
        $effect['fileType']);
    }

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE repositoryID = %d AND svnCommit = %d',
      PhabricatorRepository::TABLE_FILESYSTEM,
      $repository->getID(),
      $commit->getCommitIdentifier());

    foreach (array_chunk($sql, 512) as $sql_chunk) {
      queryfx(
        $conn_w,
        'INSERT INTO %T (repositoryID, parentID, svnCommit, pathID, existed, fileType) VALUES %Q',
        PhabricatorRepository::TABLE_FILESYSTEM,
        implode(', ', $sql_chunk));
    }
  }

  /**
   * Map Subversion revision identifiers to commit IDs in this repository.
   *
   * Identifiers which have no commit row yet are only tolerated when the
   * repository has an "svn-subpath" detail set. In that case a stub commit
   * (epoch 0, empty author and message, flagged "foreign-svn-stub") is saved
   * for each of them and included in the result.
   *
   * @param PhabricatorRepository $repository Repository to look commits up in.
   * @param array $commits List of revision identifiers to resolve.
   * @return array Map from revision identifier to commit ID.
   * @throws Exception If identifiers are missing and the repository has no
   *   "svn-subpath" detail.
   */
  private function lookupSvnCommits(
    PhabricatorRepository $repository,
    array $commits) {

    if (!$commits) {
      return array();
    }

    $commit_table = new PhabricatorRepositoryCommit();
    $commit_data = queryfx_all(
      $commit_table->establishConnection('w'),
      'SELECT id, commitIdentifier FROM %T WHERE repositoryID = %d AND commitIdentifier in (%Ls)',
      $commit_table->getTableName(),
      $repository->getID(),
      $commits);

    $commit_map = ipull($commit_data, 'id', 'commitIdentifier');

    $need = array();
    foreach ($commits as $commit) {
      if (empty($commit_map[$commit])) {
        $need[] = $commit;
      }
    }

    // If we are parsing a Subversion repository and have been configured to
    // import only some subdirectory of it, we may find commits which reference
    // other foreign commits outside of the directory (for instance, because of
    // a move or copy). Rather than trying to execute full parses on them, just
    // create stub commits and identify the stubs as foreign commits.
    if ($need) {
      $subpath = $repository->getDetail('svn-subpath');
      if (!$subpath) {
        // Interpolate the joined string, not the $need array itself: an array
        // inside a double-quoted string renders as the literal text "Array".
        $missing = implode(', ', $need);
        throw new Exception(
          "Missing commits ({$missing}) in a SVN repository which is not ".
          "configured for subdirectory-only parsing!");
      }

      foreach ($need as $foreign_commit) {
        $commit = new PhabricatorRepositoryCommit();
        $commit->setRepositoryID($repository->getID());
        $commit->setCommitIdentifier($foreign_commit);
        $commit->setEpoch(0);
        $commit->save();

        $data = new PhabricatorRepositoryCommitData();
        $data->setCommitID($commit->getID());
        $data->setAuthorName('');
        $data->setCommitMessage('');
        $data->setCommitDetails(
          array(
            'foreign-svn-stub' => true,
            // Denormalize this to make it easier to debug cases where someone
            // did half a parse and then changed the subdirectory or something
            // like that.
            'svn-subpath' => $subpath,
          ));
        $data->save();

        $commit_map[$foreign_commit] = $commit->getID();
      }
    }

    return $commit_map;
  }

  private function lookupPathFileType(
    PhabricatorRepository $repository,
    $path,
    array $path_info) {

    $result = $this->lookupPathFileTypes(
      $repository,
      array(
        $path => $path_info,
      ));

    return $result[$path];
  }

  private function lookupPathFileTypes(
    PhabricatorRepository $repository,
    array $paths) {

    $result_map = array();

    $repository_uri = $repository->getSubversionPathURI();

    if (isset($paths['/'])) {
      $result_map['/'] = DifferentialChangeType::FILE_DIRECTORY;
      unset($paths['/']);
    }

    $parents = array();
    $path_mapping = array();
    foreach ($paths as $path => $lookup) {
      $parent = dirname($lookup['rawPath']);
      $parent = $repository->getSubversionPathURI(
        $parent,
        $lookup['rawCommit']);

      $parent = escapeshellarg($parent);
      $parents[$parent] = true;
      $path_mapping[$parent][] = dirname($path);
    }

    // Reverse this list so we can pop $path_mapping, as that's more efficient
    // than shifting it. We need to associate these maps positionally because
    // a change can copy the same source path from multiple revisions via
    // "svn cp path@1 a; svn cp path@2 b;" and the XML output gives us no way
    // to distinguish which revision we're looking at except based on its
    // position in the document.
$all_paths = array_reverse(array_keys($parents));
    foreach (array_chunk($all_paths, 64) as $path_chunk) {
      list($raw_xml) = $repository->execxRemoteCommand(
        '--xml ls %C',
        implode(' ', $path_chunk));

      $xml = new SimpleXMLElement($raw_xml);
      foreach ($xml->list as $list) {
        $list_path = (string)$list['path'];

        // SVN is a big mess. See Facebook rG8 (a revision which adds files
        // with spaces in their names) for an example.
        $list_path = rawurldecode($list_path);

        if ($list_path == $repository_uri) {
          $base = '/';
        } else {
          $base = substr($list_path, strlen($repository_uri));
        }

        $mapping = array_pop($path_mapping);
        foreach ($list->entry as $entry) {
          $val = $this->getFileTypeFromSVNKind($entry['kind']);
          foreach ($mapping as $base_path) {
            // rtrim() causes us to handle top-level directories correctly.
            $key = rtrim($base_path, '/').'/'.$entry->name;
            $result_map[$key] = $val;
          }
        }
      }
    }

    foreach ($paths as $path => $lookup) {
      if (empty($result_map[$path])) {
        $result_map[$path] = DifferentialChangeType::FILE_DELETED;
      }
    }

    return $result_map;
  }

  /**
   * Convert the "kind" value of a Subversion listing entry into the matching
   * DifferentialChangeType file type constant.
   *
   * @param mixed $kind Value which is cast to a string; "dir" and "file" are
   *   the only recognized kinds.
   * @return mixed DifferentialChangeType::FILE_DIRECTORY for "dir", or
   *   DifferentialChangeType::FILE_NORMAL for "file".
   * @throws Exception If the kind is anything else.
   */
  private function getFileTypeFromSVNKind($kind) {
    $kind = (string)$kind;

    if ($kind === 'dir') {
      return DifferentialChangeType::FILE_DIRECTORY;
    }

    if ($kind === 'file') {
      return DifferentialChangeType::FILE_NORMAL;
    }

    throw new Exception("Unknown SVN file kind '{$kind}'.");
  }

  private function lookupRecursiveFileList(
    PhabricatorRepository $repository,
    array $info) {

    $path = $info['rawPath'];
    $rev = $info['rawCommit'];
    $path_uri = $repository->getSubversionPathURI($path, $rev);

    $hashkey = md5($path_uri);

    // This method is quite horrible. The underlying challenge is that some
    // commits in the Facebook repository are enormous, taking multiple hours
    // to 'ls -R' out of the repository and producing XML files >1GB in size.
    // If we try to SimpleXML them, the object exhausts available memory on a
    // 64G machine. Instead, cache the XML output and then parse it line by line
    // to limit space requirements.
$cache_loc = sys_get_temp_dir().'/diffusion.'.$hashkey.'.svnls'; if (!Filesystem::pathExists($cache_loc)) { $tmp = new TempFile(); $repository->execxRemoteCommand( '--xml ls -R %s > %s', $path_uri, $tmp); execx( 'mv %s %s', $tmp, $cache_loc); } $map = $this->parseRecursiveListFileData($cache_loc); Filesystem::remove($cache_loc); return $map; } private function parseRecursiveListFileData($file_path) { $map = array(); $mode = 'xml'; $done = false; $entry = null; foreach (new LinesOfALargeFile($file_path) as $lno => $line) { switch ($mode) { case 'entry': if ($line == '') { $entry = implode('', $entry); $pattern = '@^\s+kind="(file|dir)">'. '(.*?)'. '((.*?))?@'; $matches = null; if (!preg_match($pattern, $entry, $matches)) { throw new Exception("Unable to parse entry!"); } $map[html_entity_decode($matches[2])] = $this->getFileTypeFromSVNKind($matches[1]); $mode = 'entry-or-end'; } else { $entry[] = $line; } break; case 'entry-or-end': if ($line == '') { $done = true; break 2; } else if ($line == ' or = 1) { array_pop($parts); $parents[] = '/'.implode('/', $parts); } return $parents; } - /** - * This method is kind of awkward here but both the SVN message and - * change parsers use it. - */ - private function getSVNLogXMLObject($uri, $revision) { - list($xml) = $this->repository->execxRemoteCommand( - "log --xml --verbose --limit 1 %s@%d", + private function getSVNLogXMLObject( + PhabricatorRepository $repository, + $uri, + $revision) { + list($xml) = $repository->execxRemoteCommand( + 'log --xml --verbose --limit 1 %s@%d', $uri, $revision); // Subversion may send us back commit messages which won't parse because // they have non UTF-8 garbage in them. Slam them into valid UTF-8. $xml = phutil_utf8ize($xml); return new SimpleXMLElement($xml); } }