diff --git a/src/applications/repository/storage/PhabricatorRepository.php b/src/applications/repository/storage/PhabricatorRepository.php index 3a3bf6eb24..b2df31969e 100644 --- a/src/applications/repository/storage/PhabricatorRepository.php +++ b/src/applications/repository/storage/PhabricatorRepository.php @@ -1,956 +1,975 @@ setViewer($actor) ->withClasses(array('PhabricatorApplicationDiffusion')) ->executeOne(); $view_policy = $app->getPolicy(DiffusionCapabilityDefaultView::CAPABILITY); $edit_policy = $app->getPolicy(DiffusionCapabilityDefaultEdit::CAPABILITY); $push_policy = $app->getPolicy(DiffusionCapabilityDefaultPush::CAPABILITY); return id(new PhabricatorRepository()) ->setViewPolicy($view_policy) ->setEditPolicy($edit_policy) ->setPushPolicy($push_policy); } public function getConfiguration() { return array( self::CONFIG_AUX_PHID => true, self::CONFIG_SERIALIZATION => array( 'details' => self::SERIALIZATION_JSON, ), ) + parent::getConfiguration(); } public function generatePHID() { return PhabricatorPHID::generateNewPHID( PhabricatorRepositoryPHIDTypeRepository::TYPECONST); } public function toDictionary() { return array( 'name' => $this->getName(), 'phid' => $this->getPHID(), 'callsign' => $this->getCallsign(), 'vcs' => $this->getVersionControlSystem(), 'uri' => PhabricatorEnv::getProductionURI($this->getURI()), 'remoteURI' => (string)$this->getPublicRemoteURI(), 'tracking' => $this->getDetail('tracking-enabled'), 'description' => $this->getDetail('description'), ); } public function getDetail($key, $default = null) { return idx($this->details, $key, $default); } public function getHumanReadableDetail($key, $default = null) { $value = $this->getDetail($key, $default); switch ($key) { case 'branch-filter': case 'close-commits-filter': $value = array_keys($value); $value = implode(', ', $value); break; } return $value; } public function setDetail($key, $value) { $this->details[$key] = $value; return $this; } public function attachCommitCount($count) { $this->commitCount = $count; return $this; } public function getCommitCount() { return $this->assertAttached($this->commitCount); } public function attachMostRecentCommit( PhabricatorRepositoryCommit $commit = null) { $this->mostRecentCommit = $commit; return $this; } public function getMostRecentCommit() { return $this->assertAttached($this->mostRecentCommit); } public function getDiffusionBrowseURIForPath( PhabricatorUser $user, $path, $line = null, $branch = null) { $drequest = DiffusionRequest::newFromDictionary( array( 'user' => $user, 'repository' => $this, 'path' => $path, 'branch' => $branch, )); return $drequest->generateURI( array( 'action' => 'browse', 'line' => $line, )); } public function getLocalPath() { return $this->getDetail('local-path'); } public function getSubversionBaseURI() { + $subpath = $this->getDetail('svn-subpath'); + if (!strlen($subpath)) { + $subpath = null; + } + return $this->getSubversionPathURI($subpath); + } + + public function getSubversionPathURI($path = null, $commit = null) { $vcs = $this->getVersionControlSystem(); if ($vcs != PhabricatorRepositoryType::REPOSITORY_TYPE_SVN) { throw new Exception("Not a subversion repository!"); } if ($this->isHosted()) { $uri = 'file://'.$this->getLocalPath(); } else { $uri = $this->getDetail('remote-uri'); } - $subpath = $this->getDetail('svn-subpath'); - if ($subpath) { - $subpath = '/'.ltrim($subpath, '/'); + $uri = rtrim($uri, '/'); + + if (strlen($path)) { + $path = rawurlencode($path); + $path = str_replace('%2F', '/', $path); + $uri = $uri.'/'.ltrim($path, '/'); + } + + if ($path !== null || $commit !== null) { + $uri .= '@'; + } + + if ($commit !== null) { + $uri .= $commit; } - return $uri.$subpath; + return $uri; } public function execRemoteCommand($pattern /* , $arg, ... */) { $args = func_get_args(); $args = $this->formatRemoteCommand($args); return call_user_func_array('exec_manual', $args); } public function execxRemoteCommand($pattern /* , $arg, ... */) { $args = func_get_args(); $args = $this->formatRemoteCommand($args); return call_user_func_array('execx', $args); } public function getRemoteCommandFuture($pattern /* , $arg, ... */) { $args = func_get_args(); $args = $this->formatRemoteCommand($args); return newv('ExecFuture', $args); } public function passthruRemoteCommand($pattern /* , $arg, ... */) { $args = func_get_args(); $args = $this->formatRemoteCommand($args); return call_user_func_array('phutil_passthru', $args); } public function execLocalCommand($pattern /* , $arg, ... */) { $this->assertLocalExists(); $args = func_get_args(); $args = $this->formatLocalCommand($args); return call_user_func_array('exec_manual', $args); } public function execxLocalCommand($pattern /* , $arg, ... */) { $this->assertLocalExists(); $args = func_get_args(); $args = $this->formatLocalCommand($args); return call_user_func_array('execx', $args); } public function getLocalCommandFuture($pattern /* , $arg, ... */) { $this->assertLocalExists(); $args = func_get_args(); $args = $this->formatLocalCommand($args); return newv('ExecFuture', $args); } public function passthruLocalCommand($pattern /* , $arg, ... */) { $this->assertLocalExists(); $args = func_get_args(); $args = $this->formatLocalCommand($args); return call_user_func_array('phutil_passthru', $args); } private function formatRemoteCommand(array $args) { $pattern = $args[0]; $args = array_slice($args, 1); $empty = $this->getEmptyReadableDirectoryPath(); if ($this->shouldUseSSH()) { switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $pattern = "SVN_SSH=%s svn --non-interactive {$pattern}"; array_unshift( $args, csprintf( 'ssh -l %P -i %P', new PhutilOpaqueEnvelope($this->getSSHLogin()), new PhutilOpaqueEnvelope($this->getSSHKeyfile()))); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $command = call_user_func_array( 'csprintf', array_merge( array( "(ssh-add %P && HOME=%s git {$pattern})", new PhutilOpaqueEnvelope($this->getSSHKeyfile()), $empty, ), $args)); $pattern = "ssh-agent sh -c %s"; $args = array($command); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $pattern = "hg --config ui.ssh=%s {$pattern}"; array_unshift( $args, csprintf( 'ssh -l %P -i %P', new PhutilOpaqueEnvelope($this->getSSHLogin()), new PhutilOpaqueEnvelope($this->getSSHKeyfile()))); break; default: throw new Exception("Unrecognized version control system."); } } else if ($this->shouldUseHTTP()) { switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $pattern = "svn ". "--non-interactive ". "--no-auth-cache ". "--trust-server-cert ". "--username %P ". "--password %P ". $pattern; array_unshift( $args, new PhutilOpaqueEnvelope($this->getDetail('http-login')), new PhutilOpaqueEnvelope($this->getDetail('http-pass'))); break; default: throw new Exception( "No support for HTTP Basic Auth in this version control system."); } } else if ($this->shouldUseSVNProtocol()) { switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $pattern = "svn ". "--non-interactive ". "--no-auth-cache ". "--username %P ". "--password %P ". $pattern; array_unshift( $args, new PhutilOpaqueEnvelope($this->getDetail('http-login')), new PhutilOpaqueEnvelope($this->getDetail('http-pass'))); break; default: throw new Exception( "SVN protocol is SVN only."); } } else { switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $pattern = "svn --non-interactive {$pattern}"; break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $pattern = "HOME=%s git {$pattern}"; array_unshift($args, $empty); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $pattern = "hg {$pattern}"; break; default: throw new Exception("Unrecognized version control system."); } } array_unshift($args, $pattern); return $args; } private function formatLocalCommand(array $args) { $pattern = $args[0]; $args = array_slice($args, 1); $empty = $this->getEmptyReadableDirectoryPath(); switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: $pattern = "(cd %s && svn --non-interactive {$pattern})"; array_unshift($args, $this->getLocalPath()); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $pattern = "(cd %s && HOME=%s git {$pattern})"; array_unshift($args, $this->getLocalPath(), $empty); break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: $hgplain = (phutil_is_windows() ? "set HGPLAIN=1 &&" : "HGPLAIN=1"); $pattern = "(cd %s && {$hgplain} hg {$pattern})"; array_unshift($args, $this->getLocalPath()); break; default: throw new Exception("Unrecognized version control system."); } array_unshift($args, $pattern); return $args; } private function getEmptyReadableDirectoryPath() { // See T2965. Some time after Git 1.7.5.4, Git started fataling if it can // not read $HOME. For many users, $HOME points at /root (this seems to be // a default result of Apache setup). Instead, explicitly point $HOME at a // readable, empty directory so that Git looks for the config file it's // after, fails to locate it, and moves on. This is really silly, but seems // like the least damaging approach to mitigating the issue. $root = dirname(phutil_get_library_root('phabricator')); return $root.'/support/empty/'; } private function getSSHLogin() { return $this->getDetail('ssh-login'); } private function getSSHKeyfile() { if ($this->sshKeyfile === null) { $key = $this->getDetail('ssh-key'); $keyfile = $this->getDetail('ssh-keyfile'); if ($keyfile) { // Make sure we can read the file, that it exists, etc. Filesystem::readFile($keyfile); $this->sshKeyfile = $keyfile; } else if ($key) { $keyfile = new TempFile('phabricator-repository-ssh-key'); chmod($keyfile, 0600); Filesystem::writeFile($keyfile, $key); $this->sshKeyfile = $keyfile; } else { $this->sshKeyfile = ''; } } return (string)$this->sshKeyfile; } public function getURI() { return '/diffusion/'.$this->getCallsign().'/'; } public function isTracked() { return $this->getDetail('tracking-enabled', false); } public function getDefaultBranch() { $default = $this->getDetail('default-branch'); if (strlen($default)) { return $default; } $default_branches = array( PhabricatorRepositoryType::REPOSITORY_TYPE_GIT => 'master', PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL => 'default', ); return idx($default_branches, $this->getVersionControlSystem()); } public function getDefaultArcanistBranch() { return coalesce($this->getDefaultBranch(), 'svn'); } private function isBranchInFilter($branch, $filter_key) { $vcs = $this->getVersionControlSystem(); $is_git = ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_GIT); $use_filter = ($is_git); if ($use_filter) { $filter = $this->getDetail($filter_key, array()); if ($filter && empty($filter[$branch])) { return false; } } // By default, all branches pass. return true; } public function shouldTrackBranch($branch) { return $this->isBranchInFilter($branch, 'branch-filter'); } public function shouldAutocloseBranch($branch) { if ($this->isImporting()) { return false; } if ($this->getDetail('disable-autoclose', false)) { return false; } return $this->isBranchInFilter($branch, 'close-commits-filter'); } public function shouldAutocloseCommit( PhabricatorRepositoryCommit $commit, PhabricatorRepositoryCommitData $data) { if ($this->getDetail('disable-autoclose', false)) { return false; } switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: return true; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: break; case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: return true; default: throw new Exception("Unrecognized version control system."); } $branches = $data->getCommitDetail('seenOnBranches', array()); foreach ($branches as $branch) { if ($this->shouldAutocloseBranch($branch)) { return true; } } return false; } public function formatCommitName($commit_identifier) { $vcs = $this->getVersionControlSystem(); $type_git = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; $type_hg = PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL; $is_git = ($vcs == $type_git); $is_hg = ($vcs == $type_hg); if ($is_git || $is_hg) { $short_identifier = substr($commit_identifier, 0, 12); } else { $short_identifier = $commit_identifier; } return 'r'.$this->getCallsign().$short_identifier; } public function isImporting() { return (bool)$this->getDetail('importing', false); } /* -( Repository URI Management )------------------------------------------ */ /** * Get the remote URI for this repository. * * @return string * @task uri */ public function getRemoteURI() { return (string)$this->getRemoteURIObject(); } /** * Get the remote URI for this repository, without authentication information. * * @return string Repository URI. * @task uri */ public function getPublicRemoteURI() { $uri = $this->getRemoteURIObject(); // Make sure we don't leak anything if this repo is using HTTP Basic Auth // with the credentials in the URI or something zany like that. if ($uri instanceof PhutilGitURI) { if (!$this->getDetail('show-user', false)) { $uri->setUser(null); } } else { if (!$this->getDetail('show-user', false)) { $uri->setUser(null); } $uri->setPass(null); } return (string)$uri; } /** * Get the protocol for the repository's remote. * * @return string Protocol, like "ssh" or "git". * @task uri */ public function getRemoteProtocol() { $uri = $this->getRemoteURIObject(); if ($uri instanceof PhutilGitURI) { return 'ssh'; } else { return $uri->getProtocol(); } } /** * Get a parsed object representation of the repository's remote URI. This * may be a normal URI (returned as a @{class@libphutil:PhutilURI}) or a git * URI (returned as a @{class@libphutil:PhutilGitURI}). * * @return wild A @{class@libphutil:PhutilURI} or * @{class@libphutil:PhutilGitURI}. * @task uri */ public function getRemoteURIObject() { $raw_uri = $this->getDetail('remote-uri'); if (!$raw_uri) { return new PhutilURI(''); } if (!strncmp($raw_uri, '/', 1)) { return new PhutilURI('file://'.$raw_uri); } $uri = new PhutilURI($raw_uri); if ($uri->getProtocol()) { if ($this->isSSHProtocol($uri->getProtocol())) { if ($this->getSSHLogin()) { $uri->setUser($this->getSSHLogin()); } } return $uri; } $uri = new PhutilGitURI($raw_uri); if ($uri->getDomain()) { if ($this->getSSHLogin()) { $uri->setUser($this->getSSHLogin()); } return $uri; } throw new Exception("Remote URI '{$raw_uri}' could not be parsed!"); } /** * Determine if we should connect to the remote using SSH flags and * credentials. * * @return bool True to use the SSH protocol. * @task uri */ private function shouldUseSSH() { if ($this->isHosted()) { return false; } $protocol = $this->getRemoteProtocol(); if ($this->isSSHProtocol($protocol)) { return (bool)$this->getSSHKeyfile(); } else { return false; } } /** * Determine if we should connect to the remote using HTTP flags and * credentials. * * @return bool True to use the HTTP protocol. * @task uri */ private function shouldUseHTTP() { if ($this->isHosted()) { return false; } $protocol = $this->getRemoteProtocol(); if ($protocol == 'http' || $protocol == 'https') { return (bool)$this->getDetail('http-login'); } else { return false; } } /** * Determine if we should connect to the remote using SVN flags and * credentials. * * @return bool True to use the SVN protocol. * @task uri */ private function shouldUseSVNProtocol() { if ($this->isHosted()) { return false; } $protocol = $this->getRemoteProtocol(); if ($protocol == 'svn') { return (bool)$this->getDetail('http-login'); } else { return false; } } /** * Determine if a protocol is SSH or SSH-like. * * @param string A protocol string, like "http" or "ssh". * @return bool True if the protocol is SSH-like. * @task uri */ private function isSSHProtocol($protocol) { return ($protocol == 'ssh' || $protocol == 'svn+ssh'); } public function delete() { $this->openTransaction(); $paths = id(new PhabricatorOwnersPath()) ->loadAllWhere('repositoryPHID = %s', $this->getPHID()); foreach ($paths as $path) { $path->delete(); } $projects = id(new PhabricatorRepositoryArcanistProject()) ->loadAllWhere('repositoryID = %d', $this->getID()); foreach ($projects as $project) { // note each project deletes its PhabricatorRepositorySymbols $project->delete(); } $commits = id(new PhabricatorRepositoryCommit()) ->loadAllWhere('repositoryID = %d', $this->getID()); foreach ($commits as $commit) { // note PhabricatorRepositoryAuditRequests and // PhabricatorRepositoryCommitData are deleted here too. $commit->delete(); } $conn_w = $this->establishConnection('w'); queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d', self::TABLE_FILESYSTEM, $this->getID()); queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d', self::TABLE_PATHCHANGE, $this->getID()); queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d', self::TABLE_SUMMARY, $this->getID()); $result = parent::delete(); $this->saveTransaction(); return $result; } public function isGit() { $vcs = $this->getVersionControlSystem(); return ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_GIT); } public function isSVN() { $vcs = $this->getVersionControlSystem(); return ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_SVN); } public function isHg() { $vcs = $this->getVersionControlSystem(); return ($vcs == PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL); } public function isHosted() { return (bool)$this->getDetail('hosting-enabled', false); } public function setHosted($enabled) { return $this->setDetail('hosting-enabled', $enabled); } public function getServeOverHTTP() { if ($this->isSVN()) { return self::SERVE_OFF; } $serve = $this->getDetail('serve-over-http', self::SERVE_OFF); return $this->normalizeServeConfigSetting($serve); } public function setServeOverHTTP($mode) { return $this->setDetail('serve-over-http', $mode); } public function getServeOverSSH() { $serve = $this->getDetail('serve-over-ssh', self::SERVE_OFF); return $this->normalizeServeConfigSetting($serve); } public function setServeOverSSH($mode) { return $this->setDetail('serve-over-ssh', $mode); } public static function getProtocolAvailabilityName($constant) { switch ($constant) { case self::SERVE_OFF: return pht('Off'); case self::SERVE_READONLY: return pht('Read Only'); case self::SERVE_READWRITE: return pht('Read/Write'); default: return pht('Unknown'); } } private function normalizeServeConfigSetting($value) { switch ($value) { case self::SERVE_OFF: case self::SERVE_READONLY: return $value; case self::SERVE_READWRITE: if ($this->isHosted()) { return self::SERVE_READWRITE; } else { return self::SERVE_READONLY; } default: return self::SERVE_OFF; } } /** * Raise more useful errors when there are basic filesystem problems. */ private function assertLocalExists() { if (!$this->usesLocalWorkingCopy()) { return; } $local = $this->getLocalPath(); Filesystem::assertExists($local); Filesystem::assertIsDirectory($local); Filesystem::assertReadable($local); } /** * Determine if the working copy is bare or not. In Git, this corresponds * to `--bare`. In Mercurial, `--noupdate`. */ public function isWorkingCopyBare() { switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: return false; case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: $local = $this->getLocalPath(); if (Filesystem::pathExists($local.'/.git')) { return false; } else { return true; } } } public function usesLocalWorkingCopy() { switch ($this->getVersionControlSystem()) { case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: return $this->isHosted(); case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: return true; } } public function writeStatusMessage( $status_type, $status_code, array $parameters = array()) { $table = new PhabricatorRepositoryStatusMessage(); $conn_w = $table->establishConnection('w'); $table_name = $table->getTableName(); if ($status_code === null) { queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d AND statusType = %s', $table_name, $this->getID(), $status_type); } else { queryfx( $conn_w, 'INSERT INTO %T (repositoryID, statusType, statusCode, parameters, epoch) VALUES (%d, %s, %s, %s, %d) ON DUPLICATE KEY UPDATE statusCode = VALUES(statusCode), parameters = VALUES(parameters), epoch = VALUES(epoch)', $table_name, $this->getID(), $status_type, $status_code, json_encode($parameters), time()); } return $this; } /* -( PhabricatorPolicyInterface )----------------------------------------- */ public function getCapabilities() { return array( PhabricatorPolicyCapability::CAN_VIEW, PhabricatorPolicyCapability::CAN_EDIT, DiffusionCapabilityPush::CAPABILITY, ); } public function getPolicy($capability) { switch ($capability) { case PhabricatorPolicyCapability::CAN_VIEW: return $this->getViewPolicy(); case PhabricatorPolicyCapability::CAN_EDIT: return $this->getEditPolicy(); case DiffusionCapabilityPush::CAPABILITY: return $this->getPushPolicy(); } } public function hasAutomaticCapability($capability, PhabricatorUser $user) { return false; } public function describeAutomaticCapability($capability) { return null; } /* -( PhabricatorMarkupInterface )----------------------------------------- */ public function getMarkupFieldKey($field) { $hash = PhabricatorHash::digestForIndex($this->getMarkupText($field)); return "repo:{$hash}"; } public function newMarkupEngine($field) { return PhabricatorMarkupEngine::newMarkupEngine(array()); } public function getMarkupText($field) { return $this->getDetail('description'); } public function didMarkupText( $field, $output, PhutilMarkupEngine $engine) { require_celerity_resource('phabricator-remarkup-css'); return phutil_tag( 'div', array( 'class' => 'phabricator-remarkup', ), $output); } public function shouldUseMarkupCache($field) { return true; } } diff --git a/src/applications/repository/storage/__tests__/PhabricatorRepositoryTestCase.php b/src/applications/repository/storage/__tests__/PhabricatorRepositoryTestCase.php index 23feced972..c58e7c2dfe 100644 --- a/src/applications/repository/storage/__tests__/PhabricatorRepositoryTestCase.php +++ b/src/applications/repository/storage/__tests__/PhabricatorRepositoryTestCase.php @@ -1,58 +1,100 @@ 'file', 'file:///path/to/repo' => 'file', 'ssh://user@domain.com/path' => 'ssh', 'git@example.com:path' => 'ssh', 'git://git@example.com/path' => 'git', 'svn+ssh://example.com/path' => 'svn+ssh', 'https://example.com/repo/' => 'https', 'http://example.com/' => 'http', 'https://user@example.com/' => 'https', ); foreach ($tests as $uri => $expect) { $repository = new PhabricatorRepository(); $repository->setDetail('remote-uri', $uri); $this->assertEqual( $expect, $repository->getRemoteProtocol(), "Protocol for '{$uri}'."); } } public function testBranchFilter() { $git = PhabricatorRepositoryType::REPOSITORY_TYPE_GIT; $repo = new PhabricatorRepository(); $repo->setVersionControlSystem($git); $this->assertEqual( true, $repo->shouldTrackBranch('imaginary'), 'Track all branches by default.'); $repo->setDetail( 'branch-filter', array( 'master' => true, )); $this->assertEqual( true, $repo->shouldTrackBranch('master'), 'Track listed branches.'); $this->assertEqual( false, $repo->shouldTrackBranch('imaginary'), 'Do not track unlisted branches.'); } + public function testSubversionPathInfo() { + $svn = PhabricatorRepositoryType::REPOSITORY_TYPE_SVN; + + $repo = new PhabricatorRepository(); + $repo->setVersionControlSystem($svn); + + $repo->setDetail('remote-uri', 'http://svn.example.com/repo'); + $this->assertEqual( + 'http://svn.example.com/repo', + $repo->getSubversionPathURI()); + + $repo->setDetail('remote-uri', 'http://svn.example.com/repo/'); + $this->assertEqual( + 'http://svn.example.com/repo', + $repo->getSubversionPathURI()); + + $repo->setDetail('hosting-enabled', true); + + $repo->setDetail('local-path', '/var/repo/SVN'); + $this->assertEqual( + 'file:///var/repo/SVN', + $repo->getSubversionPathURI()); + + $repo->setDetail('local-path', '/var/repo/SVN/'); + $this->assertEqual( + 'file:///var/repo/SVN', + $repo->getSubversionPathURI()); + + $this->assertEqual( + 'file:///var/repo/SVN/a@', + $repo->getSubversionPathURI('a')); + + $this->assertEqual( + 'file:///var/repo/SVN/a@1', + $repo->getSubversionPathURI('a', 1)); + + $this->assertEqual( + 'file:///var/repo/SVN/%3F@22', + $repo->getSubversionPathURI('?', 22)); + } + + } diff --git a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php index 386458c7b0..d802a900a2 100644 --- a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php +++ b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php @@ -1,779 +1,772 @@ getDetail('remote-uri'); + $uri = $repository->getSubversionPathURI(); $svn_commit = $commit->getCommitIdentifier(); // Pull the top-level path changes out of "svn log". This is pretty // straightforward; just parse the XML log. $log = $this->getSVNLogXMLObject($uri, $svn_commit, $verbose = true); $entry = $log->logentry[0]; if (!$entry->paths) { // TODO: Explicitly mark this commit as broken elsewhere? This isn't // supposed to happen but we have some cases like rE27 and rG935 in the // Facebook repositories where things got all clowned up. return; } $raw_paths = array(); foreach ($entry->paths->path as $path) { $name = trim((string)$path); $raw_paths[$name] = array( 'rawPath' => $name, 'rawTargetPath' => (string)$path['copyfrom-path'], 'rawChangeType' => (string)$path['action'], 'rawTargetCommit' => (string)$path['copyfrom-rev'], ); } $copied_or_moved_map = array(); $deleted_paths = array(); $add_paths = array(); foreach ($raw_paths as $path => $raw_info) { if ($raw_info['rawTargetPath']) { $copied_or_moved_map[$raw_info['rawTargetPath']][] = $raw_info; } switch ($raw_info['rawChangeType']) { case 'D': $deleted_paths[$path] = $raw_info; break; case 'A': case 'R': $add_paths[$path] = $raw_info; break; } } // If a path was deleted, we need to look in the repository history to // figure out where the former valid location for it is so we can figure out // if it was a directory or not, among other things. $lookup_here = array(); foreach ($raw_paths as $path => $raw_info) { if ($raw_info['rawChangeType'] != 'D') { continue; } // If a change copies a directory and then deletes something from it, // we need to look at the old location for information about the path, not // the new location. This workflow is pretty ridiculous -- so much so that // Trac gets it wrong. See Facebook rO6 for an example, if you happen to // work at Facebook. $parents = $this->expandAllParentPaths($path, $include_self = true); foreach ($parents as $parent) { if (isset($add_paths[$parent])) { $relative_path = substr($path, strlen($parent)); $lookup_here[$path] = array( 'rawPath' => $add_paths[$parent]['rawTargetPath'].$relative_path, 'rawCommit' => $add_paths[$parent]['rawTargetCommit'], ); continue 2; } } // Otherwise we can just look at the previous revision. $lookup_here[$path] = array( 'rawPath' => $path, 'rawCommit' => $svn_commit - 1, ); } $lookup = array(); foreach ($raw_paths as $path => $raw_info) { if ($raw_info['rawChangeType'] == 'D') { $lookup[$path] = $lookup_here[$path]; } else { // For everything that wasn't deleted, we can just look it up directly. $lookup[$path] = array( 'rawPath' => $path, 'rawCommit' => $svn_commit, ); } } $effects = array(); $path_file_types = $this->lookupPathFileTypes($repository, $lookup); foreach ($raw_paths as $path => $raw_info) { if ($raw_info['rawChangeType'] == 'D' && $path_file_types[$path] == DifferentialChangeType::FILE_DIRECTORY) { // Bad. Child paths aren't enumerated in "svn log" so we need // to go fishing. $list = $this->lookupRecursiveFileList( $repository, $lookup[$path]); foreach ($list as $deleted_path => $path_file_type) { $deleted_path = rtrim($path.'/'.$deleted_path, '/'); if (!empty($raw_paths[$deleted_path])) { // We somehow learned about this deletion explicitly? // TODO: Unclear how this is possible. continue; } $effect_type = DifferentialChangeType::TYPE_DELETE; $effect_target_path = null; if (isset($copied_or_moved_map[$deleted_path])) { $effect_target_path = $path; if (count($copied_or_moved_map[$deleted_path]) > 1) { $effect_type = DifferentialChangeType::TYPE_MULTICOPY; } else { $effect_type = DifferentialChangeType::TYPE_MOVE_AWAY; } } $effects[$deleted_path] = array( 'rawPath' => $deleted_path, 'rawTargetPath' => $effect_target_path, 'rawTargetCommit' => null, 'rawDirect' => true, 'changeType' => $effect_type, 'fileType' => $path_file_type, ); $deleted_paths[$deleted_path] = $effects[$deleted_path]; } } } $resolved_types = array(); $supplemental = array(); foreach ($raw_paths as $path => $raw_info) { if (isset($resolved_types[$path])) { $type = $resolved_types[$path]; } else { switch ($raw_info['rawChangeType']) { case 'D': if (isset($copied_or_moved_map[$path])) { if (count($copied_or_moved_map[$path]) > 1) { $type = DifferentialChangeType::TYPE_MULTICOPY; } else { $type = DifferentialChangeType::TYPE_MOVE_AWAY; } } else { $type = DifferentialChangeType::TYPE_DELETE; } break; case 'A': $copy_from = $raw_info['rawTargetPath']; $copy_rev = $raw_info['rawTargetCommit']; if (!strlen($copy_from)) { $type = DifferentialChangeType::TYPE_ADD; } else { if (isset($deleted_paths[$copy_from])) { $type = DifferentialChangeType::TYPE_MOVE_HERE; $other_type = DifferentialChangeType::TYPE_MOVE_AWAY; } else { $type = DifferentialChangeType::TYPE_COPY_HERE; $other_type = DifferentialChangeType::TYPE_COPY_AWAY; } $source_file_type = $this->lookupPathFileType( $repository, $copy_from, array( 'rawPath' => $copy_from, 'rawCommit' => $copy_rev, )); if ($source_file_type == DifferentialChangeType::FILE_DELETED) { throw new Exception( "Something is wrong; source of a copy must exist."); } if ($source_file_type != DifferentialChangeType::FILE_DIRECTORY) { if (isset($raw_paths[$copy_from]) || isset($effects[$copy_from])) { break; } $effects[$copy_from] = array( 'rawPath' => $copy_from, 'rawTargetPath' => null, 'rawTargetCommit' => null, 'rawDirect' => false, 'changeType' => $other_type, 'fileType' => $source_file_type, ); } else { // ULTRADISASTER. We've added a directory which was copied // or moved from somewhere else. This is the most complex and // ridiculous case. $list = $this->lookupRecursiveFileList( $repository, array( 'rawPath' => $copy_from, 'rawCommit' => $copy_rev, )); foreach ($list as $from_path => $from_file_type) { $full_from = rtrim($copy_from.'/'.$from_path, '/'); $full_to = rtrim($path.'/'.$from_path, '/'); if (empty($raw_paths[$full_to])) { $effects[$full_to] = array( 'rawPath' => $full_to, 'rawTargetPath' => $full_from, 'rawTargetCommit' => $copy_rev, 'rawDirect' => true, 'changeType' => $type, 'fileType' => $from_file_type, ); } else { // This means we picked the file up explicitly elsewhere. // If the file as modified, SVN will drop the copy // information. We need to restore it. $supplemental[$full_to]['rawTargetPath'] = $full_from; $supplemental[$full_to]['rawTargetCommit'] = $copy_rev; if ($raw_paths[$full_to]['rawChangeType'] == 'M') { $resolved_types[$full_to] = $type; } } if (empty($raw_paths[$full_from]) && empty($effects[$full_from])) { if ($other_type == DifferentialChangeType::TYPE_COPY_AWAY) { // Add an indirect effect for the copied file, if we // don't already have an entry for it (e.g., a separate // change). $effects[$full_from] = array( 'rawPath' => $full_from, 'rawTargetPath' => null, 'rawTargetCommit' => null, 'rawDirect' => false, 'changeType' => $other_type, 'fileType' => $from_file_type, ); } } } } } break; // This is "replaced", caused by "svn rm"-ing a file, putting another // in its place, and then "svn add"-ing it. We do not distinguish // between this and "M". case 'R': case 'M': if (isset($copied_or_moved_map[$path])) { $type = DifferentialChangeType::TYPE_COPY_AWAY; } else { $type = DifferentialChangeType::TYPE_CHANGE; } break; } } $resolved_types[$path] = $type; } foreach ($raw_paths as $path => $raw_info) { $raw_paths[$path]['changeType'] = $resolved_types[$path]; if (isset($supplemental[$path])) { foreach ($supplemental[$path] as $key => $value) { $raw_paths[$path][$key] = $value; } } } foreach ($raw_paths as $path => $raw_info) { $effects[$path] = array( 'rawPath' => $path, 'rawTargetPath' => $raw_info['rawTargetPath'], 'rawTargetCommit' => $raw_info['rawTargetCommit'], 'rawDirect' => true, 'changeType' => $raw_info['changeType'], 'fileType' => $path_file_types[$path], ); } $parents = array(); foreach ($effects as $path => $effect) { foreach ($this->expandAllParentPaths($path) as $parent_path) { $parents[$parent_path] = true; } } $parents = array_keys($parents); foreach ($parents as $parent) { if (isset($effects[$parent])) { continue; } $effects[$parent] = array( 'rawPath' => $parent, 'rawTargetPath' => null, 'rawTargetCommit' => null, 'rawDirect' => false, 'changeType' => DifferentialChangeType::TYPE_CHILD, 'fileType' => DifferentialChangeType::FILE_DIRECTORY, ); } $lookup_paths = array(); foreach ($effects as $effect) { $lookup_paths[$effect['rawPath']] = true; if ($effect['rawTargetPath']) { $lookup_paths[$effect['rawTargetPath']] = true; } } $lookup_paths = array_keys($lookup_paths); $lookup_commits = array(); foreach ($effects as $effect) { if ($effect['rawTargetCommit']) { $lookup_commits[$effect['rawTargetCommit']] = true; } } $lookup_commits = array_keys($lookup_commits); $path_map = $this->lookupOrCreatePaths($lookup_paths); $commit_map = $this->lookupSvnCommits($repository, $lookup_commits); $this->writeChanges($repository, $commit, $effects, $path_map, $commit_map); $this->writeBrowse($repository, $commit, $effects, $path_map); } private function writeChanges( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, array $effects, array $path_map, array $commit_map) { $conn_w = $repository->establishConnection('w'); $sql = array(); foreach ($effects as $effect) { $sql[] = qsprintf( $conn_w, '(%d, %d, %d, %nd, %nd, %d, %d, %d, %d)', $repository->getID(), $path_map[$effect['rawPath']], $commit->getID(), $effect['rawTargetPath'] ? $path_map[$effect['rawTargetPath']] : null, $effect['rawTargetCommit'] ? $commit_map[$effect['rawTargetCommit']] : null, $effect['changeType'], $effect['fileType'], $effect['rawDirect'] ? 1 : 0, $commit->getCommitIdentifier()); } queryfx( $conn_w, 'DELETE FROM %T WHERE commitID = %d', PhabricatorRepository::TABLE_PATHCHANGE, $commit->getID()); foreach (array_chunk($sql, 512) as $sql_chunk) { queryfx( $conn_w, 'INSERT INTO %T (repositoryID, pathID, commitID, targetPathID, targetCommitID, changeType, fileType, isDirect, commitSequence) VALUES %Q', PhabricatorRepository::TABLE_PATHCHANGE, implode(', ', $sql_chunk)); } } private function writeBrowse( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, array $effects, array $path_map) { $conn_w = $repository->establishConnection('w'); $sql = array(); foreach ($effects as $effect) { $type = $effect['changeType']; if (!$effect['rawDirect']) { if ($type == DifferentialChangeType::TYPE_COPY_AWAY) { // Don't write COPY_AWAY to the filesystem table if it isn't a direct // event. continue; } if ($type == DifferentialChangeType::TYPE_CHILD) { // Don't write CHILD to the filesystem table. Although doing these // writes has the nice property of letting you see when a directory's // contents were last changed, it explodes the table tremendously // and makes Diffusion far slower. continue; } } if ($effect['rawPath'] == '/') { // Don't write any events on '/' to the filesystem table; in // particular, it doesn't have a meaningful parentID. continue; } $existed = !DifferentialChangeType::isDeleteChangeType($type); $sql[] = qsprintf( $conn_w, '(%d, %d, %d, %d, %d, %d)', $repository->getID(), $path_map[$this->getParentPath($effect['rawPath'])], $commit->getCommitIdentifier(), $path_map[$effect['rawPath']], $existed ? 1 : 0, $effect['fileType']); } queryfx( $conn_w, 'DELETE FROM %T WHERE repositoryID = %d AND svnCommit = %d', PhabricatorRepository::TABLE_FILESYSTEM, $repository->getID(), $commit->getCommitIdentifier()); foreach (array_chunk($sql, 512) as $sql_chunk) { queryfx( $conn_w, 'INSERT INTO %T (repositoryID, parentID, svnCommit, pathID, existed, fileType) VALUES %Q', PhabricatorRepository::TABLE_FILESYSTEM, implode(', ', $sql_chunk)); } } private function lookupSvnCommits( PhabricatorRepository $repository, array $commits) { if (!$commits) { return array(); } $commit_table = new PhabricatorRepositoryCommit(); $commit_data = queryfx_all( $commit_table->establishConnection('w'), 'SELECT id, commitIdentifier FROM %T WHERE repositoryID = %d AND commitIdentifier in (%Ls)', $commit_table->getTableName(), $repository->getID(), $commits); $commit_map = ipull($commit_data, 'id', 'commitIdentifier'); $need = array(); foreach ($commits as $commit) { if (empty($commit_map[$commit])) { $need[] = $commit; } } // If we are parsing a Subversion repository and have been configured to // import only some subdirectory of it, we may find commits which reference // other foreign commits outside of the directory (for instance, because of // a move or copy). Rather than trying to execute full parses on them, just // create stub commits and identify the stubs as foreign commits. if ($need) { $subpath = $repository->getDetail('svn-subpath'); if (!$subpath) { $commits = implode(', ', $need); throw new Exception( "Missing commits ({$need}) in a SVN repository which is not ". "configured for subdirectory-only parsing!"); } foreach ($need as $foreign_commit) { $commit = new PhabricatorRepositoryCommit(); $commit->setRepositoryID($repository->getID()); $commit->setCommitIdentifier($foreign_commit); $commit->setEpoch(0); $commit->save(); $data = new PhabricatorRepositoryCommitData(); $data->setCommitID($commit->getID()); $data->setAuthorName(''); $data->setCommitMessage(''); $data->setCommitDetails( array( 'foreign-svn-stub' => true, // Denormalize this to make it easier to debug cases where someone // did half a parse and then changed the subdirectory or something // like that. 'svn-subpath' => $subpath, )); $data->save(); $commit_map[$foreign_commit] = $commit->getID(); } } return $commit_map; } private function lookupPathFileType( PhabricatorRepository $repository, $path, array $path_info) { $result = $this->lookupPathFileTypes( $repository, array( $path => $path_info, )); return $result[$path]; } private function lookupPathFileTypes( PhabricatorRepository $repository, array $paths) { $result_map = array(); - $repository_uri = $repository->getDetail('remote-uri'); + $repository_uri = $repository->getSubversionPathURI(); if (isset($paths['/'])) { $result_map['/'] = DifferentialChangeType::FILE_DIRECTORY; unset($paths['/']); } $parents = array(); $path_mapping = array(); foreach ($paths as $path => $lookup) { $parent = dirname($lookup['rawPath']); - $parent = ltrim($parent, '/'); - $parent = $this->encodeSVNPath($parent); - $parent = $repository_uri.$parent.'@'.$lookup['rawCommit']; + $parent = $repository->getSubversionPathURI( + $parent, + $lookup['rawCommit']); + $parent = escapeshellarg($parent); $parents[$parent] = true; $path_mapping[$parent][] = dirname($path); } // Reverse this list so we can pop $path_mapping, as that's more efficient // than shifting it. We need to associate these maps positionally because // a change can copy the same source path from multiple revisions via // "svn cp path@1 a; svn cp path@2 b;" and the XML output gives us no way // to distinguish which revision we're looking at except based on its // position in the document. $all_paths = array_reverse(array_keys($parents)); foreach (array_chunk($all_paths, 64) as $path_chunk) { list($raw_xml) = $repository->execxRemoteCommand( '--xml ls %C', implode(' ', $path_chunk)); $xml = new SimpleXMLElement($raw_xml); foreach ($xml->list as $list) { $list_path = (string)$list['path']; // SVN is a big mess. See Facebook rG8 (a revision which adds files // with spaces in their names) for an example. $list_path = rawurldecode($list_path); if ($list_path == $repository_uri) { $base = '/'; } else { $base = substr($list_path, strlen($repository_uri)); } $mapping = array_pop($path_mapping); foreach ($list->entry as $entry) { $val = $this->getFileTypeFromSVNKind($entry['kind']); foreach ($mapping as $base_path) { // rtrim() causes us to handle top-level directories correctly. $key = rtrim($base_path, '/').'/'.$entry->name; $result_map[$key] = $val; } } } } foreach ($paths as $path => $lookup) { if (empty($result_map[$path])) { $result_map[$path] = DifferentialChangeType::FILE_DELETED; } } return $result_map; } - private function encodeSVNPath($path) { - $path = rawurlencode($path); - $path = str_replace('%2F', '/', $path); - return $path; - } - private function getFileTypeFromSVNKind($kind) { $kind = (string)$kind; switch ($kind) { case 'dir': return DifferentialChangeType::FILE_DIRECTORY; case 'file': return DifferentialChangeType::FILE_NORMAL; default: throw new Exception("Unknown SVN file kind '{$kind}'."); } } private function lookupRecursiveFileList( PhabricatorRepository $repository, array $info) { $path = $info['rawPath']; $rev = $info['rawCommit']; - $path = $this->encodeSVNPath($path); - $hashkey = md5($repository->getDetail('remote-uri').$path.'@'.$rev); + $path_uri = $repository->getSubversionPathURI($path, $rev); + $hashkey = md5($path_uri); // This method is quite horrible. The underlying challenge is that some // commits in the Facebook repository are enormous, taking multiple hours // to 'ls -R' out of the repository and producing XML files >1GB in size. // If we try to SimpleXML them, the object exhausts available memory on a // 64G machine. Instead, cache the XML output and then parse it line by line // to limit space requirements. $cache_loc = sys_get_temp_dir().'/diffusion.'.$hashkey.'.svnls'; if (!Filesystem::pathExists($cache_loc)) { $tmp = new TempFile(); $repository->execxRemoteCommand( - '--xml ls -R %s%s@%d > %s', - $repository->getDetail('remote-uri'), - $path, - $rev, + '--xml ls -R %s > %s', + $path_uri, $tmp); execx( 'mv %s %s', $tmp, $cache_loc); } $map = $this->parseRecursiveListFileData($cache_loc); Filesystem::remove($cache_loc); return $map; } private function parseRecursiveListFileData($file_path) { $map = array(); $mode = 'xml'; $done = false; $entry = null; foreach (new LinesOfALargeFile($file_path) as $lno => $line) { switch ($mode) { case 'entry': if ($line == '') { $entry = implode('', $entry); $pattern = '@^\s+kind="(file|dir)">'. '(.*?)'. '((.*?))?@'; $matches = null; if (!preg_match($pattern, $entry, $matches)) { throw new Exception("Unable to parse entry!"); } $map[html_entity_decode($matches[2])] = $this->getFileTypeFromSVNKind($matches[1]); $mode = 'entry-or-end'; } else { $entry[] = $line; } break; case 'entry-or-end': if ($line == '') { $done = true; break 2; } else if ($line == ' or = 1) { array_pop($parts); $parents[] = '/'.implode('/', $parts); } return $parents; } }