Correct Aphlict websocket URI construction after PHP8 compatibility changes
[phabricator.git] / src / applications / repository / engine / PhabricatorRepositoryPullEngine.php
blob90033e492893646d65806532f289cf358cadc269
1 <?php
/**
 * Manages execution of `git pull` and `hg pull` commands for
 * @{class:PhabricatorRepository} objects. Used by
 * @{class:PhabricatorRepositoryPullLocalDaemon}.
 *
 * This class also covers initial working copy setup through `git clone`,
 * `git init`, `hg clone`, `hg init`, or `svnadmin create`.
 *
 * @task pull     Pulling Working Copies
 * @task git      Pulling Git Working Copies
 * @task hg       Pulling Mercurial Working Copies
 * @task svn      Pulling Subversion Working Copies
 * @task internal Internals
 */
17 final class PhabricatorRepositoryPullEngine
18 extends PhabricatorRepositoryEngine {
21 /* -( Pulling Working Copies )--------------------------------------------- */
/**
 * Pull the repository while holding its "repo.pull" lock.
 *
 * The lock is released before this method returns or rethrows, whether
 * or not the pull itself succeeded.
 *
 * @return mixed Whatever pullRepositoryWithLock() returns.
 * @throws DiffusionDaemonLockException If the lock can not be acquired.
 * @throws Exception Any exception raised by the pull is rethrown after
 *   the lock has been released.
 * @task pull
 */
public function pullRepository() {
  $repository = $this->getRepository();

  $pull_lock = $this->newRepositoryLock($repository, 'repo.pull', true);

  try {
    $pull_lock->lock();
  } catch (PhutilLockException $lock_exception) {
    $reason = pht(
      'Another process is currently updating repository "%s", '.
      'skipping pull.',
      $repository->getDisplayName());

    throw new DiffusionDaemonLockException($reason);
  }

  // Hold on to any failure so the lock is released on a single code path
  // before the failure is propagated.
  $failure = null;
  $result = null;
  try {
    $result = $this->pullRepositoryWithLock();
  } catch (Exception $ex) {
    $failure = $ex;
  }

  $pull_lock->unlock();

  if ($failure) {
    throw $failure;
  }

  return $result;
}
/**
 * Create the local working copy if it does not exist yet, then update it
 * (observed repositories) or install commit hooks (hosted repositories).
 *
 * Called by pullRepository() once the pull lock has been acquired.
 *
 * @return this|null This engine after a completed pull, or null when the
 *   pull was skipped (read-only repository, or non-hosted Subversion
 *   repository).
 * @throws Exception If the VCS is unknown, no local path is configured,
 *   or any step of the pull fails. The failure is also recorded as the
 *   repository init status by abortPull().
 * @task pull
 */
private function pullRepositoryWithLock() {
  $repository = $this->getRepository();
  $viewer = PhabricatorUser::getOmnipotentUser();

  if ($repository->isReadOnly()) {
    $this->skipPull(
      pht(
        "Skipping pull on read-only repository.\n\n%s",
        $repository->getReadOnlyMessageForDisplay()));

    // skipPull() only logs and records a status; it does not stop
    // execution. Return explicitly, like the non-hosted Subversion case
    // below, so a read-only repository is actually left untouched.
    return;
  }

  $is_hg = false;
  $is_git = false;
  $is_svn = false;

  $vcs = $repository->getVersionControlSystem();

  switch ($vcs) {
    case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
      // We never pull a local copy of non-hosted Subversion repositories.
      if (!$repository->isHosted()) {
        $this->skipPull(
          pht(
            'Repository "%s" is a non-hosted Subversion repository, which '.
            'does not require a local working copy to be pulled.',
            $repository->getDisplayName()));
        return;
      }
      $is_svn = true;
      break;
    case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
      $is_git = true;
      break;
    case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
      $is_hg = true;
      break;
    default:
      // abortPull() always throws, so execution never continues past it.
      $this->abortPull(pht('Unknown VCS "%s"!', $vcs));
      break;
  }

  $local_path = $repository->getLocalPath();
  if ($local_path === null) {
    $this->abortPull(
      pht(
        'No local path is configured for repository "%s".',
        $repository->getDisplayName()));
  }

  try {
    // Make sure the parent directory exists before creating anything
    // inside it.
    $dirname = dirname($local_path);
    if (!Filesystem::pathExists($dirname)) {
      Filesystem::createDirectory($dirname, 0755, $recursive = true);
    }

    if (!Filesystem::pathExists($local_path)) {
      $this->logPull(
        pht(
          'Creating a new working copy for repository "%s".',
          $repository->getDisplayName()));
      if ($is_git) {
        $this->executeGitCreate();
      } else if ($is_hg) {
        $this->executeMercurialCreate();
      } else {
        $this->executeSubversionCreate();
      }
    }

    id(new DiffusionRepositoryClusterEngine())
      ->setViewer($viewer)
      ->setRepository($repository)
      ->synchronizeWorkingCopyBeforeRead();

    // Only observed (non-hosted) repositories are updated from a remote.
    if (!$repository->isHosted()) {
      $this->logPull(
        pht(
          'Updating the working copy for repository "%s".',
          $repository->getDisplayName()));

      if ($is_git) {
        $this->executeGitUpdate();
      } else if ($is_hg) {
        $this->executeMercurialUpdate();
      }
    }

    // Hosted repositories get commit hooks (re)installed on every pull.
    if ($repository->isHosted()) {
      if ($is_git) {
        $this->installGitHook();
      } else if ($is_svn) {
        $this->installSubversionHook();
      } else if ($is_hg) {
        $this->installMercurialHook();
      }

      foreach ($repository->getHookDirectories() as $directory) {
        $this->installHookDirectory($directory);
      }
    }

    if ($is_git) {
      $this->updateGitWorkingCopyConfiguration();
    }

  } catch (Exception $ex) {
    // Record the failure as the init status, then rethrow the original
    // exception.
    $this->abortPull(
      pht(
        "Pull of '%s' failed: %s",
        $repository->getDisplayName(),
        $ex->getMessage()),
      $ex);
  }

  $this->donePull();

  return $this;
}
/**
 * Log the reason a pull is being skipped and record an "okay" init
 * status. This does not interrupt the caller: callers which want to stop
 * must return on their own.
 *
 * @param string $message Explanation to log.
 * @return void
 * @task internal
 */
private function skipPull($message) {
  $this->log($message);

  // Same effect as donePull(): mark initialization as okay.
  $this->updateRepositoryInitStatus(
    PhabricatorRepositoryStatusMessage::CODE_OKAY);
}
/**
 * Record an error init status for the repository, then throw.
 *
 * @param string $message Error message to record as the init status.
 * @param Exception|null $ex Exception to rethrow. If omitted, a new
 *   Exception carrying the message is thrown instead.
 * @return void This method never returns normally.
 * @throws Exception Always.
 * @task internal
 */
private function abortPull($message, Exception $ex = null) {
  $this->updateRepositoryInitStatus(
    PhabricatorRepositoryStatusMessage::CODE_ERROR,
    $message);

  if (!$ex) {
    throw new Exception($message);
  }

  throw $ex;
}
/**
 * Write a pull progress message to the engine log.
 *
 * @param string $message Message to log.
 * @return void
 * @task internal
 */
private function logPull($message) {
  $this->log($message);
}
/**
 * Record an "okay" init status for the repository, with no message.
 *
 * @return void
 * @task internal
 */
private function donePull() {
  $this->updateRepositoryInitStatus(
    PhabricatorRepositoryStatusMessage::CODE_OKAY);
}
/**
 * Write a status message of type TYPE_INIT for the repository.
 *
 * @param mixed $code One of the PhabricatorRepositoryStatusMessage
 *   CODE_* constants.
 * @param string|null $message Optional message stored under the
 *   "message" key of the status parameters.
 * @return void
 * @task internal
 */
private function updateRepositoryInitStatus($code, $message = null) {
  $parameters = array(
    'message' => $message,
  );

  $repository = $this->getRepository();
  $repository->writeStatusMessage(
    PhabricatorRepositoryStatusMessage::TYPE_INIT,
    $code,
    $parameters);
}
/**
 * Write an executable shell script to the given path which execs the
 * "bin/commit-hook" script under PHP, passing the hook context
 * identifier, any extra hook arguments, and the hook's own arguments.
 *
 * @param string $path Path of the hook file to write.
 * @param list<string> $hook_argv Extra arguments placed before the
 *   arguments the hook itself receives.
 * @return void
 * @task internal
 */
private function installHook($path, array $hook_argv = array()) {
  $this->log(pht('Installing commit hook to "%s"...', $path));

  $identifier = $this->getHookContextIdentifier($this->getRepository());

  $phabricator_root = dirname(phutil_get_library_root('phabricator'));
  $hook_script = $phabricator_root.'/bin/commit-hook';

  $php_binary = Filesystem::resolveBinary('php');

  $command = csprintf(
    'exec %s -f %s -- %s %Ls "$@"',
    $php_binary,
    $hook_script,
    $identifier,
    $hook_argv);

  // The trailing empty element makes the script end with a newline.
  $script_lines = array(
    '#!/bin/sh',
    'export TERM=dumb',
    $command,
    '',
  );

  Filesystem::writeFile($path, implode("\n", $script_lines));
  Filesystem::changePermissions($path, 0755);
}
/**
 * Create a directory for custom hook scripts and place a README in it
 * explaining what the directory is for.
 *
 * @param string $path Directory to create.
 * @return void
 * @task internal
 */
private function installHookDirectory($path) {
  $readme_text = pht(
    "To add custom hook scripts to this repository, add them to this ".
    "directory.\n\nPhabricator will run any executables in this directory ".
    "after running its own checks, as though they were normal hook ".
    "scripts.");

  $readme_path = $path.'/README';

  Filesystem::createDirectory($path, 0755);
  Filesystem::writeFile($readme_path, $readme_text);
}
/**
 * Build the identifier passed to commit hooks: the repository PHID,
 * followed by ":<instance>" when the "cluster.instance" configuration
 * value is a nonempty string.
 *
 * @param PhabricatorRepository $repository Repository to identify.
 * @return string Hook context identifier.
 * @task internal
 */
private function getHookContextIdentifier(PhabricatorRepository $repository) {
  $phid = $repository->getPHID();

  $instance = PhabricatorEnv::getEnvConfig('cluster.instance');
  if ($instance === null || !strlen($instance)) {
    return $phid;
  }

  return $phid.':'.$instance;
}
249 /* -( Pulling Git Working Copies )----------------------------------------- */
/**
 * Create a bare, empty Git repository at the repository's local path.
 *
 * @return void
 * @task git
 */
private function executeGitCreate() {
  $local_path = $this->getRepository()->getLocalPath();
  $target = rtrim($local_path, '/');

  // See T13448. Repositories are always created with "git init" as bare,
  // empty working copies. Using "git clone" instead would pull in too many
  // refs when "Fetch Refs" is also configured; there is no apparent way to
  // make "git clone" behave narrowly and no apparent reason to bother.

  $this->getRepository()->execxRemoteCommand(
    'init --bare -- %s',
    $target);
}
/**
 * Sanity check the local Git working copy, delete surplus local refs,
 * and fetch from the remote when local and remote refs differ.
 *
 * If the sanity check fails and the repository allows it, the working
 * copy is destroyed and recreated before continuing.
 *
 * @return false|null False when the fetch is skipped because local and
 *   remote refs are already identical; no value otherwise.
 * @throws Exception If the working copy fails the sanity check and may
 *   not be destroyed.
 * @task git
 */
private function executeGitUpdate() {
  $repository = $this->getRepository();

  // See T13479. We previously used "--show-toplevel", but this stopped
  // working in Git 2.25.0 when run in a bare repository.

  // NOTE: As of Git 2.21.1, "git rev-parse" can not parse "--" in its
  // argument list, so we can not specify arguments unambiguously. Any
  // version of Git which does not recognize the "--git-dir" flag will
  // treat this as a request to parse the literal refname "--git-dir".

  list($err, $stdout) = $repository->execLocalCommand(
    'rev-parse --git-dir');

  $path = $repository->getLocalPath();

  $repository_root = null;
  $message = null;

  if ($err) {
    // Try to raise a more tailored error message in the more common case
    // of the user creating an empty directory. (We could try to remove it,
    // but might not be able to, and it's much simpler to raise a good
    // message than try to navigate those waters.)
    if (is_dir($path)) {
      $files = Filesystem::listDirectory($path, true);
      if ($files) {
        $message = pht(
          'Expected to find a Git repository at "%s", but there is '.
          'a non-repository directory (with other stuff in it) there. '.
          'Move or remove this directory. A daemon will construct '.
          'the working copy for you.',
          $path);
      } else {
        $message = pht(
          'Expected to find a Git repository at "%s", but there is an '.
          'empty directory there. Remove the directory. A daemon will '.
          'construct the working copy for you.',
          $path);
      }
    } else if (is_file($path)) {
      $message = pht(
        'Expected to find a Git repository at "%s", but there is a '.
        'file there instead. Move or remove this file. A daemon will '.
        'construct the working copy for you.',
        $path);
    } else {
      $message = pht(
        'Expected to find a git repository at "%s", but did not.',
        $path);
    }
  } else {
    $repository_root = Filesystem::resolvePath(
      rtrim($stdout, "\n"),
      $path);

    // In a bare repository, "--git-dir" is the root directory itself. In
    // a working copy, it is the ".git/" directory. If the result is not
    // the root directory, step up one level to leave ".git/".
    if (!Filesystem::pathsAreEquivalent($repository_root, $path)) {
      $repository_root = dirname($repository_root);
    }

    // Prior to Git 2.25.0, we used "--show-toplevel", which had a weird
    // case here when the working copy was inside another working copy.
    // The switch to "--git-dir" seems to have resolved this; we now seem
    // to find the nearest git directory and thus the correct repository
    // root.
    if (!Filesystem::pathsAreEquivalent($repository_root, $path)) {
      $err = true;
      $message = pht(
        'Expected to find a Git repository at "%s", but the actual Git '.
        'repository root for this directory is "%s". Something is '.
        'misconfigured. This directory should be writable by the daemons '.
        'and not inside another Git repository.',
        $path,
        $repository_root);
    }
  }

  if ($err) {
    if (!$repository->canDestroyWorkingCopy()) {
      throw new Exception($message);
    }

    phlog(
      pht(
        "Repository working copy at '%s' failed sanity check; ".
        "destroying and re-cloning. %s",
        $path,
        $message));
    Filesystem::remove($path);
    $this->executeGitCreate();
  }

  // Load the refs we're planning to fetch from the remote repository.
  $remote_refs = $this->loadGitRemoteRefs(
    $repository,
    $repository->getRemoteURIEnvelope(),
    false);

  // Load the matching refs from the local repository, by using the local
  // working copy path as the "remote" repository URI.
  $local_refs = $this->loadGitRemoteRefs(
    $repository,
    new PhutilOpaqueEnvelope($path),
    true);

  // See T13448. The "git fetch --prune ..." flag only prunes local refs
  // matching the refspecs we pass it. If "Fetch Refs" is configured, we'll
  // pass it a very narrow list of refspecs, and it won't prune older refs
  // that aren't currently subject to fetching.

  // Since we want to prune everything that isn't (a) on the fetch list and
  // (b) in the remote, handle pruning of any surplus leftover refs
  // ourselves before we fetch anything.

  // (We don't have to do this if "Fetch Refs" isn't set up, since
  // "--prune" will work in that case, but it's a little simpler to always
  // go down the same code path.)

  $surplus_refs = array();
  foreach ($local_refs as $ref_name => $ref_hash) {
    if (!isset($remote_refs[$ref_name])) {
      $surplus_refs[] = $ref_name;
    }
  }

  if ($surplus_refs) {
    $this->log(
      pht(
        'Found %s surplus local ref(s) to delete.',
        phutil_count($surplus_refs)));

    foreach ($surplus_refs as $ref_name) {
      $this->log(
        pht(
          'Deleting surplus local ref "%s" ("%s").',
          $ref_name,
          $local_refs[$ref_name]));

      $repository->execLocalCommand(
        'update-ref -d %R --',
        $ref_name);

      // Keep the local map in step with what is now on disk so the
      // comparison below sees the pruned state.
      unset($local_refs[$ref_name]);
    }
  }

  if ($remote_refs === $local_refs) {
    $this->log(
      pht(
        'Skipping fetch because local and remote refs are already '.
        'identical.'));
    return false;
  }

  $this->logRefDifferences($remote_refs, $local_refs);

  $fetch_rules = $this->getGitFetchRules($repository);

  // For very old non-bare working copies, we need to use
  // "--update-head-ok" to tell Git that it is allowed to overwrite
  // whatever is currently checked out. See T13280.

  $fetch_future = $repository->getRemoteCommandFuture(
    'fetch --no-tags --update-head-ok -- %P %Ls',
    $repository->getRemoteURIEnvelope(),
    $fetch_rules);

  $fetch_future->setCWD($path);
  $fetch_future->resolvex();
}
/**
 * Get the ref patterns to fetch for a repository, defaulting to all refs
 * ("refs/*") when the repository has no fetch rules configured.
 *
 * @param PhabricatorRepository $repository Repository to read rules from.
 * @return list<string> Ref patterns.
 * @task git
 */
private function getGitRefRules(PhabricatorRepository $repository) {
  $configured_rules = $repository->getFetchRules($repository);

  if ($configured_rules) {
    return $configured_rules;
  }

  return array(
    'refs/*',
  );
}
/**
 * Build the refspecs passed to "git fetch" from the repository's ref
 * rules.
 *
 * @param PhabricatorRepository $repository Repository to build rules for.
 * @return list<string> Refspecs of the form "+X:X".
 * @task git
 */
private function getGitFetchRules(PhabricatorRepository $repository) {
  // Each ref rule "X" becomes the refspec "+X:X":
  //
  //   - "X" means "fetch ref X";
  //   - "...:X" means "...and copy it into local ref X";
  //   - "+..." means "...and overwrite the local ref if it already exists".

  $refspecs = array();
  foreach ($this->getGitRefRules($repository) as $rule) {
    $refspecs[] = '+'.$rule.':'.$rule;
  }

  return $refspecs;
}
/**
 * Install the "pre-receive" commit hook into the Git working copy, using
 * "hooks/" for bare working copies and ".git/hooks/" otherwise.
 *
 * @return void
 * @task git
 */
private function installGitHook() {
  $repository = $this->getRepository();
  $root = $repository->getLocalPath();

  $hook_path = $repository->isWorkingCopyBare()
    ? '/hooks/pre-receive'
    : '/.git/hooks/pre-receive';

  $this->installHook($root.$hook_path);
}
/**
 * For bare working copies, allow deletion of the current branch and
 * point "HEAD" at the repository's configured default branch.
 *
 * Both commands are advisory: their results are not checked.
 *
 * @return void
 * @task git
 */
private function updateGitWorkingCopyConfiguration() {
  $repository = $this->getRepository();

  // See T5963. When you "git clone" from a remote with no "master", the
  // client warns you that it isn't sure what it should check out as an
  // initial state:
  //
  //   warning: remote HEAD refers to nonexistent ref, unable to checkout
  //
  // We can tell the client what it should check out by making "HEAD"
  // point somewhere. However:
  //
  // (1) If we don't set "receive.denyDeleteCurrent" to "ignore" and a user
  // tries to delete the default branch, Git raises an error and refuses.
  // We want to allow this; we already have sufficient protections around
  // dangerous changes and do not need to special case the default branch.
  //
  // (2) A repository may have a nonexistent default branch configured.
  // For now, we just respect configuration. This will raise a warning when
  // users clone the repository.
  //
  // In any case, these changes are both advisory, so ignore any errors we
  // may encounter.
  //
  // We do this for both hosted and observed repositories. Although it is
  // not terribly common to clone from Phabricator's copy of an observed
  // repository, it works fine and makes sense occasionally.

  if (!$repository->isWorkingCopyBare()) {
    return;
  }

  $repository->execLocalCommand(
    'config -- receive.denyDeleteCurrent ignore');

  $default_ref = 'refs/heads/'.$repository->getDefaultBranch();
  $repository->execLocalCommand(
    'symbolic-ref HEAD %s',
    $default_ref);
}
/**
 * List refs with "git ls-remote" and return them as a map from ref name
 * to hash, sorted by ref name. "HEAD" and anything under "refs/remotes/"
 * is left out.
 *
 * @param PhabricatorRepository $repository Repository to run the command
 *   for.
 * @param PhutilOpaqueEnvelope $remote_envelope URI (or local path) to
 *   list refs from.
 * @param bool $is_local If true, list every ref instead of only those
 *   matching the repository's ref rules.
 * @return map<string, string> Map of ref names to hashes.
 * @throws Exception If a ref rule begins with "-".
 * @task git
 */
private function loadGitRemoteRefs(
  PhabricatorRepository $repository,
  PhutilOpaqueEnvelope $remote_envelope,
  $is_local) {

  // See T13448. When listing local remotes, we want to list everything,
  // not just refs we expect to fetch. This allows us to detect that we have
  // undesirable refs (which have been deleted in the remote, but are still
  // present locally) so we can update our state to reflect the correct
  // remote state.

  $patterns = array();
  if (!$is_local) {
    $patterns = $this->getGitRefRules($repository);

    // NOTE: "git ls-remote" does not support "--" until circa January 2016.
    // See T12416. None of the flags to "ls-remote" appear dangerous, but
    // refuse to list any refs beginning with "-" just in case.

    foreach ($patterns as $pattern) {
      if (preg_match('/^-/', $pattern)) {
        throw new Exception(
          pht(
            'Refusing to list potentially dangerous ref ("%s") beginning '.
            'with "-".',
            $pattern));
      }
    }
  }

  list($stdout) = $repository->execxRemoteCommand(
    'ls-remote %P %Ls',
    $remote_envelope,
    $patterns);

  // Empty repositories don't have any refs.
  if ($stdout === null || !strlen(rtrim($stdout))) {
    return array();
  }

  $ref_map = array();
  foreach (phutil_split_lines($stdout, false) as $line) {
    // Each line is a hash and a ref name separated by whitespace.
    list($hash, $name) = preg_split('/\s+/', $line, 2);

    // If the remote has a HEAD, just ignore it.
    if ($name == 'HEAD') {
      continue;
    }

    // If the remote ref is itself a remote ref, ignore it.
    if (preg_match('(^refs/remotes/)', $name)) {
      continue;
    }

    $ref_map[$name] = $hash;
  }

  ksort($ref_map);

  return $ref_map;
}
/**
 * Load local refs through DiffusionLowLevelGitRefQuery and return them
 * as a map from each ref's raw "refname" field to its commit identifier,
 * sorted by ref name.
 *
 * @param PhabricatorRepository $repository Repository to query.
 * @return map<string, string> Map of ref names to commit identifiers.
 * @task git
 */
private function loadGitLocalRefs(PhabricatorRepository $repository) {
  $query = id(new DiffusionLowLevelGitRefQuery())
    ->setRepository($repository);

  $ref_map = array();
  foreach ($query->execute() as $ref) {
    $raw_fields = $ref->getRawFields();
    $ref_name = idx($raw_fields, 'refname');

    $ref_map[$ref_name] = $ref->getCommitIdentifier();
  }

  ksort($ref_map);

  return $ref_map;
}
/**
 * Log every ref whose value differs between the remote and local ref
 * maps. A ref missing from one side is shown as "<null>" on that side.
 *
 * @param map<string, string> $remote Remote ref map.
 * @param map<string, string> $local Local ref map.
 * @return void
 * @task git
 */
private function logRefDifferences(array $remote, array $local) {
  $lines = array();

  // The array union yields every ref name present on either side.
  foreach (array_keys($local + $remote) as $ref_name) {
    $remote_hash = idx($remote, $ref_name, pht('<null>'));
    $local_hash = idx($local, $ref_name, pht('<null>'));

    if ($remote_hash === $local_hash) {
      continue;
    }

    $lines[] = pht(
      '%s (remote: "%s", local: "%s")',
      $ref_name,
      $remote_hash,
      $local_hash);
  }

  $summary = pht(
    "Updating repository after detecting ref differences:\n%s",
    implode("\n", $lines));

  $this->log($summary);
}
645 /* -( Pulling Mercurial Working Copies )----------------------------------- */
/**
 * Create the Mercurial working copy: "hg init" for hosted repositories,
 * "hg clone --noupdate" from the remote otherwise.
 *
 * @return void
 * @throws Exception If the installed Mercurial is vulnerable to command
 *   injection and the remote URI is not shell-safe, or if the clone
 *   fails (with the response body censored from the message).
 * @task hg
 */
private function executeMercurialCreate() {
  $repository = $this->getRepository();

  $path = rtrim($repository->getLocalPath(), '/');

  if ($repository->isHosted()) {
    $repository->execxRemoteCommand(
      'init -- %s',
      $path);
    return;
  }

  $remote = $repository->getRemoteURIEnvelope();

  // NOTE: Mercurial prior to 3.2.4 has a severe command injection
  // vulnerability. See: <http://bit.ly/19B58E9>

  // On vulnerable versions of Mercurial, we refuse to clone remotes which
  // contain characters which may be interpreted by the shell.
  $hg_binary = PhutilBinaryAnalyzer::getForBinary('hg');
  if ($hg_binary->isMercurialVulnerableToInjection()) {
    $cleartext = $remote->openEnvelope();

    // The use of "%R" here is an attempt to limit collateral damage
    // for normal URIs because it isn't clear how long this vulnerability
    // has been around for.
    $escaped = csprintf('%R', $cleartext);

    $is_unsafe = ((string)$escaped !== (string)$cleartext);
    if ($is_unsafe) {
      throw new Exception(
        pht(
          'You have an old version of Mercurial (%s) which has a severe '.
          'command injection security vulnerability. The remote URI for '.
          'this repository (%s) is potentially unsafe. Upgrade Mercurial '.
          'to at least 3.2.4 to clone it.',
          $hg_binary->getBinaryVersion(),
          $repository->getMonogram()));
    }
  }

  try {
    $repository->execxRemoteCommand(
      'clone --noupdate -- %P %s',
      $remote,
      $path);
  } catch (Exception $ex) {
    // Rethrow with any response body stripped from the message.
    throw new Exception(
      $this->censorMercurialErrorMessage($ex->getMessage()));
  }
}
/**
 * Run "hg pull" from the remote inside the local working copy.
 *
 * An exit code of 1 together with "no changes found" on stdout is
 * treated as success.
 *
 * @return void
 * @throws Exception If the pull fails for any other reason (with the
 *   response body censored from the message).
 * @task hg
 */
private function executeMercurialUpdate() {
  $repository = $this->getRepository();
  $path = $repository->getLocalPath();

  // This is a local command, but needs credentials.
  $remote = $repository->getRemoteURIEnvelope();
  $pull_future = $repository->getRemoteCommandFuture('pull -- %P', $remote);
  $pull_future->setCWD($path);

  try {
    $pull_future->resolvex();
  } catch (CommandException $ex) {
    // NOTE: Between versions 2.1 and 2.1.1, Mercurial changed the behavior
    // of "hg pull" to return 1 in case of a successful pull with no
    // changes. This behavior has been reverted, but users who updated
    // between Feb 1, 2012 and Mar 1, 2012 will have the erroring version.
    // Do a dumb test against stdout to check for this possibility.

    // NOTE: Mercurial has translated versions, which translate this error
    // string. In a translated version, the string will be something else,
    // like "aucun changement trouve". There didn't seem to be an easy way
    // to handle this (there are hard ways but this is not a common problem
    // and only creates log spam, not application failures). Assume English.

    // TODO: Remove this once we're far enough in the future that deployment
    // of 2.1 is exceedingly rare?
    $exit_code = $ex->getError();
    $output = $ex->getStdout();

    if ($exit_code == 1 && preg_match('/no changes found/', $output)) {
      return;
    }

    throw new Exception(
      $this->censorMercurialErrorMessage($ex->getMessage()));
  }
}
/**
 * Censor response bodies from Mercurial error messages.
 *
 * When Mercurial attempts to clone an HTTP repository but does not
 * receive a response it expects, it emits the response body in the
 * command output.
 *
 * This represents a potential SSRF issue, because an attacker with
 * permission to create repositories can create one which points at the
 * remote URI for some local service, then read the response from the
 * error message. To prevent this, censor response bodies out of error
 * messages.
 *
 * @param string $message Uncensored Mercurial command output.
 * @return string Censored Mercurial command output.
 */
private function censorMercurialErrorMessage($message) {
  // Matches from the first line starting with "---%<---" through the end
  // of the message.
  $body_pattern = '/^---%<---.*/sm';

  $placeholder =
    pht('<Response body omitted from Mercurial error message.>')."\n";

  return preg_replace($body_pattern, $placeholder, $message);
}
/**
 * Write a "[hooks]" section to the working copy's ".hg/hgrc" which runs
 * "bin/commit-hook" for the "pretxnchangegroup" and "prepushkey" hooks.
 *
 * The file is written with Filesystem::writeFile(), replacing whatever
 * was there.
 *
 * @return void
 * @task hg
 */
private function installMercurialHook() {
  $repository = $this->getRepository();
  $path = $repository->getLocalPath().'/.hg/hgrc';

  $identifier = $this->getHookContextIdentifier($repository);

  $root = dirname(phutil_get_library_root('phabricator'));
  $bin = $root.'/bin/commit-hook';

  $data = array();
  $data[] = '[hooks]';

  // This hook handles normal pushes.
  $data[] = csprintf(
    'pretxnchangegroup.phabricator = TERM=dumb %s %s %s',
    $bin,
    $identifier,
    'pretxnchangegroup');

  // This one handles creating bookmarks.
  $data[] = csprintf(
    'prepushkey.phabricator = TERM=dumb %s %s %s',
    $bin,
    $identifier,
    'prepushkey');

  // The trailing null makes the joined file end with a newline.
  $data[] = null;

  $data = implode("\n", $data);

  // Pass a single preformatted message, like every other log() call in
  // this class (compare installHook()), rather than a separate "%s"
  // pattern argument.
  $this->log(pht('Installing commit hook config to "%s"...', $path));

  Filesystem::writeFile($path, $data);
}
809 /* -( Pulling Subversion Working Copies )---------------------------------- */
/**
 * Create a Subversion repository at the repository's local path with
 * "svnadmin create".
 *
 * @return void
 * @task svn
 */
private function executeSubversionCreate() {
  $local_path = $this->getRepository()->getLocalPath();
  $target = rtrim($local_path, '/');

  execx('svnadmin create -- %s', $target);
}
/**
 * Install the Subversion "pre-commit" hook, and a "pre-revprop-change"
 * hook which runs the commit hook with "--hook-mode svn-revprop".
 *
 * @return void
 * @task svn
 */
private function installSubversionHook() {
  $hooks_root = $this->getRepository()->getLocalPath().'/hooks';

  $this->installHook($hooks_root.'/pre-commit');

  $revprop_argv = array(
    '--hook-mode',
    'svn-revprop',
  );

  $this->installHook($hooks_root.'/pre-revprop-change', $revprop_argv);
}