from __future__ import print_function

import argparse
import email.mime.multipart
import email.mime.text
import logging
import os
import pickle
import re
import smtplib
import subprocess
import sys

from datetime import datetime, timedelta
from phabricator import Phabricator

# Setting up a virtualenv to run this script can be done by running the
# following commands:
# $ virtualenv venv
# $ . ./venv/bin/activate
# $ pip install Phabricator
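#
# A hypothetical invocation (the script file name is illustrative; the flags
# match the argparse setup in main() below):
# $ python find_interesting_reviews.py \
#     --email-addresses jane@example.com --email-report jane@example.com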

GIT_REPO_METADATA = (("llvm-monorepo", "https://github.com/llvm/llvm-project"),)

# The below PhabXXX classes represent objects as modelled by Phabricator.
# The classes can be serialized to disk, to try and make sure that we don't
# needlessly have to re-fetch lots of data from Phabricator, as that would
# make this script unusably slow.


class PhabObject:
    OBJECT_KIND = None

    def __init__(self, id):
        self.id = id


class PhabObjectCache:
    def __init__(self, PhabObjectClass):
        self.PhabObjectClass = PhabObjectClass
        self.most_recent_info = None
        self.oldest_info = None
        self.id2PhabObjects = {}

    def get_name(self):
        return self.PhabObjectClass.OBJECT_KIND + "sCache"

    def get(self, id):
        if id not in self.id2PhabObjects:
            self.id2PhabObjects[id] = self.PhabObjectClass(id)
        return self.id2PhabObjects[id]

    def get_ids_in_cache(self):
        return list(self.id2PhabObjects.keys())

    def get_objects(self):
        return list(self.id2PhabObjects.values())

    DEFAULT_DIRECTORY = "PhabObjectCache"

    def _get_pickle_name(self, directory):
        file_name = "Phab" + self.PhabObjectClass.OBJECT_KIND + "s.pickle"
        return os.path.join(directory, file_name)
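
    # For example, for the reviews cache (OBJECT_KIND == "Review") and the
    # default directory, the pickle path above resolves to
    # "PhabObjectCache/PhabReviews.pickle".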

    def populate_cache_from_disk(self, directory=DEFAULT_DIRECTORY):
        """
        FIXME: consider if serializing to JSON would bring interoperability
        advantages over serializing to pickle.
        """
        try:
            f = open(self._get_pickle_name(directory), "rb")
        except IOError as err:
            print("Could not find cache. Error message: {0}. Continuing...".format(err))
        else:
            with f:
                try:
                    d = pickle.load(f)
                    self.__dict__.update(d)
                except EOFError as err:
                    print(
                        "Cache seems to be corrupt. "
                        + "Not using cache. Error message: {0}".format(err)
                    )

    def write_cache_to_disk(self, directory=DEFAULT_DIRECTORY):
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(self._get_pickle_name(directory), "wb") as f:
            pickle.dump(self.__dict__, f)
        print(
            "wrote cache to disk, most_recent_info= {0}".format(
                datetime.fromtimestamp(self.most_recent_info)
                if self.most_recent_info is not None
                else None
            )
        )


class PhabReview(PhabObject):
    OBJECT_KIND = "Review"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, title, dateCreated, dateModified, author):
        self.title = title
        self.dateCreated = dateCreated
        self.dateModified = dateModified
        self.author = author

    def setPhabDiffs(self, phabDiffs):
        self.phabDiffs = phabDiffs


class PhabUser(PhabObject):
    OBJECT_KIND = "User"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, phid, realName):
        self.phid = phid
        self.realName = realName


class PhabHunk:
    def __init__(self, rest_api_hunk):
        self.oldOffset = int(rest_api_hunk["oldOffset"])
        self.oldLength = int(rest_api_hunk["oldLength"])
        # self.actual_lines_changed_offset will contain the offsets of the
        # lines that were changed in this hunk.
        self.actual_lines_changed_offset = []
        offset = self.oldOffset
        contextLines = 3  # nr of context lines around a change (value assumed)
        hunkStart = None
        for line in rest_api_hunk["corpus"].split("\n"):
            if line.startswith("+"):
                # line is a new line that got introduced in this patch.
                # Do not record it as a changed line.
                if hunkStart is None:
                    hunkStart = max(self.oldOffset, offset - contextLines)
                continue
            if line.startswith("-"):
                # line was changed or removed from the older version of the
                # code. Record it as a changed line.
                if hunkStart is None:
                    hunkStart = max(self.oldOffset, offset - contextLines)
                offset += 1
                continue
            # line is a context line: close off the current run of changed
            # lines, if any.
            if hunkStart is not None:
                hunkEnd = offset + contextLines
                self.actual_lines_changed_offset.append((hunkStart, hunkEnd))
                hunkStart = None
            offset += 1
        if hunkStart is not None:
            hunkEnd = offset + contextLines
            self.actual_lines_changed_offset.append((hunkStart, hunkEnd))

        # The above algorithm could result in adjacent or overlapping ranges
        # being recorded into self.actual_lines_changed_offset.
        # Merge the adjacent and overlapping ranges in there:
        t = []
        lastRange = None
        for start, end in self.actual_lines_changed_offset + [
            (sys.maxsize, sys.maxsize)
        ]:
            if lastRange is None:
                lastRange = (start, end)
            else:
                if lastRange[1] >= start:
                    lastRange = (lastRange[0], end)
                else:
                    t.append(lastRange)
                    lastRange = (start, end)
        self.actual_lines_changed_offset = t
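        # Worked example (illustrative, not from the original source): recorded
        # ranges [(10, 18), (16, 24), (40, 48)] merge to [(10, 24), (40, 48)]:
        # 18 >= 16 fuses the first two ranges, while (40, 48) starts past 24
        # and stays separate. The (sys.maxsize, sys.maxsize) sentinel exists
        # only to flush the final real range into t.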


class PhabChange:
    def __init__(self, rest_api_change):
        self.oldPath = rest_api_change["oldPath"]
        self.hunks = [PhabHunk(h) for h in rest_api_change["hunks"]]


class PhabDiff(PhabObject):
    OBJECT_KIND = "Diff"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, rest_api_results):
        self.revisionID = rest_api_results["revisionID"]
        self.dateModified = int(rest_api_results["dateModified"])
        self.dateCreated = int(rest_api_results["dateCreated"])
        self.changes = [PhabChange(c) for c in rest_api_results["changes"]]


class ReviewsCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabReview)


class UsersCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabUser)


reviews_cache = ReviewsCache()
users_cache = UsersCache()


def init_phab_connection():
    phab = Phabricator()
    phab.update_interfaces()
    return phab


def update_cached_info(
    phab,
    cache,
    phab_query,
    order,
    record_results,
    max_nr_entries_per_fetch,
    max_nr_days_to_cache,
):
    q = phab
    LIMIT = max_nr_entries_per_fetch
    for query_step in phab_query:
        q = getattr(q, query_step)
    results = q(order=order, limit=LIMIT)
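    # With phab_query == ("differential", "revision", "search"), the getattr
    # chain above resolves to phab.differential.revision.search, i.e. the
    # Conduit endpoint "differential.revision.search".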
    most_recent_info, oldest_info = record_results(cache, results, phab)
    oldest_info_to_fetch = datetime.fromtimestamp(most_recent_info) - timedelta(
        days=max_nr_days_to_cache
    )
    most_recent_info_overall = most_recent_info
    cache.write_cache_to_disk()
    after = results["cursor"]["after"]
    print("after: {0!r}".format(after))
    print("most_recent_info: {0}".format(datetime.fromtimestamp(most_recent_info)))
    while (
        after is not None
        and datetime.fromtimestamp(oldest_info) > oldest_info_to_fetch
    ):
        need_more_older_data = (
            cache.oldest_info is None
            or datetime.fromtimestamp(cache.oldest_info) > oldest_info_to_fetch
        )
        print(
            (
                "need_more_older_data={0} cache.oldest_info={1} "
                + "oldest_info_to_fetch={2}"
            ).format(
                need_more_older_data,
                datetime.fromtimestamp(cache.oldest_info)
                if cache.oldest_info is not None
                else None,
                oldest_info_to_fetch,
            )
        )
        need_more_newer_data = (
            cache.most_recent_info is None
            or cache.most_recent_info < most_recent_info
        )
        print(
            (
                "need_more_newer_data={0} cache.most_recent_info={1} "
                + "most_recent_info={2}"
            ).format(need_more_newer_data, cache.most_recent_info, most_recent_info)
        )
        if not need_more_older_data and not need_more_newer_data:
            break
        results = q(order=order, after=after, limit=LIMIT)
        most_recent_info, oldest_info = record_results(cache, results, phab)
        after = results["cursor"]["after"]
        print("after: {0!r}".format(after))
        print("most_recent_info: {0}".format(datetime.fromtimestamp(most_recent_info)))
        cache.write_cache_to_disk()
    cache.most_recent_info = most_recent_info_overall
    if after is None:
        # We did fetch all records. Mark the cache to contain all info since
        # the beginning of time.
        oldest_info = 0
        cache.oldest_info = oldest_info
    cache.write_cache_to_disk()


def record_reviews(cache, reviews, phab):
    most_recent_info = None
    oldest_info = None
    for reviewInfo in reviews["data"]:
        if reviewInfo["type"] != "DREV":
            continue
        id = reviewInfo["id"]
        # phid = reviewInfo["phid"]
        dateModified = int(reviewInfo["fields"]["dateModified"])
        dateCreated = int(reviewInfo["fields"]["dateCreated"])
        title = reviewInfo["fields"]["title"]
        author = reviewInfo["fields"]["authorPHID"]
        phabReview = cache.get(id)
        if (
            "dateModified" not in phabReview.__dict__
            or dateModified > phabReview.dateModified
        ):
            diff_results = phab.differential.querydiffs(revisionIDs=[id])
            diff_ids = sorted(diff_results.keys())
            phabDiffs = []
            for diff_id in diff_ids:
                diffInfo = diff_results[diff_id]
                d = PhabDiff(diff_id)
                d.update(diffInfo)
                phabDiffs.append(d)
            phabReview.update(title, dateCreated, dateModified, author)
            phabReview.setPhabDiffs(phabDiffs)
            print(
                "Updated D{0} modified on {1} ({2} diffs)".format(
                    id, datetime.fromtimestamp(dateModified), len(phabDiffs)
                )
            )

        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified

        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


def record_users(cache, users, phab):
    most_recent_info = None
    oldest_info = None
    for info in users["data"]:
        if info["type"] != "USER":
            continue
        id = info["id"]
        phid = info["phid"]
        dateModified = int(info["fields"]["dateModified"])
        # dateCreated = int(info["fields"]["dateCreated"])
        realName = info["fields"]["realName"]
        phabUser = cache.get(id)
        phabUser.update(phid, realName)
        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified
        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info
362 ("differential", "revision", "search"),
368 (users_cache
, ("user", "search"), "newest", record_users
, 100, 1000),


def load_cache():
    for cache, phab_query, order, record_results, _, _ in PHABCACHESINFO:
        cache.populate_cache_from_disk()
        print(
            "Loaded {0} nr entries: {1}".format(
                cache.get_name(), len(cache.get_ids_in_cache())
            )
        )
        print(
            "Loaded {0} has most recent info: {1}".format(
                cache.get_name(),
                datetime.fromtimestamp(cache.most_recent_info)
                if cache.most_recent_info is not None
                else None,
            )
        )


def update_cache(phab):
    load_cache()
    for (
        cache,
        phab_query,
        order,
        record_results,
        max_nr_entries_per_fetch,
        max_nr_days_to_cache,
    ) in PHABCACHESINFO:
        update_cached_info(
            phab,
            cache,
            phab_query,
            order,
            record_results,
            max_nr_entries_per_fetch,
            max_nr_days_to_cache,
        )
        ids_in_cache = cache.get_ids_in_cache()
        print("{0} objects in {1}".format(len(ids_in_cache), cache.get_name()))
        cache.write_cache_to_disk()


def get_most_recent_reviews(days):
    newest_reviews = sorted(reviews_cache.get_objects(), key=lambda r: -r.dateModified)
    if len(newest_reviews) == 0:
        return newest_reviews
    most_recent_review_time = datetime.fromtimestamp(newest_reviews[0].dateModified)
    cut_off_date = most_recent_review_time - timedelta(days=days)
    result = []
    for review in newest_reviews:
        if datetime.fromtimestamp(review.dateModified) < cut_off_date:
            # Reviews are sorted newest-first, so everything from here on is
            # older than the cut-off date.
            return result
        result.append(review)
    return result


# All of the above code is about fetching data from Phabricator and caching it
# on local disk. The below code contains the actual "business logic" for this
# script.

_userphid2realname = None


def get_real_name_from_author(user_phid):
    global _userphid2realname
    if _userphid2realname is None:
        _userphid2realname = {}
        for user in users_cache.get_objects():
            _userphid2realname[user.phid] = user.realName
    return _userphid2realname.get(user_phid, "unknown")


def print_most_recent_reviews(phab, days, filter_reviewers):
    msgs = []

    def add_msg(msg):
        msgs.append(msg)
        print(msg.encode("utf-8"))

    newest_reviews = get_most_recent_reviews(days)
    add_msg(
        "These are the reviews that look interesting to be reviewed. "
        + "The report below has 2 sections. The first "
        + "section is organized per review; the second section is organized "
        + "per potential reviewer.\n"
    )
    oldest_review = newest_reviews[-1] if len(newest_reviews) > 0 else None
    oldest_datetime = (
        datetime.fromtimestamp(oldest_review.dateModified) if oldest_review else None
    )
    add_msg(
        (
            "The report below is based on analyzing the reviews that got "
            + "touched in the past {0} days (since {1}). "
            + "The script found {2} such reviews.\n"
        ).format(days, oldest_datetime, len(newest_reviews))
    )
    reviewer2reviews_and_scores = {}
    for i, review in enumerate(newest_reviews):
        matched_reviewers = find_reviewers_for_review(review)
        matched_reviewers = filter_reviewers(matched_reviewers)
        if len(matched_reviewers) == 0:
            continue
        add_msg(
            (
                "{0:>3}. https://reviews.llvm.org/D{1} by {2}\n     {3}\n"
                + "     Last updated on {4}"
            ).format(
                i,
                review.id,
                get_real_name_from_author(review.author),
                review.title,
                datetime.fromtimestamp(review.dateModified),
            )
        )
        for reviewer, scores in matched_reviewers:
            add_msg(
                "    potential reviewer {0}, score {1}".format(
                    reviewer,
                    "(" + "/".join(["{0:.1f}%".format(s) for s in scores]) + ")",
                )
            )
            if reviewer not in reviewer2reviews_and_scores:
                reviewer2reviews_and_scores[reviewer] = []
            reviewer2reviews_and_scores[reviewer].append((review, scores))

    # Print out a summary per reviewer.
    for reviewer in sorted(reviewer2reviews_and_scores.keys()):
        reviews_and_scores = reviewer2reviews_and_scores[reviewer]
        reviews_and_scores.sort(key=lambda rs: rs[1], reverse=True)
        add_msg(
            "\n\nSUMMARY FOR {0} (found {1} reviews):".format(
                reviewer, len(reviews_and_scores)
            )
        )
        for review, scores in reviews_and_scores:
            add_msg(
                "[{0}] https://reviews.llvm.org/D{1} '{2}' by {3}".format(
                    "/".join(["{0:.1f}%".format(s) for s in scores]),
                    review.id,
                    review.title,
                    get_real_name_from_author(review.author),
                )
            )
    return "\n".join(msgs)


def get_git_cmd_output(cmd):
    output = None
    try:
        logging.debug(cmd)
        output = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logging.debug(str(e))
    if output is None:
        return None
    return output.decode("utf-8", errors="ignore")


reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")


def parse_blame_output_line_porcelain(blame_output_lines):
    email2nr_occurences = {}
    if blame_output_lines is None:
        return email2nr_occurences
    for line in blame_output_lines:
        m = reAuthorMail.match(line)
        if m:
            author_email_address = m.group(1)
            if author_email_address not in email2nr_occurences:
                email2nr_occurences[author_email_address] = 1
            else:
                email2nr_occurences[author_email_address] += 1
    return email2nr_occurences
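

# Example (illustrative, not from the original script): blame output containing
# two "author-mail <jane@example.com>" lines and one
# "author-mail <joe@example.com>" line parses to
# {"jane@example.com": 2, "joe@example.com": 1}.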


class BlameOutputCache:
    def __init__(self):
        self.cache = {}

    def _populate_cache_for(self, cache_key):
        assert cache_key not in self.cache
        git_repo, base_revision, path = cache_key
        cmd = (
            "git -C {0} blame --encoding=utf-8 --date iso -f -e -w "
            + "--line-porcelain {1} -- {2}"
        ).format(git_repo, base_revision, path)
        blame_output = get_git_cmd_output(cmd)
        self.cache[cache_key] = (
            blame_output.split("\n") if blame_output is not None else None
        )
        # FIXME: the blame cache could probably be made more effective still if
        # instead of storing the requested base_revision in the cache, the last
        # revision before the base revision this file/path got changed in gets
        # stored. That way multiple project revisions for which this specific
        # file/path hasn't changed would get cache hits (instead of misses in
        # the current implementation).

    def get_blame_output_for(
        self, git_repo, base_revision, path, start_line=-1, end_line=-1
    ):
        cache_key = (git_repo, base_revision, path)
        if cache_key not in self.cache:
            self._populate_cache_for(cache_key)
        assert cache_key in self.cache
        all_blame_lines = self.cache[cache_key]
        if all_blame_lines is None:
            return None
        if start_line == -1 and end_line == -1:
            return all_blame_lines
        assert start_line >= 0
        assert end_line >= 0
        assert end_line <= len(all_blame_lines)
        assert start_line <= len(all_blame_lines)
        assert start_line <= end_line
        return all_blame_lines[start_line:end_line]

    def get_parsed_git_blame_for(
        self, git_repo, base_revision, path, start_line=-1, end_line=-1
    ):
        return parse_blame_output_line_porcelain(
            self.get_blame_output_for(
                git_repo, base_revision, path, start_line, end_line
            )
        )


blameOutputCache = BlameOutputCache()


def find_reviewers_for_diff_heuristic(diff):
    # Heuristic 1: assume good reviewers are the ones that touched the same
    # lines before as this patch is touching.
    # Heuristic 2: assume good reviewers are the ones that touched the same
    # files before as this patch is touching.
    reviewers2nr_lines_touched = {}
    reviewers2nr_files_touched = {}
    # Assume last revision before diff was modified is the revision the diff
    # applies to.
    assert len(GIT_REPO_METADATA) == 1
    git_repo = os.path.join("git_repos", GIT_REPO_METADATA[0][0])
    cmd = 'git -C {0} rev-list -n 1 --before="{1}" main'.format(
        git_repo,
        datetime.fromtimestamp(diff.dateModified).strftime("%Y-%m-%d %H:%M:%S"),
    )
    base_revision = get_git_cmd_output(cmd).strip()
    logging.debug("Base revision={0}".format(base_revision))
    for change in diff.changes:
        path = change.oldPath
        # Compute heuristic 1: look at context of patch lines.
        for hunk in change.hunks:
            for start_line, end_line in hunk.actual_lines_changed_offset:
                # Collect git blame results for authors in those ranges.
                for (
                    reviewer,
                    nr_occurences,
                ) in blameOutputCache.get_parsed_git_blame_for(
                    git_repo, base_revision, path, start_line, end_line
                ).items():
                    if reviewer not in reviewers2nr_lines_touched:
                        reviewers2nr_lines_touched[reviewer] = 0
                    reviewers2nr_lines_touched[reviewer] += nr_occurences
        # Compute heuristic 2: don't look at context, just at files touched.
        # Collect git blame results for the authors of the whole file.
        for reviewer, nr_occurences in blameOutputCache.get_parsed_git_blame_for(
            git_repo, base_revision, path
        ).items():
            if reviewer not in reviewers2nr_files_touched:
                reviewers2nr_files_touched[reviewer] = 0
            reviewers2nr_files_touched[reviewer] += 1
644 # Compute "match scores"
645 total_nr_lines
= sum(reviewers2nr_lines_touched
.values())
646 total_nr_files
= len(diff
.changes
)
647 reviewers_matchscores
= [
651 reviewers2nr_lines_touched
.get(reviewer
, 0) * 100.0 / total_nr_lines
652 if total_nr_lines
!= 0
654 reviewers2nr_files_touched
[reviewer
] * 100.0 / total_nr_files
655 if total_nr_files
!= 0
659 for reviewer
, nr_lines
in reviewers2nr_files_touched
.items()
661 reviewers_matchscores
.sort(key
=lambda i
: i
[1], reverse
=True)
662 return reviewers_matchscores
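

# Example (illustrative, not from the original script): a reviewer who authored
# 50 of the 200 blamed lines touched by a patch and shows up in 2 of its 4
# changed files gets the score tuple (25.0, 50.0), rendered later as
# "(25.0%/50.0%)".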


def find_reviewers_for_review(review):
    # Process the newest diff first.
    diffs = sorted(review.phabDiffs, key=lambda d: d.dateModified, reverse=True)
    if len(diffs) == 0:
        return
    diff = diffs[0]
    matched_reviewers = find_reviewers_for_diff_heuristic(diff)
    # Show progress, as this is a slow operation:
    sys.stdout.write(".")
    sys.stdout.flush()
    logging.debug("matched_reviewers: {0}".format(matched_reviewers))
    return matched_reviewers


def update_git_repos():
    git_repos_directory = "git_repos"
    for name, url in GIT_REPO_METADATA:
        dirname = os.path.join(git_repos_directory, name)
        if not os.path.exists(dirname):
            cmd = "git clone {0} {1}".format(url, dirname)
            output = get_git_cmd_output(cmd)
        cmd = "git -C {0} pull --rebase".format(dirname)
        output = get_git_cmd_output(cmd)


def send_emails(email_addresses, sender, msg):
    # Assumes an SMTP server reachable with smtplib's default connection
    # settings (localhost).
    s = smtplib.SMTP()
    s.connect()
    for email_address in email_addresses:
        email_msg = email.mime.multipart.MIMEMultipart()
        email_msg["From"] = sender
        email_msg["To"] = email_address
        email_msg["Subject"] = "LLVM patches you may be able to review."
        email_msg.attach(email.mime.text.MIMEText(msg.encode("utf-8"), "plain"))
        # python 3.x: s.send_message(email_msg)
        s.sendmail(email_msg["From"], email_msg["To"], email_msg.as_string())


def filter_reviewers_to_report_for(people_to_look_for):
    # The below is just an example filter, to only report potential reviews
    # to do for the people that will receive the report email.
    return lambda potential_reviewers: [
        r for r in potential_reviewers if r[0] in people_to_look_for
    ]
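

# Example (illustrative, not from the original script): with
# people_to_look_for == ["jane@example.com"], the filter above reduces
# [("jane@example.com", (25.0, 50.0)), ("joe@example.com", (1.0, 2.0))]
# to just the first entry.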


def main():
    parser = argparse.ArgumentParser(
        description="Match open reviews to potential reviewers."
    )
    parser.add_argument(
        "--no-update-cache",
        dest="update_cache",
        action="store_false",
        default=True,
        help="Do not update cached Phabricator objects",
    )
    parser.add_argument(
        "--email-report",
        dest="email_report",
        nargs="*",
        default="",
        help="The email addresses to send the report to.",
    )
    parser.add_argument(
        "--sender",
        dest="sender",
        default="",
        help="The email address to use in 'From' on messages emailed out.",
    )
    parser.add_argument(
        "--email-addresses",
        dest="email_addresses",
        nargs="*",
        help="The email addresses (as known by LLVM git) of "
        + "the people to look for reviews for.",
    )
    parser.add_argument("--verbose", "-v", action="count")

    args = parser.parse_args()

    if args.verbose >= 1:
        logging.basicConfig(level=logging.DEBUG)

    people_to_look_for = [e.decode("utf-8") for e in args.email_addresses]
    logging.debug(
        "Will look for reviews that the following contributors could "
        + "review: {}".format(people_to_look_for)
    )
    logging.debug("Will email a report to: {}".format(args.email_report))

    phab = init_phab_connection()

    if args.update_cache:
        update_cache(phab)

    update_git_repos()
    msg = print_most_recent_reviews(
        phab,
        days=1,  # reporting window in days (value assumed)
        filter_reviewers=filter_reviewers_to_report_for(people_to_look_for),
    )

    if args.email_report != []:
        send_emails(args.email_report, args.sender, msg)


if __name__ == "__main__":
    main()