3 # Licensed to the Apache Software Foundation (ASF) under one
4 # or more contributor license agreements. See the NOTICE file
5 # distributed with this work for additional information
6 # regarding copyright ownership. The ASF licenses this file
7 # to you under the Apache License, Version 2.0 (the
8 # "License"); you may not use this file except in compliance
9 # with the License. You may obtain a copy of the License at
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
19 # Build a database from git commit histories. Can be used to audit git vs. jira. For usage,
21 """An application to assist Release Managers with ensuring that histories in Git and fixVersions in
22 JIRA are in agreement. See README.md for a detailed explanation.
40 """Manages an instance of Sqlite on behalf of the application.
43 db_path (str): Path to the Sqlite database file. ':memory:' for an ephemeral database.
44 **_kwargs: Convenience for CLI argument parsing. Ignored.
conn (:obj:`sqlite3.Connection`): The underlying connection object.
49 class Action(enum
.Enum
):
50 """Describes an action to be taken against the database."""
55 def __init__(self
, db_path
, initialize_db
, **_kwargs
):
56 self
._conn
= sqlite3
.connect(db_path
)
59 for table
in 'git_commits', 'jira_versions':
60 self
._conn
.execute("DROP TABLE IF EXISTS %s" % table
)
62 self
._conn
.execute("""
63 CREATE TABLE IF NOT EXISTS "git_commits"(
64 jira_id TEXT NOT NULL,
66 git_sha TEXT NOT NULL,
68 CONSTRAINT pk PRIMARY KEY (jira_id, branch, git_sha)
70 self
._conn
.execute("""
71 CREATE TABLE IF NOT EXISTS "jira_versions"(
72 jira_id TEXT NOT NULL,
73 fix_version TEXT NOT NULL,
74 CONSTRAINT pk PRIMARY KEY (jira_id, fix_version)
81 def __exit__(self
, exc_type
, exc_val
, exc_tb
):
86 """:obj:`sqlite3.db2api.Connection`: Underlying database handle."""
89 def apply_commit(self
, action
, jira_id
, branch
, git_sha
):
90 """Apply an edit to the commits database.
93 action (:obj:`_DB.Action`): The action to execute.
94 jira_id (str): The applicable Issue ID from JIRA.
95 branch (str): The name of the git branch from which the commit originates.
96 git_sha (str): The commit's SHA.
98 if action
== _DB
.Action
.ADD
:
100 "INSERT INTO git_commits(jira_id, branch, git_sha) VALUES (upper(?),?,?)",
101 (jira_id
, branch
, git_sha
))
102 elif action
== _DB
.Action
.REVERT
:
103 self
.conn
.execute("""
104 DELETE FROM git_commits WHERE
107 """, (jira_id
.upper(), branch
))
109 def flush_commits(self
):
110 """Commit any pending changes to the database."""
113 def apply_git_tag(self
, branch
, git_sha
, git_tag
):
114 """Annotate a commit in the commits database as being a part of the specified release.
117 branch (str): The name of the git branch from which the commit originates.
118 git_sha (str): The commit's SHA.
119 git_tag (str): The first release tag following the commit.
121 self
.conn
.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?",
122 (git_tag
, branch
, git_sha
))
124 def apply_fix_version(self
, jira_id
, fix_version
):
125 """Annotate a Jira issue in the jira database as being part of the specified release
129 jira_id (str): The applicable Issue ID from JIRA.
130 fix_version (str): The annotated `fixVersion` as seen in JIRA.
132 self
.conn
.execute("INSERT INTO jira_versions(jira_id, fix_version) VALUES (upper(?),?)",
133 (jira_id
, fix_version
))
def unique_jira_ids_from_git(self):
    """Return every distinct Jira issue ID recorded in the `git_commits` table.

    Returns:
        :obj:`list` of str: One entry per distinct `jira_id` value, in whatever order the
        database yields them.
    """
    cursor = self.conn.execute("SELECT distinct jira_id FROM git_commits")
    # Each fetched row is a 1-tuple holding the jira_id column; unpack it to the bare value.
    return [jira_id for (jira_id,) in cursor.fetchall()]
140 def backup(self
, target
):
141 """Write a copy of the database to the `target` destination.
144 target (str): The backup target, a filesystem path.
146 dst
= sqlite3
.connect(target
)
148 self
._conn
.backup(dst
)
153 """This class interacts with the git repo, and encapsulates actions specific to HBase's git
157 db (:obj:`_DB`): A handle to the database manager.
158 fallback_actions_path (str): Path to the file containing sha-specific actions
160 remote_name (str): The name of the remote to query for branches and histories
162 development_branch (str): The name of the branch on which active development occurs
164 release_line_regexp (str): Filter criteria used to select "release line" branches (such
165 as "branch-1," "branch-2," &c.).
166 **_kwargs: Convenience for CLI argument parsing. Ignored.
168 _extract_release_tag_pattern
= re
.compile(r
'^rel/(\d+\.\d+\.\d+)(\^0)?$', re
.IGNORECASE
)
170 re
.compile(r
'^preparing development version.+', re
.IGNORECASE
),
171 re
.compile(r
'^preparing hbase release.+', re
.IGNORECASE
),
172 re
.compile(r
'^\s*updated? pom.xml version (for|to) .+', re
.IGNORECASE
),
173 re
.compile(r
'^\s*updated? chang', re
.IGNORECASE
),
174 re
.compile(r
'^\s*updated? (book|docs|documentation)', re
.IGNORECASE
),
175 re
.compile(r
'^\s*updating (docs|changes).+', re
.IGNORECASE
),
176 re
.compile(r
'^\s*bump (pom )?versions?', re
.IGNORECASE
),
177 re
.compile(r
'^\s*updated? (version|poms|changes).+', re
.IGNORECASE
),
179 _identify_leading_jira_id_pattern
= re
.compile(r
'^[\s\[]*(hbase-\d+)', re
.IGNORECASE
)
180 _identify_backport_jira_id_patterns
= [
181 re
.compile(r
'^backport "(.+)".*', re
.IGNORECASE
),
182 re
.compile(r
'^backport (.+)', re
.IGNORECASE
),
184 _identify_revert_jira_id_pattern
= re
.compile(r
'^revert:? "(.+)"', re
.IGNORECASE
)
185 _identify_revert_revert_jira_id_pattern
= re
.compile(
186 '^revert "revert "(.+)"\\.?"\\.?', re
.IGNORECASE
)
187 _identify_amend_jira_id_pattern
= re
.compile(r
'^amend (.+)', re
.IGNORECASE
)
189 def __init__(self
, db
, fallback_actions_path
, remote_name
, development_branch
,
190 release_line_regexp
, parse_release_tags
, **_kwargs
):
192 self
._repo
= _RepoReader
._open
_repo
()
193 self
._fallback
_actions
= _RepoReader
._load
_fallback
_actions
(fallback_actions_path
)
194 self
._remote
_name
= remote_name
195 self
._development
_branch
= development_branch
196 self
._release
_line
_regexp
= release_line_regexp
197 self
._parse
_release
_tags
= parse_release_tags
201 """:obj:`git.repo.base.Repo`: Underlying Repo handle."""
def remote_name(self):
    """str: The git remote consulted when querying branches and histories."""
    # NOTE(review): callers in this file read this as an attribute, so it is presumably
    # decorated with @property on a line not visible here -- confirm.
    configured_remote = self._remote_name
    return configured_remote
def development_branch_ref(self):
    """:obj:`git.refs.reference.Reference`: The git branch where active development occurs.

    Scans the refs of the configured remote for the one named
    ``<remote_name>/<development_branch>`` and returns the first match.

    Raises:
        IndexError: If the remote has no ref with that name. The exception type matches what
            the previous list-indexing implementation raised, but the message now names the
            ref that could not be found.
    """
    wanted = '%s/%s' % (self._remote_name, self._development_branch)
    for ref in self.repo.remote(self._remote_name).refs:
        # Stop at the first match instead of materialising every candidate.
        if ref.name == wanted:
            return ref
    raise IndexError(
        "development branch ref '%s' not found on remote '%s'" % (wanted, self._remote_name))
def release_line_refs(self):
    """:obj:`list` of :obj:`git.refs.reference.Reference`: The git branches identified as
    "release lines", i.e., "branch-2".

    A ref qualifies when its name matches ``<remote_name>/<release_line_regexp>``, anchored
    at the start of the name.
    """
    refs = self.repo.remote(self._remote_name).refs
    # The remote name is a literal prefix, not a pattern: escape it so characters such as '.'
    # cannot act as regexp metacharacters. Only the release-line part is a real regexp.
    pattern = re.compile(
        '%s/%s' % (re.escape(self._remote_name), self._release_line_regexp))
    return [ref for ref in refs if pattern.match(ref.name)]
def release_branch_refs(self):
    """:obj:`list` of :obj:`git.refs.reference.Reference`: The git branches identified as
    "release branches", i.e., "branch-2.2".

    A ref qualifies when its name begins with the name of some release line followed by a
    dot.
    """
    candidates = self.repo.remote(self._remote_name).refs
    # str.startswith accepts a tuple of prefixes and is False for an empty tuple, which
    # mirrors any() over an empty sequence.
    dotted_prefixes = tuple(line.name + '.' for line in self.release_line_refs)
    return [ref for ref in candidates if ref.name.startswith(dotted_prefixes)]
236 return git
.Repo(pathlib
.Path(__file__
).parent
.absolute(), search_parent_directories
=True)
238 def identify_least_common_commit(self
, ref_a
, ref_b
):
239 """Given a pair of references, attempt to identify the commit that they have in common,
240 i.e., the commit at which a "release branch" originates from a "release line" branch.
242 commits
= self
._repo
.merge_base(ref_a
, ref_b
, "--all")
245 raise Exception("could not identify merge base between %s, %s" % (ref_a
, ref_b
))
249 return any([p
.match(summary
) for p
in _RepoReader
._skip
_patterns
])
252 def _identify_leading_jira_id(summary
):
253 match
= _RepoReader
._identify
_leading
_jira
_id
_pattern
.match(summary
)
255 return match
.groups()[0]
259 def _identify_backport_jira_id(summary
):
260 for pattern
in _RepoReader
._identify
_backport
_jira
_id
_patterns
:
261 match
= pattern
.match(summary
)
263 return _RepoReader
._identify
_leading
_jira
_id
(match
.groups()[0])
267 def _identify_revert_jira_id(summary
):
268 match
= _RepoReader
._identify
_revert
_jira
_id
_pattern
.match(summary
)
270 return _RepoReader
._identify
_leading
_jira
_id
(match
.groups()[0])
274 def _identify_revert_revert_jira_id(summary
):
275 match
= _RepoReader
._identify
_revert
_revert
_jira
_id
_pattern
.match(summary
)
277 return _RepoReader
._identify
_leading
_jira
_id
(match
.groups()[0])
281 def _identify_amend_jira_id(summary
):
282 match
= _RepoReader
._identify
_amend
_jira
_id
_pattern
.match(summary
)
284 return _RepoReader
._identify
_leading
_jira
_id
(match
.groups()[0])
288 def _action_jira_id_for(summary
):
289 jira_id
= _RepoReader
._identify
_leading
_jira
_id
(summary
)
291 return _DB
.Action
.ADD
, jira_id
292 jira_id
= _RepoReader
._identify
_backport
_jira
_id
(summary
)
294 return _DB
.Action
.ADD
, jira_id
295 jira_id
= _RepoReader
._identify
_revert
_jira
_id
(summary
)
297 return _DB
.Action
.REVERT
, jira_id
298 jira_id
= _RepoReader
._identify
_revert
_revert
_jira
_id
(summary
)
300 return _DB
.Action
.ADD
, jira_id
301 jira_id
= _RepoReader
._identify
_amend
_jira
_id
(summary
)
303 return _DB
.Action
.ADD
, jira_id
306 def _extract_release_tag(self
, commit
):
307 """works for extracting the tag, but need a way to retro-actively tag
308 commits we've already seen."""
309 names
= self
._repo
.git
.name_rev(commit
, tags
=True, refs
='rel/*')
310 for name
in names
.split(' '):
311 match
= _RepoReader
._extract
_release
_tag
_pattern
.match(name
)
313 return match
.groups()[0]
316 def _set_release_tag(self
, branch
, tag
, shas
):
319 self
._db
.apply_git_tag(branch
, sha
, tag
)
322 self
._db
.flush_commits()
323 self
._db
.flush_commits()
325 def _resolve_ambiguity(self
, commit
):
326 if commit
.hexsha
not in self
._fallback
_actions
:
327 logging
.warning('Unable to resolve action for %s: %s', commit
.hexsha
, commit
.summary
)
328 return _DB
.Action
.SKIP
, None
329 action
, jira_id
= self
._fallback
_actions
[commit
.hexsha
]
332 return _DB
.Action
[action
], jira_id
334 def _row_generator(self
, branch
, commit
):
335 if _RepoReader
._skip
(commit
.summary
):
337 result
= _RepoReader
._action
_jira
_id
_for
(commit
.summary
)
339 result
= self
._resolve
_ambiguity
(commit
)
341 raise Exception('Cannot resolve action for %s: %s' % (commit
.hexsha
, commit
.summary
))
342 action
, jira_id
= result
343 return action
, jira_id
, branch
, commit
.hexsha
345 def populate_db_release_branch(self
, origin_commit
, release_branch
):
346 """List all commits on `release_branch` since `origin_commit`, recording them as
347 observations in the commits database.
350 origin_commit (:obj:`git.objects.commit.Commit`): The sha of the first commit to
352 release_branch (str): The name of the ref whose history is to be parsed.
355 commits
= list(self
._repo
.iter_commits(
356 "%s...%s" % (origin_commit
.hexsha
, release_branch
), reverse
=True))
357 logging
.info("%s has %d commits since its origin at %s.", release_branch
, len(commits
),
359 counter
= MANAGER
.counter(total
=len(commits
), desc
=release_branch
, unit
='commit')
360 commits_since_release
= list()
362 for commit
in counter(commits
):
363 row
= self
._row
_generator
(release_branch
, commit
)
365 self
._db
.apply_commit(*row
)
368 self
._db
.flush_commits()
369 commits_since_release
.append(commit
.hexsha
)
370 if self
._parse
_release
_tags
:
371 tag
= self
._extract
_release
_tag
(commit
)
373 self
._set
_release
_tag
(release_branch
, tag
, commits_since_release
)
374 commits_since_release
= list()
375 self
._db
.flush_commits()
378 def _load_fallback_actions(file):
380 if pathlib
.Path(file).exists():
381 with
open(file, 'r') as handle
:
382 reader
= csv
.DictReader(filter(lambda line
: line
[0] != '#', handle
))
385 result
[row
['hexsha']] = (row
['action'], row
['jira_id'])
390 """This class interacts with the Jira instance.
393 db (:obj:`_DB`): A handle to the database manager.
394 jira_url (str): URL of the Jira instance to query.
395 **_kwargs: Convenience for CLI argument parsing. Ignored.
397 def __init__(self
, db
, jira_url
, **_kwargs
):
399 self
.client
= jira
.JIRA(jira_url
)
400 self
.throttle_time_in_sec
= 1
402 def populate_db(self
):
403 """Query Jira for issue IDs found in the commits database, writing them to the jira
406 jira_ids
= self
._db
.unique_jira_ids_from_git()
407 logging
.info("retrieving %s jira_ids from the issue tracker", len(jira_ids
))
408 counter
= MANAGER
.counter(total
=len(jira_ids
), desc
='fetch from Jira', unit
='issue')
410 chunks
= [jira_ids
[i
:i
+ chunk_size
] for i
in range(0, len(jira_ids
), chunk_size
)]
414 query
= "key in (" + ",".join([("'" + jira_id
+ "'") for jira_id
in chunk
]) + ")"
415 results
= self
.client
.search_issues(jql_str
=query
, maxResults
=chunk_size
,
416 fields
='fixVersions')
417 for result
in results
:
419 fix_versions
= [version
.name
for version
in result
.fields
.fixVersions
]
420 for fix_version
in fix_versions
:
421 self
._db
.apply_fix_version(jira_id
, fix_version
)
424 self
._db
.flush_commits()
425 counter
.update(incr
=len(chunk
))
427 self
._db
.flush_commits()
429 def fetch_issues(self
, jira_ids
):
430 """Retrieve the specified jira Ids."""
432 logging
.info("retrieving %s jira_ids from the issue tracker", len(jira_ids
))
433 counter
= MANAGER
.counter(total
=len(jira_ids
), desc
='fetch from Jira', unit
='issue')
435 chunks
= [jira_ids
[i
:i
+ chunk_size
] for i
in range(0, len(jira_ids
), chunk_size
)]
438 query
= "key IN (" + ",".join([("'" + jira_id
+ "'") for jira_id
in chunk
]) + ")"\
439 + " ORDER BY issuetype ASC, priority DESC, key ASC"
440 results
= self
.client
.search_issues(
441 jql_str
=query
, maxResults
=chunk_size
,
442 fields
='summary,issuetype,priority,resolution,components')
443 for result
in results
:
445 val
['key'] = result
.key
446 val
['summary'] = result
.fields
.summary
.strip()
447 val
['priority'] = result
.fields
.priority
.name
.strip()
448 val
['issue_type'] = result
.fields
.issuetype
.name
.strip() \
449 if result
.fields
.issuetype
else None
450 val
['resolution'] = result
.fields
.resolution
.name
.strip() \
451 if result
.fields
.resolution
else None
452 val
['components'] = [x
.name
.strip() for x
in result
.fields
.components
if x
] \
453 if result
.fields
.components
else []
455 counter
.update(incr
=len(chunk
))
460 """This class builds databases from git and Jira, making it possible to audit the two for
461 discrepancies. At some point, it will provide pre-canned audit queries against those databases.
462 It is the entrypoint to this application.
465 repo_reader (:obj:`_RepoReader`): An instance of the `_RepoReader`.
jira_reader (:obj:`_JiraReader`): An instance of the `_JiraReader`.
467 db (:obj:`_DB`): A handle to the database manager.
468 **_kwargs: Convenience for CLI argument parsing. Ignored.
470 def __init__(self
, repo_reader
, jira_reader
, db
, **_kwargs
):
471 self
._repo
_reader
= repo_reader
472 self
._jira
_reader
= jira_reader
474 self
._release
_line
_fix
_versions
= dict()
475 for k
, v
in _kwargs
.items():
476 if k
.endswith('_fix_version'):
477 release_line
= k
[:-len('_fix_version')]
478 self
._release
_line
_fix
_versions
[release_line
] = v
480 def populate_db_from_git(self
):
481 """Process the git repository, populating the commits database."""
482 for release_line
in self
._repo
_reader
.release_line_refs
:
483 branch_origin
= self
._repo
_reader
.identify_least_common_commit(
484 self
._repo
_reader
.development_branch_ref
.name
, release_line
.name
)
485 self
._repo
_reader
.populate_db_release_branch(branch_origin
, release_line
.name
)
486 for release_branch
in self
._repo
_reader
.release_branch_refs
:
487 if not release_branch
.name
.startswith(release_line
.name
):
489 self
._repo
_reader
.populate_db_release_branch(branch_origin
, release_branch
.name
)
491 def populate_db_from_jira(self
):
492 """Process the Jira issues identified by the commits database, populating the jira
494 self
._jira
_reader
.populate_db()
497 def _write_report(filename
, issues
):
498 with
open(filename
, 'w') as file:
499 fieldnames
= ['key', 'issue_type', 'priority', 'summary', 'resolution', 'components']
500 writer
= csv
.DictWriter(file, fieldnames
=fieldnames
)
503 writer
.writerow(issue
)
504 logging
.info('generated report at %s', filename
)
506 def report_new_for_release_line(self
, release_line
):
507 """Builds a report of the Jira issues that are new on the target release line, not present
508 on any of the associated release branches. (i.e., on branch-2 but not
509 branch-{2.0,2.1,...})"""
510 matches
= [x
for x
in self
._repo
_reader
.release_line_refs
511 if x
.name
== release_line
or x
.name
.endswith('/%s' % release_line
)]
512 release_line_ref
= next(iter(matches
), None)
513 if not release_line_ref
:
514 logging
.error('release line %s not found. available options are %s.',
515 release_line
, [x
.name
for x
in self
._repo
_reader
.release_line_refs
])
517 cursor
= self
._db
.conn
.execute("""
518 SELECT distinct jira_id FROM git_commits
520 EXCEPT SELECT distinct jira_id FROM git_commits
522 """, (release_line_ref
.name
, '%s.%%' % release_line_ref
.name
))
523 jira_ids
= [x
[0] for x
in cursor
.fetchall()]
524 issues
= self
._jira
_reader
.fetch_issues(jira_ids
)
525 filename
= 'new_for_%s.csv' % release_line
.replace('/', '-')
526 Auditor
._write
_report
(filename
, issues
)
529 def _str_to_bool(val
):
532 return val
.lower() in ['true', 't', 'yes', 'y']
535 def _build_first_pass_parser():
536 parser
= argparse
.ArgumentParser(add_help
=False)
537 building_group
= parser
.add_argument_group(title
='Building the audit database')
538 building_group
.add_argument(
539 '--populate-from-git',
540 help='When true, populate the audit database from the Git repository.',
541 type=Auditor
._str
_to
_bool
,
543 building_group
.add_argument(
544 '--populate-from-jira',
545 help='When true, populate the audit database from Jira.',
546 type=Auditor
._str
_to
_bool
,
548 building_group
.add_argument(
550 help='Path to the database file, or leave unspecified for a transient db.',
552 building_group
.add_argument(
554 help='When true, initialize the database tables. This is destructive to the contents'
555 + ' of an existing database.',
556 type=Auditor
._str
_to
_bool
,
558 report_group
= parser
.add_argument_group('Generating reports')
559 report_group
.add_argument(
560 '--report-new-for-release-line',
561 help=Auditor
.report_new_for_release_line
.__doc
__,
564 git_repo_group
= parser
.add_argument_group('Interactions with the Git repo')
565 git_repo_group
.add_argument(
567 help='Path to the git repo, or leave unspecified to infer from the current'
570 git_repo_group
.add_argument(
572 help='The name of the git remote to use when identifying branches.'
573 + ' Default: \'origin\'',
575 git_repo_group
.add_argument(
576 '--development-branch',
577 help='The name of the branch from which all release lines originate.'
578 + ' Default: \'master\'',
580 git_repo_group
.add_argument(
581 '--development-branch-fix-version',
582 help='The Jira fixVersion used to indicate an issue is committed to the development'
583 + ' branch. Default: \'3.0.0\'',
585 git_repo_group
.add_argument(
586 '--release-line-regexp',
587 help='A regexp used to identify release lines.',
588 default
=r
'branch-\d+$')
589 git_repo_group
.add_argument(
590 '--parse-release-tags',
591 help='When true, look for release tags and annotate commits according to their release'
592 + ' version. An Expensive calculation, disabled by default.',
593 type=Auditor
._str
_to
_bool
,
595 git_repo_group
.add_argument(
596 '--fallback-actions-path',
597 help='Path to a file containing _DB.Actions applicable to specific git shas.',
598 default
='fallback_actions.csv')
599 jira_group
= parser
.add_argument_group('Interactions with Jira')
600 jira_group
.add_argument(
602 help='A URL locating the target JIRA instance.',
603 default
='https://issues.apache.org/jira')
604 return parser
, git_repo_group
607 def _build_second_pass_parser(repo_reader
, parent_parser
, git_repo_group
):
608 for release_line
in repo_reader
.release_line_refs
:
609 name
= release_line
.name
610 git_repo_group
.add_argument(
611 '--%s-fix-version' % name
[len(repo_reader
.remote_name
) + 1:],
612 help='The Jira fixVersion used to indicate an issue is committed to the specified '
613 + 'release line branch',
615 return argparse
.ArgumentParser(parents
=[parent_parser
])
624 first_pass_parser
, git_repo_group
= Auditor
._build
_first
_pass
_parser
()
625 first_pass_args
, extras
= first_pass_parser
.parse_known_args()
626 first_pass_args_dict
= vars(first_pass_args
)
627 with
_DB(**first_pass_args_dict
) as db
:
628 logging
.basicConfig(level
=logging
.INFO
)
629 repo_reader
= _RepoReader(db
, **first_pass_args_dict
)
630 jira_reader
= _JiraReader(db
, **first_pass_args_dict
)
631 second_pass_parser
= Auditor
._build
_second
_pass
_parser
(
632 repo_reader
, first_pass_parser
, git_repo_group
)
633 second_pass_args
= second_pass_parser
.parse_args(extras
, first_pass_args
)
634 second_pass_args_dict
= vars(second_pass_args
)
635 auditor
= Auditor(repo_reader
, jira_reader
, db
, **second_pass_args_dict
)
636 with enlighten
.get_manager() as MANAGER
:
637 if second_pass_args
.populate_from_git
:
638 auditor
.populate_db_from_git()
639 if second_pass_args
.populate_from_jira
:
640 auditor
.populate_db_from_jira()
641 if second_pass_args
.report_new_for_release_line
:
642 release_line
= second_pass_args
.report_new_for_release_line
643 auditor
.report_new_for_release_line(release_line
)
646 if __name__
== '__main__':