# Generate the AUTHORS file combining existing AUTHORS file with
# git commit log.
#
# Usage: generate_authors.py AUTHORS.src
#
# Copyright 2022 Moshe Kaplan
# Based on generate_authors.pl by Michael Mann
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
# pyuca is an optional dependency used for sorting names. When it is not
# installed, report that on stderr and make the locale module available for
# the fallback sort instead of aborting.
try:
    from pyuca import Collator
except ModuleNotFoundError:
    import locale
    sys.stderr.write('pyuca module not found. Sorting names using the built-in locale module.\n')
def get_git_authors(shortlog_output=None):
    """
    Return the (name, email) pairs found in a git shortlog, sorted by name.

    Sample input line:
    #  4321	Navin R. Johnson <nrjohnson@example.com>

    Args:
        shortlog_output: Optional text in the format produced by
            "git shortlog --email --summary". When None (the default), the
            shortlog of HEAD is obtained by running git.

    Returns:
        A list of (name, email) tuples sorted by name. Every '@' in an
        email address is replaced by '[AT]'. Lines that do not look like
        shortlog entries are ignored.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
        OSError: If the git executable cannot be started.
    """
    GIT_LINE_REGEX = r"^\s*\d+\s+([^<]*)\s*<([^>]*)>"

    if shortlog_output is None:
        cmd = ["git", "--no-pager", "shortlog", "--email", "--summary", "HEAD"]
        # check_output (rather than subprocess.run) is kept for
        # compatibility with older Python versions.
        shortlog_output = subprocess.check_output(cmd, universal_newlines=True, encoding='utf-8')

    git_authors = []
    for line in shortlog_output.splitlines():
        # The pattern tolerates leading whitespace itself, so the line does
        # not need to be stripped first.
        match = re.match(GIT_LINE_REGEX, line)
        if match is None:
            # Blank or unexpected line: nothing to record.
            continue
        name = match.group(1).strip()
        email = match.group(2).strip()
        # Try to lower how much spam people get:
        email = email.replace('@', '[AT]')
        git_authors.append((name, email))

    # Prefer pyuca's collation when it is installed; otherwise fall back to
    # a case-insensitive sort through the locale module.
    try:
        from pyuca import Collator
    except ModuleNotFoundError:
        import locale
        return sorted(git_authors, key=lambda author: locale.strxfrm(author[0].casefold()))

    collator = Collator()
    return sorted(git_authors, key=lambda author: collator.sort_key(author[0]))
def extract_contributors(authors_content):
    """
    Extract names and email addresses from the AUTHORS file Contributors.

    Only the text after the first "= Contributors =" heading is scanned.
    A line counts as a contributor when it starts with a name followed by
    whitespace and an address in angle brackets. Lines inside a
    "{ ... }" block are skipped, but a contributor on the line that opens
    the block is still recorded.

    Args:
        authors_content: Full text of the AUTHORS file.

    Returns:
        A list of (name, email) tuples in file order.

    Raises:
        IndexError: If authors_content has no "= Contributors =" heading.
    """
    contributors_content = authors_content.split("= Contributors =", 1)[1]
    CONTRIBUTOR_LINE_REGEX = r"^([\w\.\-\'\x80-\xff]+(\s*[\w+\.\-\'\x80-\xff])*)\s+<([^>]*)>"

    contributors = []
    in_bracket = False
    for line in contributors_content.splitlines():
        if '{' in line:
            # The opening line may itself name a contributor, so it still
            # falls through to the match below.
            in_bracket = True
        elif in_bracket:
            # Inside a block: only look for the closing brace.
            if '}' in line:
                in_bracket = False
            continue

        # Lines without "<...>" (for example the "e-mail address removed at
        # contributor's request" entries) never match and contribute nothing.
        contributor_match = re.match(CONTRIBUTOR_LINE_REGEX, line)
        if contributor_match:
            name = contributor_match.group(1)
            email = contributor_match.group(3)
            contributors.append((name, email))
    return contributors
def generate_git_contributors_text(contributors_emails, git_authors_emails):
    """
    Build the text listing git authors not already named as contributors.

    Args:
        contributors_emails: Iterable of (name, email) pairs already listed
            in the AUTHORS file.
        git_authors_emails: Iterable of (name, email) pairs taken from the
            git log, in the order they should be written.

    Returns:
        One "name<padding><email>" line per new author, joined with
        newlines (no trailing newline). Names shorter than 24 characters
        are padded with tabs so the address starts at the third 8-column
        tab stop; longer names are followed by a single space.
    """
    TAB_WIDTH = 8
    EMAIL_TAB_STOP = 3

    # Track the email addresses seen to avoid including the same email address twice
    emails_addresses_seen = {email.lower() for _name, email in contributors_emails}

    output_lines = []
    for name, email in git_authors_emails:
        email_key = email.lower()
        if email_key in emails_addresses_seen:
            continue

        # Skip Gerald, since he's part of the header:
        if email == "gerald[AT]wireshark.org":
            continue

        if len(name) >= TAB_WIDTH * EMAIL_TAB_STOP:
            line = "{name} <{email}>".format(name=name, email=email)
        else:
            # Each full tab width the name already occupies needs one tab
            # fewer to reach the target column.
            tabs = '\t' * (EMAIL_TAB_STOP - len(name) // TAB_WIDTH)
            line = "{name}{tabs}<{email}>".format(name=name, tabs=tabs, email=email)

        emails_addresses_seen.add(email_key)
        output_lines.append(line)
    return "\n".join(output_lines)
# Read authors file until we find gitlog entries, then stop
def read_authors(parsed_args):
    """
    Return the hand-maintained part of the AUTHORS file.

    Args:
        parsed_args: Parsed command-line arguments; parsed_args.authors[0]
            is the path of the AUTHORS file.

    Returns:
        The file content up to, but not including, the first line that
        contains '= From git log ='. The whole file is returned when no
        such line exists.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    lines = []
    with open(parsed_args.authors[0], 'r', encoding='utf-8') as fh:
        for line in fh:
            if '= From git log =' in line:
                # Everything from here on is generated output; drop it.
                break
            lines.append(line)
    return ''.join(lines)
def main():
    """
    Regenerate the '= From git log =' section of the AUTHORS file in place.

    The path of the AUTHORS file is taken from the command line. The
    hand-maintained part of the file is kept as is; the git log section is
    rebuilt from the git history, leaving out anyone whose email address
    is already listed among the contributors.
    """
    parser = argparse.ArgumentParser(description="Generate the AUTHORS file combining existing AUTHORS file with git commit log.")
    parser.add_argument("authors", metavar='authors', nargs=1, help="path to AUTHORS file")
    parsed_args = parser.parse_args()

    author_content = read_authors(parsed_args)

    # Collect the listed contributors emails so that we don't duplicate them
    # in the listing of git contributors
    contributors_emails = extract_contributors(author_content)
    git_authors_emails = get_git_authors()
    # Then generate the text output for git contributors
    git_contributors_text = generate_git_contributors_text(contributors_emails, git_authors_emails)

    # Now we can write our output:
    git_contributor_header = '= From git log =\n\n'
    output = author_content + git_contributor_header + git_contributors_text + '\n'

    with open(parsed_args.authors[0], 'w', encoding='utf-8') as fh:
        fh.write(output)


if __name__ == '__main__':
    main()