2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 # Use: find_copyrights.pl <start-from> [exclude-dir ...]
# Forward declarations (with prototypes) of subs that are defined later in
# the file but called from the top-level code below.
12 sub check_is_generated_file
($);
13 sub start_copyright_parsing
();
# Script name without its directory part; used as a prefix in die messages.
15 my $progname = basename
($0);
# First command-line argument: the directory handed to find as its start point.
17 my $root_dir = shift @ARGV;
# Each value taken from @ARGV here is turned into a find exclusion of the form
#   -not ( -path */$path/* -prune )
# NOTE(review): the enclosing loop and the declaration of @find_args are not
# visible in this excerpt - presumably this runs once per remaining argument.
20 my $path = shift @ARGV;
21 push @find_args, qw
'-not ( -path', "*/$path/*", qw
'-prune )'
# Follow symlinks, restrict the results to regular files, print each path.
23 push @find_args, qw(-follow -type f -print);
# Run find through a list-form pipe open (no shell is involved) and read its
# output from the FIND handle.
25 open FIND
, '-|', 'find', $root_dir, @find_args
26 or die "$progname: Couldn't exec find: $!\n";
# Pattern of file-name extensions that are scanned for copyright notices.
# NOTE(review): the end of this concatenated pattern is not visible in this
# excerpt.
27 my $check_regex = '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' .
28 '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' .
# Queue the current path ($_) unless the file is empty (-z) or its name does
# not match $check_regex.
33 push @files, $_ unless (-z
$_ || !m
%$check_regex%);
# Number of leading lines of every file that are kept as the "header" passed
# to check_is_generated_file.
37 my $generated_file_scan_boundary = 25;
# Take the next queued file.
39 my $file = shift @files;
# NOTE(review): two-argument open with a bareword handle; Perl strips leading
# and trailing whitespace from the name in this form, and the die message does
# not include $!.
42 open (F
, "<$file") or die "$progname: Unable to access $file\n";
# A fresh parser closure per file.
43 my $parse_copyright = start_copyright_parsing
();
# Accumulate the lines whose line number ($.) does not exceed the boundary.
45 $file_header .= $_ unless $. > $generated_file_scan_boundary;
# Feed the current line and its line number to the parser; a true result is
# the copyright text found on that line.
46 my $copyright_match = $parse_copyright->($_, $.);
47 if ($copyright_match) {
# Keyed by the lower-cased text, so notices that differ only in letter case
# share one entry; the most recently seen spelling is the one stored.
48 $copyrights{lc("$copyright_match")} = "$copyright_match";
# All distinct notices of the file, sorted as strings and joined by " / ".
52 my $copyright = join(" / ", sort values %copyrights);
# Print "GENERATED FILE" when the header check is true; the other print emits
# the joined copyright text, or "*No copyright*" when that text is empty.
# NOTE(review): the else branch keyword is not visible in this excerpt.
54 if (check_is_generated_file
($file_header)) {
55 print "GENERATED FILE";
57 print ($copyright or "*No copyright*");
# Decides whether a file looks machine-generated, judging by its header text.
#
# Argument:
#   $_[0] - the leading lines of the file, as a single string.
# Returns:
#   1 when the header carries a well-known "generated file" marker,
#   0 otherwise.
sub check_is_generated_file($) {
    # All checks below are case-insensitive; upper-casing once lets the cheap
    # index() pre-checks use fixed upper-case needles.
    my $license = uc($_[0]);

    # Remove Python multiline comments to avoid false positives.
    if (index($license, '"""') != -1) {
        $license =~ s/"""[^"]*(?:"""|$)//mg;
    }
    if (index($license, "'''") != -1) {
        $license =~ s/'''[^']*(?:'''|$)//mg;
    }

    # Quick checks using index.
    if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
        return 1;
    }
    if (index($license, 'DO NOT EDIT') != -1 ||
        index($license, 'DO NOT DELETE') != -1 ||
        index($license, 'GENERATED') != -1) {
        # The pattern is assembled as a string BEFORE it is used as a regex.
        # Writing the quote-and-dot concatenation inside the /.../ literal
        # makes those characters (and the line breaks) part of the pattern,
        # which silently disables the "DO NOT ..." and "Generated ..."
        # alternatives.
        my $generated_marker_regex =
            'All changes made in this file will be lost' .
            '|DO NOT (?:EDIT|delete this file)' .
            '|Generated (?:at|automatically|data)' .
            '|Automatically generated' .
            '|\Wgenerated\s+(?:\w+\s+)*file\W';
        return ($license =~ /$generated_marker_regex/i) ? 1 : 0;
    }
    return 0;
}
# Tells whether the first number follows the second one closely enough.
#
# Arguments (all numeric):
#   $_[0] - the later value,
#   $_[1] - the earlier value,
#   $_[2] - the largest distance that is still accepted.
# Returns a true value when 0 <= ($_[0] - $_[1]) <= $_[2], and a false value
# otherwise.
sub are_within_increasing_progression($$$) {
    my ($later, $earlier, $max_distance) = @_;
    my $distance = $later - $earlier;
    return $distance >= 0 && $distance <= $max_distance;
}
90 sub start_copyright_parsing
() {
91 my $max_line_numbers_proximity = 3;
92 # Set up the defaults the way that proximity checks will not succeed.
93 my $last_a_item_line_number = -200;
94 my $last_b_item_line_number = -100;
98 my $line_number = $_[1];
100 # Remove C / C++ strings to avoid false positives.
101 if (index($line, '"') != -1) {
102 $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
105 my $uc_line = uc($line);
107 # Record '(a)' and '(b)' last occurrences in C++ comments.
108 my $cpp_comment_idx = index($uc_line, '//');
109 if ($cpp_comment_idx != -1) {
110 if (index($uc_line, '(A)') > $cpp_comment_idx) {
111 $last_a_item_line_number = $line_number;
113 if (index($uc_line, '(B)') > $cpp_comment_idx) {
114 $last_b_item_line_number = $line_number;
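118 # Fast bailout, intended to use the same patterns as the regexp.
# NOTE(review): the '\x{00a9}' and '\xc2\xa9' arguments below are
# single-quoted, so index() searches for those literal backslash sequences
# rather than for the copyright sign; confirm whether double quotes were
# intended.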
119 if (index($uc_line, 'COPYRIGHT') == -1 &&
120 index($uc_line, 'COPR.') == -1 &&
121 index($uc_line, '\x{00a9}') == -1 &&
122 index($uc_line, '\xc2\xa9') == -1) {
124 my $c_item_index = index($uc_line, '(C)');
125 return '' if ($c_item_index == -1);
126 # Filter out 'c' used as a list item inside C++ comments.
127 # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
128 if ($c_item_index > $cpp_comment_idx &&
129 are_within_increasing_progression
(
131 $last_b_item_line_number,
132 $max_line_numbers_proximity) &&
133 are_within_increasing_progression
(
134 $last_b_item_line_number,
135 $last_a_item_line_number,
136 $max_line_numbers_proximity)) {
141 my $copyright_indicator_regex =
142 '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';
143 my $full_copyright_indicator_regex =
144 sprintf '(?:\W|^)%s(?::\s*|\s+)(\w.*)$', $copyright_indicator_regex;
145 my $copyright_disindicator_regex =
146 '\b(?:info(?:rmation)?|notice|and|or)\b';
149 if ($line =~ m
%$full_copyright_indicator_regex%i) {
151 if ($match !~ m
%^\s
*$copyright_disindicator_regex%i) {
152 $match =~ s/([,.])?\s*$//;
153 $match =~ s/$copyright_indicator_regex//ig;
155 $match =~ s/\s{2,}/ /g;