mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-06-23 09:38:02 +03:00
update_authors: consider Signed-off-by trailers for committer idents
I've increasingly found that commits from new contributors have the author set in the "Github noreply" obfuscated style. If they do have a better canonical choice, it's usually in the Signed-off-by: trailer in the commit message. I had avoided using these in the first version of this program because they aren't always present, aren't always correct, and some commits have multiple signoffs. It seems however that requiring either the name or the email address to match the commit author sufficiently narrows the scope to be useful for the "Github noreply" situation, which is really the main sticking point. And of course, if it gets it wrong, overriding in .mailmap or AUTHORS is always an option. Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris <robn@despairlabs.com>
This commit is contained in:
parent
b2284aedab
commit
8e318fda80
@ -59,6 +59,17 @@
|
|||||||
# the display version. We use this slug to update two maps, one of email->name,
|
# the display version. We use this slug to update two maps, one of email->name,
|
||||||
# the other of name->email.
|
# the other of name->email.
|
||||||
#
|
#
|
||||||
|
# Where possible, we also consider Signed-off-by: trailers in the commit
|
||||||
|
# message, and if they match the commit author, enter them into the maps also.
|
||||||
|
# Because a commit can contain multiple signoffs, we only track one if either
|
||||||
|
# the name or the email address match the commit author (by slug). This is
|
||||||
|
# mostly aimed at letting an explicit signoff override a generated name or
|
||||||
|
# email on the same commit (usually a Github noreply), while avoiding every
|
||||||
|
# signoff ever being treated as a possible canonical ident for some other
|
||||||
|
# committer. (Also note that this behaviour only works for signoffs that can be
|
||||||
|
# extracted with git-interpret-trailers, which misses many seen in the OpenZFS
|
||||||
|
# git history, for various reasons).
|
||||||
|
#
|
||||||
# Once collected, we then walk all the emails we've seen and get all the names
|
# Once collected, we then walk all the emails we've seen and get all the names
|
||||||
# associated with every instance. Then for each of those names, we get all the
|
# associated with every instance. Then for each of those names, we get all the
|
||||||
# emails associated, and so on until we've seen all the connected names and
|
# emails associated, and so on until we've seen all the connected names and
|
||||||
@ -118,31 +129,42 @@ for my $line (do { local (@ARGV) = ('AUTHORS'); <> }) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Next, we load all the commit authors. and form name<->email mappings, keyed
|
# Next, we load all the commit authors and signoff pairs, and form name<->email
|
||||||
# on slug. Note that this format is getting the .mailmap-converted form. This
|
# mappings, keyed on slug. Note that this format is getting the
|
||||||
# lets us control the input to some extent by making changes there.
|
# .mailmap-converted form. This lets us control the input to some extent by
|
||||||
|
# making changes there.
|
||||||
my %git_names;
|
my %git_names;
|
||||||
my %git_emails;
|
my %git_emails;
|
||||||
|
|
||||||
for my $line (reverse qx(git log --pretty=tformat:'%aN:::%aE')) {
|
for my $line (reverse qx(git log --pretty=tformat:'%aN:::%aE:::%(trailers:key=signed-off-by,valueonly,separator=:::)')) {
|
||||||
chomp $line;
|
chomp $line;
|
||||||
my ($name, $email) = $line =~ m/^(.*):::(.*)/;
|
my ($name, $email, @signoffs) = split ':::', $line;
|
||||||
next unless $name && $email;
|
next unless $name && $email;
|
||||||
|
|
||||||
my $semail = email_slug($email);
|
my $semail = email_slug($email);
|
||||||
my $sname = name_slug($name);
|
my $sname = name_slug($name);
|
||||||
|
|
||||||
|
# Track the committer name and email.
|
||||||
$git_names{$semail}{$sname} = 1;
|
$git_names{$semail}{$sname} = 1;
|
||||||
$git_emails{$sname}{$semail} = 1;
|
$git_emails{$sname}{$semail} = 1;
|
||||||
|
|
||||||
# Update the "best looking" display value, but only if we don't already
|
# Consider if these are the best we've ever seen.
|
||||||
# have something from the AUTHORS file. If we do, we must not change it.
|
|
||||||
if (!$authors_name{email_slug($email)}) {
|
|
||||||
update_display_email($email);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!$authors_email{name_slug($name)}) {
|
|
||||||
update_display_name($name);
|
update_display_name($name);
|
||||||
|
update_display_email($email);
|
||||||
|
|
||||||
|
# Check signoffs. any that have a matching name or email as the
|
||||||
|
# committer (by slug), also track them.
|
||||||
|
for my $signoff (@signoffs) {
|
||||||
|
my ($soname, $soemail) = $signoff =~ m/^([^<]+)\s+<(.+)>$/;
|
||||||
|
next unless $soname && $soemail;
|
||||||
|
my $ssoname = name_slug($soname);
|
||||||
|
my $ssoemail = email_slug($soemail);
|
||||||
|
if (($semail eq $ssoemail) ^ ($sname eq $ssoname)) {
|
||||||
|
$git_names{$ssoemail}{$ssoname} = 1;
|
||||||
|
$git_emails{$ssoname}{$ssoemail} = 1;
|
||||||
|
update_display_name($soname);
|
||||||
|
update_display_email($soemail);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -194,7 +216,6 @@ for my $committer (@committers) {
|
|||||||
|
|
||||||
# Now output the new AUTHORS file
|
# Now output the new AUTHORS file
|
||||||
open my $fh, '>', 'AUTHORS' or die "E: couldn't open AUTHORS for write: $!\n";
|
open my $fh, '>', 'AUTHORS' or die "E: couldn't open AUTHORS for write: $!\n";
|
||||||
#my $fh = \*STDOUT;
|
|
||||||
say $fh join("\n", @authors_header, "");
|
say $fh join("\n", @authors_header, "");
|
||||||
for my $name (sort keys %authors_email) {
|
for my $name (sort keys %authors_email) {
|
||||||
my $cname = $display_name{$name};
|
my $cname = $display_name{$name};
|
||||||
@ -233,9 +254,18 @@ sub email_slug {
|
|||||||
return lc $email;
|
return lc $email;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# As we accumulate new names and addresses, record the "best looking" version
|
||||||
|
# of each. Once we decide to add a committer to AUTHORS, we'll take the best
|
||||||
|
# version of their name and address from here.
|
||||||
|
#
|
||||||
|
# Note that we don't record them if they're already in AUTHORS (that is, in
|
||||||
|
# %authors_name or %authors_email) because that file already contains the
|
||||||
|
# "best" version, by definition. So we return immediately if we've seen it
|
||||||
|
# there already.
|
||||||
sub update_display_name {
|
sub update_display_name {
|
||||||
my ($name) = @_;
|
my ($name) = @_;
|
||||||
my $sname = name_slug($name);
|
my $sname = name_slug($name);
|
||||||
|
return if $authors_email{$sname};
|
||||||
|
|
||||||
# For names, "more specific" means "has more non-lower-case characters"
|
# For names, "more specific" means "has more non-lower-case characters"
|
||||||
# (in ASCII), guessing that if a person has gone to some effort to
|
# (in ASCII), guessing that if a person has gone to some effort to
|
||||||
@ -252,9 +282,11 @@ sub update_display_name {
|
|||||||
sub update_display_email {
|
sub update_display_email {
|
||||||
my ($email) = @_;
|
my ($email) = @_;
|
||||||
my $semail = email_slug($email);
|
my $semail = email_slug($email);
|
||||||
|
return if $authors_name{$semail};
|
||||||
|
|
||||||
# Like names, we prefer uppercase when possible. We also remove any
|
# Like names, we prefer uppercase when possible. We also remove any
|
||||||
# leading "plus address" for Github noreply addresses.
|
# leading "plus address" for Github noreply addresses.
|
||||||
|
|
||||||
$email =~ s/^[^\+]*\+//g if $email =~ m/\.noreply\.github\.com$/;
|
$email =~ s/^[^\+]*\+//g if $email =~ m/\.noreply\.github\.com$/;
|
||||||
|
|
||||||
my $cemail = $display_email{$semail};
|
my $cemail = $display_email{$semail};
|
||||||
|
Loading…
Reference in New Issue
Block a user