Added filtering of commits from specific authors or emails (Fixes issue 5).

To access this functionality, the -x flag can now be used in the
following ways:

-x file
-x file:<file>
-x author:<author name>
-x email:<email>

Just passing -x file will presume that the filtering rule is intended for
a file (just like the previous behavior).

All filtering is case-sensitive (even filtering by email) in order to
not break any regular expressions used. Case-insensitive matching can
instead be easily achieved with the appropriate regular expression.

To get reversed filtering (excluding everything not matched within -x), a
regular expression with a syntax such as '^(?!<rule>)' can be used.
This commit is contained in:
Adam Waldenberg 2013-07-30 06:32:58 +02:00
parent af29a59c3c
commit 1edae66fee
5 changed files with 108 additions and 61 deletions

View File

@ -47,10 +47,11 @@ __thread_lock__ = threading.BoundedSemaphore(NUM_THREADS)
__blame_lock__ = threading.Lock()
class BlameThread(threading.Thread):
def __init__(self, blame_string, extension, blames, filename):
def __init__(self, changes, blame_string, extension, blames, filename):
__thread_lock__.acquire() # Lock controlling the number of threads running
threading.Thread.__init__(self)
self.changes = changes
self.blame_string = blame_string
self.extension = extension
self.blames = blames
@ -63,16 +64,19 @@ class BlameThread(threading.Thread):
for j in git_blame_r.readlines():
j = j.decode("utf-8", "replace")
if Blame.is_blame_line(j):
author_mail = Blame.get_author_mail(j)
email = Blame.get_author_email(j)
author = self.changes.get_latest_author_by_email(email)
content = Blame.get_content(j)
__blame_lock__.acquire() # Global lock used to protect calls from here...
if self.blames.get((author_mail, self.filename), None) == None:
self.blames[(author_mail, self.filename)] = BlameEntry()
if not filtering.set_filtered(author, "author") and not filtering.set_filtered(email, "email"):
if self.blames.get((author, self.filename), None) == None:
self.blames[(author, self.filename)] = BlameEntry()
(comments, is_inside_comment) = comment.handle_comment_block(is_inside_comment, self.extension, content)
self.blames[(author, self.filename)].comments += comments
self.blames[(author, self.filename)].rows += 1
(comments, is_inside_comment) = comment.handle_comment_block(is_inside_comment, self.extension, content)
self.blames[(author_mail, self.filename)].comments += comments
self.blames[(author_mail, self.filename)].rows += 1
__blame_lock__.release() # ...to here.
git_blame_r.close()
@ -81,7 +85,7 @@ class BlameThread(threading.Thread):
PROGRESS_TEXT = N_("Checking how many rows belong to each author (Progress): {0:.0f}%")
class Blame:
def __init__(self, hard):
def __init__(self, hard, changes):
self.blames = {}
ls_tree_r = subprocess.Popen("git ls-tree --name-only -r " + interval.get_ref(), shell=True, bufsize=1,
stdout=subprocess.PIPE).stdout
@ -94,9 +98,9 @@ class Blame:
if FileDiff.is_valid_extension(row) and not filtering.set_filtered(FileDiff.get_filename(row)):
if not missing.add(row):
blame_string = "git blame -w {0} ".format("-C -C -M" if hard else "") + \
blame_string = "git blame -e -w {0} ".format("-C -C -M" if hard else "") + \
interval.get_since() + interval.get_ref() + " -- \"" + row + "\""
thread = BlameThread(blame_string, FileDiff.get_extension(row), self.blames, row.strip())
thread = BlameThread(changes, blame_string, FileDiff.get_extension(row), self.blames, row.strip())
thread.daemon = True
thread.start()
@ -119,9 +123,9 @@ class Blame:
return string.find(" (") != -1
@staticmethod
def get_author_mail(string):
author_mail = re.search(" \((.*?)\d\d\d\d-\d\d-\d\d", string)
return author_mail.group(1).strip().lstrip("<").rstrip(">")
def get_author_email(string):
author_email = re.search(" \((.*?)\d\d\d\d-\d\d-\d\d", string)
return author_email.group(1).strip().lstrip("<").rstrip(">")
@staticmethod
def get_content(string):
@ -141,10 +145,10 @@ class Blame:
__blame__ = None
def get(hard):
def get(hard, changes):
global __blame__
if __blame__ == None:
__blame__ = Blame(hard)
__blame__ = Blame(hard, changes)
return __blame__
@ -155,11 +159,10 @@ class BlameOutput(Outputable):
def __init__(self, hard):
self.hard = hard
self.changes = changes.get(hard)
get(self.hard, self.changes)
Outputable.__init__(self)
def output_html(self):
get(self.hard)
blame_xml = "<div><div class=\"box\">"
blame_xml += "<p>" + _(BLAME_INFO_TEXT) + ".</p><div><table id=\"blame\" class=\"git\">"
blame_xml += "<thead><tr> <th>{0}</th> <th>{1}</th> <th>{2}</th> </tr></thead>".format(_("Author"),
@ -177,7 +180,7 @@ class BlameOutput(Outputable):
blame_xml += "<tr " + ("class=\"odd\">" if i % 2 == 1 else ">")
if format.get_selected() == "html":
author_email = self.changes.get_author_email(entry[0])
author_email = self.changes.get_latest_email_by_author(entry[0])
blame_xml += "<td><img src=\"{0}\"/>{1}</td>".format(gravatar.get_url(author_email), entry[0])
else:
blame_xml += "<td>" + entry[0] + "</td>"
@ -214,7 +217,6 @@ class BlameOutput(Outputable):
def output_text(self):
print("")
get(self.hard)
if self.hard and sys.stdout.isatty():
terminal.clear_row()
@ -228,13 +230,11 @@ class BlameOutput(Outputable):
print("{0:.2f}".format(100.0 * i[1].comments / i[1].rows).rjust(19))
def output_xml(self):
get(self.hard)
message_xml = "\t\t<message>" + _(BLAME_INFO_TEXT) + "</message>\n"
blame_xml = ""
for i in sorted(__blame__.get_summed_blames().items()):
author_email = self.changes.get_author_email(i[0])
author_email = self.changes.get_latest_email_by_author(i[0])
name_xml = "\t\t\t\t<name>" + i[0] + "</name>\n"
gravatar_xml = "\t\t\t\t<gravatar>" + gravatar.get_url(author_email) + "</gravatar>\n"

View File

@ -100,7 +100,8 @@ class AuthorInfo:
class Changes:
authors = {}
authors_dateinfo = {}
authors_email = {}
authors_by_email = {}
emails_by_author = {}
def __init__(self, hard):
self.commits = []
@ -119,7 +120,8 @@ class Changes:
if Commit.is_commit_line(j):
(author, email) = Commit.get_author_and_email(j)
self.authors_email[author] = email
self.emails_by_author[author] = email
self.authors_by_email[email] = author
if Commit.is_commit_line(j) or i is lines[-1]:
if found_valid_extension:
@ -128,7 +130,8 @@ class Changes:
found_valid_extension = False
commit = Commit(j)
if FileDiff.is_filediff_line(j) and not filtering.set_filtered(FileDiff.get_filename(j)):
if FileDiff.is_filediff_line(j) and not filtering.set_filtered(FileDiff.get_filename(j)) and not \
filtering.set_filtered(commit.author, "author") and not filtering.set_filtered(commit.email, "email"):
extensions.add_located(FileDiff.get_extension(j))
if FileDiff.is_valid_extension(j):
@ -167,8 +170,11 @@ class Changes:
return self.authors_dateinfo
def get_author_email(self, name):
return self.authors_email[name]
def get_latest_author_by_email(self, name):
return self.authors_by_email[name]
def get_latest_email_by_author(self, name):
return self.emails_by_author[name]
__changes__ = None
@ -211,7 +217,7 @@ class ChangesOutput(Outputable):
if format.get_selected() == "html":
changes_xml += "<td><img src=\"{0}\"/>{1}</td>".format(
gravatar.get_url(self.changes.get_author_email(entry)), entry)
gravatar.get_url(self.changes.get_latest_email_by_author(entry)), entry)
else:
changes_xml += "<td>" + entry + "</td>"
@ -290,7 +296,7 @@ class ChangesOutput(Outputable):
authorinfo = authorinfo_list.get(i)
percentage = 0 if total_changes == 0 else (authorinfo.insertions + authorinfo.deletions) / total_changes * 100
name_xml = "\t\t\t\t<name>" + i + "</name>\n"
gravatar_xml = "\t\t\t\t<gravatar>" + gravatar.get_url(self.changes.get_author_email(i)) + "</gravatar>\n"
gravatar_xml = "\t\t\t\t<gravatar>" + gravatar.get_url(self.changes.get_latest_email_by_author(i)) + "</gravatar>\n"
commits_xml = "\t\t\t\t<commits>" + str(authorinfo.commits) + "</commits>\n"
insertions_xml = "\t\t\t\t<insertions>" + str(authorinfo.insertions) + "</insertions>\n"
deletions_xml = "\t\t\t\t<deletions>" + str(authorinfo.deletions) + "</deletions>\n"

View File

@ -25,8 +25,7 @@ import re
import terminal
import textwrap
__filters__ = []
__filtered_files__ = set()
__filters__ = {"file": [[], set()], "author": [[], set()], "email": [[], set()]}
class InvalidRegExpError(ValueError):
def __init__(self, msg):
@ -37,56 +36,98 @@ def get():
return __filters__
def add(string):
__filters__.append(string)
global __filters__
for i in __filters__:
if (i + ":").lower() == string[0:len(i) + 1].lower():
__filters__[i][0].append(string[len(i) + 1:])
return
__filters__["file"][0].append(string)
def clear():
global __filters__
__filters__ = []
for i in __filters__:
__filters__[i][0] = []
def get_filered():
return __filtered_files__
def get_filered(filter_type="file"):
return __filters__[filter_type][1]
def set_filtered(file_name):
string = file_name.strip()
def has_filtered():
for i in __filters__:
if __filters__[i][1]:
return True
return False
def set_filtered(string, filter_type="file"):
string = string.strip()
if len(string) > 0:
for i in __filters__:
for i in __filters__[filter_type][0]:
try:
if re.search(i, string) != None:
__filtered_files__.add(string)
__filters__[filter_type][1].add(string)
return True
except:
raise InvalidRegExpError(_("invalid regular expression specified"))
return False
FILTERING_INFO_TEXT = N_("The following files were excluded from the statistics due to the specified exclusion patterns")
FILTERING_AUTHOR_INFO_TEXT = N_("The following authors were excluded from the statistics due to the specified exclusion patterns")
FILTERING_EMAIL_INFO_TEXT = N_("The authors with the following emails were excluded from the statistics due to the specified exclusion patterns")
class Filtering(Outputable):
def output_html(self):
if __filtered_files__:
filtering_xml = "<div><div class=\"box\">"
filtering_xml += "<p>" + _(FILTERING_INFO_TEXT) + "."+ "</p>"
@staticmethod
def __output_html_section__(info_string, filtered):
filtering_xml = ""
for i in __filtered_files__:
if filtered:
filtering_xml += "<p>" + info_string + "."+ "</p>"
for i in filtered:
filtering_xml += "<p>" + i + "</p>"
return filtering_xml
def output_html(self):
if has_filtered():
filtering_xml = "<div><div class=\"box\">"
Filtering.__output_html_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1]);
Filtering.__output_html_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1]);
Filtering.__output_html_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1]);
filtering_xml += "</div></div>"
print(filtering_xml)
def output_text(self):
if __filtered_files__:
print("\n" + textwrap.fill(_(FILTERING_INFO_TEXT) + ":", width=terminal.get_size()[0]))
@staticmethod
def __output_text_section__(info_string, filtered):
if filtered:
print("\n" + textwrap.fill(info_string + ":", width=terminal.get_size()[0]))
for i in __filtered_files__:
for i in filtered:
(width, _unused) = terminal.get_size()
print("...%s" % i[-width+3:] if len(i) > width else i)
def output_xml(self):
if __filtered_files__:
message_xml = "\t\t<message>" + _(FILTERING_INFO_TEXT) + "</message>\n"
def output_text(self):
Filtering.__output_text_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1]);
Filtering.__output_text_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1]);
Filtering.__output_text_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1]);
@staticmethod
def __output_xml_section__(info_string, filtered, container_tagname):
if filtered:
message_xml = "\t\t\t<message>" +info_string + "</message>\n"
filtering_xml = ""
for i in __filtered_files__:
filtering_xml += "\t\t\t<file>" + i + "</file>\n"
for i in filtered:
filtering_xml += "\t\t\t\t<entry>".format(container_tagname) + i + "</entry>\n".format(container_tagname)
print("\t<filering>\n" + message_xml + "\t\t<files>\n" + filtering_xml + "\t\t</files>\n\t</filtering>")
print("\t\t<{0}>".format(container_tagname))
print(message_xml + "\t\t\t<entries>\n" + filtering_xml + "\t\t\t</entries>\n")
print("\t\t</{0}>".format(container_tagname))
def output_xml(self):
if has_filtered():
print("\t<filtering>")
Filtering.__output_xml_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1], "files");
Filtering.__output_xml_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1], "authors");
Filtering.__output_xml_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1], "emails");
print("\t</filtering>")

View File

@ -36,7 +36,7 @@ class Responsibilities:
def get(hard, author_name):
author_blames = {}
for i in blame.get(hard).blames.items():
for i in blame.get(hard, changes.get(hard)).blames.items():
if (author_name == i[0][0]):
total_rows = i[1].rows - i[1].comments
if total_rows > 0:
@ -58,7 +58,7 @@ class ResponsibilitiesOutput(Outputable):
def output_text(self):
print("\n" + textwrap.fill(_(RESPONSIBILITIES_INFO_TEXT) + ":", width=terminal.get_size()[0]))
for i in sorted(set(i[0] for i in blame.get(self.hard).blames)):
for i in sorted(set(i[0] for i in blame.get(self.hard, self.changes).blames)):
responsibilities = sorted(((i[1], i[0]) for i in Responsibilities.get(self.hard, i)), reverse=True)
if responsibilities:
print("\n" + i, _(MOSTLY_RESPONSIBLE_FOR_TEXT) + ":")
@ -77,13 +77,13 @@ class ResponsibilitiesOutput(Outputable):
resp_xml = "<div><div class=\"box\" id=\"responsibilities\">"
resp_xml += "<p>" + _(RESPONSIBILITIES_INFO_TEXT) + ".</p>"
for i in sorted(set(i[0] for i in blame.get(self.hard).blames)):
for i in sorted(set(i[0] for i in blame.get(self.hard, self.changes).blames)):
responsibilities = sorted(((i[1], i[0]) for i in Responsibilities.get(self.hard, i)), reverse=True)
if responsibilities:
resp_xml += "<div>"
if format.get_selected() == "html":
author_email = self.changes.get_author_email(i)
author_email = self.changes.get_latest_email_by_author(i)
resp_xml += "<h3><img src=\"{0}\"/>{1} {2}</h3>".format(gravatar.get_url(author_email, size=32),
i, _(MOSTLY_RESPONSIBLE_FOR_TEXT))
else:
@ -103,10 +103,10 @@ class ResponsibilitiesOutput(Outputable):
message_xml = "\t\t<message>" + _(RESPONSIBILITIES_INFO_TEXT) + "</message>\n"
resp_xml = ""
for i in sorted(set(i[0] for i in blame.get(self.hard).blames)):
for i in sorted(set(i[0] for i in blame.get(self.hard, self.changes).blames)):
responsibilities = sorted(((i[1], i[0]) for i in Responsibilities.get(self.hard, i)), reverse=True)
if responsibilities:
author_email = self.changes.get_author_email(i)
author_email = self.changes.get_latest_email_by_author(i)
resp_xml += "\t\t\t<author>\n"
resp_xml += "\t\t\t\t<name>" + i + "</name>\n"

View File

@ -70,7 +70,7 @@ class TimelineData:
return self.total_changes_by_period[period]
def get_authors(self):
return sorted(set([(i[0][0], self.changes.get_author_email(i[0][0])) for i in self.entries.items()]))
return sorted(set([(i[0][0], self.changes.get_latest_email_by_author(i[0][0])) for i in self.entries.items()]))
def get_author_signs_in_period(self, author, period, multiplier):
authorinfo = self.entries.get((author, period), None)