Added filtering of commits from specific authors or emails (Fixes issue 5).

To access this functionality, the -x flag can now be called in the following ways: -x <file>, -x file:<file>, -x author:<author name>, -x email:<email>. Just passing -x <file> will presume that the filtering rule is intended for a file (just like the previous behavior). All the filtering is case-sensitive (even filtering by email) in order to not break any regular expressions used. Case-insensitive matching can instead be easily achieved with the appropriate regular expression. To get reversed filtering (excluding everything not matched within -x), a regular expression with a syntax such as '^(?!<rule>)' can be used.
2025-03-23 16:58:00 +01:00 · 2013-07-30 06:32:58 +02:00 · 2013-07-30 06:32:58 +02:00 · 1edae66fee
commit 1edae66fee
parent af29a59c3c
5 changed files with 108 additions and 61 deletions
--- a/gitinspector/blame.py
+++ b/gitinspector/blame.py
@ -47,10 +47,11 @@ __thread_lock__ = threading.BoundedSemaphore(NUM_THREADS)
 __blame_lock__ = threading.Lock()

 class BlameThread(threading.Thread):
-	def __init__(self, blame_string, extension, blames, filename):
+	def __init__(self, changes, blame_string, extension, blames, filename):
 		__thread_lock__.acquire() # Lock controlling the number of threads running
 		threading.Thread.__init__(self)

+		self.changes = changes
 		self.blame_string = blame_string
 		self.extension = extension
 		self.blames = blames
@ -63,16 +64,19 @@ class BlameThread(threading.Thread):
 		for j in git_blame_r.readlines():
 			j = j.decode("utf-8", "replace")
 			if Blame.is_blame_line(j):
-				author_mail = Blame.get_author_mail(j)
+				email = Blame.get_author_email(j)
+				author = self.changes.get_latest_author_by_email(email)
 				content = Blame.get_content(j)
 				__blame_lock__.acquire() # Global lock used to protect calls from here...

-				if self.blames.get((author_mail, self.filename), None) == None:
-					self.blames[(author_mail, self.filename)] = BlameEntry()
+				if not filtering.set_filtered(author, "author") and not filtering.set_filtered(email, "email"):
+					if self.blames.get((author, self.filename), None) == None:
+						self.blames[(author, self.filename)] = BlameEntry()
+
+					(comments, is_inside_comment) = comment.handle_comment_block(is_inside_comment, self.extension, content)
+					self.blames[(author, self.filename)].comments += comments
+					self.blames[(author, self.filename)].rows += 1

-				(comments, is_inside_comment) = comment.handle_comment_block(is_inside_comment, self.extension, content)
-				self.blames[(author_mail, self.filename)].comments += comments
-				self.blames[(author_mail, self.filename)].rows += 1
 				__blame_lock__.release() # ...to here.

 		git_blame_r.close()
@ -81,7 +85,7 @@ class BlameThread(threading.Thread):
 PROGRESS_TEXT = N_("Checking how many rows belong to each author (Progress): {0:.0f}%")

 class Blame:
-	def __init__(self, hard):
+	def __init__(self, hard, changes):
 		self.blames = {}
 		ls_tree_r = subprocess.Popen("git ls-tree --name-only -r " + interval.get_ref(), shell=True, bufsize=1,
 		                             stdout=subprocess.PIPE).stdout
@ -94,9 +98,9 @@ class Blame:

 			if FileDiff.is_valid_extension(row) and not filtering.set_filtered(FileDiff.get_filename(row)):
 				if not missing.add(row):
-					blame_string = "git blame -w {0} ".format("-C -C -M" if hard else "") + \
+					blame_string = "git blame -e -w {0} ".format("-C -C -M" if hard else "") + \
 					               interval.get_since() + interval.get_ref() + " -- \"" + row + "\""
-					thread = BlameThread(blame_string, FileDiff.get_extension(row), self.blames, row.strip())
+					thread = BlameThread(changes, blame_string, FileDiff.get_extension(row), self.blames, row.strip())
 					thread.daemon = True
 					thread.start()

@ -119,9 +123,9 @@ class Blame:
 		return string.find(" (") != -1

 	@staticmethod
-	def get_author_mail(string):
-		author_mail = re.search(" \((.*?)\d\d\d\d-\d\d-\d\d", string)
-		return author_mail.group(1).strip().lstrip("<").rstrip(">")
+	def get_author_email(string):
+		author_email = re.search(" \((.*?)\d\d\d\d-\d\d-\d\d", string)
+		return author_email.group(1).strip().lstrip("<").rstrip(">")

 	@staticmethod
 	def get_content(string):
@ -141,10 +145,10 @@ class Blame:

 __blame__ = None

-def get(hard):
+def get(hard, changes):
 	global __blame__
 	if __blame__ == None:
-		__blame__ = Blame(hard)
+		__blame__ = Blame(hard, changes)

 	return __blame__

@ -155,11 +159,10 @@ class BlameOutput(Outputable):
 	def __init__(self, hard):
 		self.hard = hard
 		self.changes = changes.get(hard)
+		get(self.hard, self.changes)
 		Outputable.__init__(self)

 	def output_html(self):
-		get(self.hard)
-
 		blame_xml = "<div><div class=\"box\">"
 		blame_xml += "<p>" + _(BLAME_INFO_TEXT) + ".</p><div><table id=\"blame\" class=\"git\">"
 		blame_xml += "<thead><tr> <th>{0}</th> <th>{1}</th> <th>{2}</th> </tr></thead>".format(_("Author"),
@ -177,7 +180,7 @@ class BlameOutput(Outputable):
 			blame_xml += "<tr " + ("class=\"odd\">" if i % 2 == 1 else ">")

 			if format.get_selected() == "html":
-				author_email = self.changes.get_author_email(entry[0])
+				author_email = self.changes.get_latest_email_by_author(entry[0])
 				blame_xml += "<td><img src=\"{0}\"/>{1}</td>".format(gravatar.get_url(author_email), entry[0])
 			else:
 				blame_xml += "<td>" + entry[0] + "</td>"
@ -214,7 +217,6 @@ class BlameOutput(Outputable):

 	def output_text(self):
 		print("")
-		get(self.hard)

 		if self.hard and sys.stdout.isatty():
 			terminal.clear_row()
@ -228,13 +230,11 @@ class BlameOutput(Outputable):
 			print("{0:.2f}".format(100.0 * i[1].comments / i[1].rows).rjust(19))

 	def output_xml(self):
-		get(self.hard)
-
 		message_xml = "\t\t<message>" + _(BLAME_INFO_TEXT) + "</message>\n"
 		blame_xml = ""

 		for i in sorted(__blame__.get_summed_blames().items()):
-			author_email = self.changes.get_author_email(i[0])
+			author_email = self.changes.get_latest_email_by_author(i[0])

 			name_xml = "\t\t\t\t<name>" + i[0] + "</name>\n"
 			gravatar_xml = "\t\t\t\t<gravatar>" + gravatar.get_url(author_email) + "</gravatar>\n"
--- a/gitinspector/changes.py
+++ b/gitinspector/changes.py
@ -100,7 +100,8 @@ class AuthorInfo:
 class Changes:
 	authors = {}
 	authors_dateinfo = {}
-	authors_email = {}
+	authors_by_email = {}
+	emails_by_author = {}

 	def __init__(self, hard):
 		self.commits = []
@ -119,7 +120,8 @@ class Changes:

 			if Commit.is_commit_line(j):
 				(author, email) = Commit.get_author_and_email(j)
-				self.authors_email[author] = email
+				self.emails_by_author[author] = email
+				self.authors_by_email[email] = author

 			if Commit.is_commit_line(j) or i is lines[-1]:
 				if found_valid_extension:
@ -128,7 +130,8 @@ class Changes:
 				found_valid_extension = False
 				commit = Commit(j)

-			if FileDiff.is_filediff_line(j) and not filtering.set_filtered(FileDiff.get_filename(j)):
+			if FileDiff.is_filediff_line(j) and not filtering.set_filtered(FileDiff.get_filename(j)) and not \
+			   filtering.set_filtered(commit.author, "author") and not filtering.set_filtered(commit.email, "email"):
 				extensions.add_located(FileDiff.get_extension(j))

 				if FileDiff.is_valid_extension(j):
@ -167,8 +170,11 @@ class Changes:

 		return self.authors_dateinfo

-	def get_author_email(self, name):
-		return self.authors_email[name]
+	def get_latest_author_by_email(self, name):
+		return self.authors_by_email[name]
+
+	def get_latest_email_by_author(self, name):
+		return self.emails_by_author[name]

 __changes__ = None

@ -211,7 +217,7 @@ class ChangesOutput(Outputable):

 				if format.get_selected() == "html":
 					changes_xml += "<td><img src=\"{0}\"/>{1}</td>".format(
-					               gravatar.get_url(self.changes.get_author_email(entry)), entry)
+					               gravatar.get_url(self.changes.get_latest_email_by_author(entry)), entry)
 				else:
 					changes_xml += "<td>" + entry + "</td>"

@ -290,7 +296,7 @@ class ChangesOutput(Outputable):
 				authorinfo = authorinfo_list.get(i)
 				percentage = 0 if total_changes == 0 else (authorinfo.insertions + authorinfo.deletions) / total_changes * 100
 				name_xml = "\t\t\t\t<name>" + i + "</name>\n"
-				gravatar_xml = "\t\t\t\t<gravatar>" + gravatar.get_url(self.changes.get_author_email(i)) + "</gravatar>\n"
+				gravatar_xml = "\t\t\t\t<gravatar>" + gravatar.get_url(self.changes.get_latest_email_by_author(i)) + "</gravatar>\n"
 				commits_xml = "\t\t\t\t<commits>" + str(authorinfo.commits) + "</commits>\n"
 				insertions_xml = "\t\t\t\t<insertions>" + str(authorinfo.insertions) + "</insertions>\n"
 				deletions_xml = "\t\t\t\t<deletions>" + str(authorinfo.deletions) + "</deletions>\n"
--- a/gitinspector/filtering.py
+++ b/gitinspector/filtering.py
@ -25,8 +25,7 @@ import re
 import terminal
 import textwrap

-__filters__ = []
-__filtered_files__ = set()
+__filters__ = {"file": [[], set()], "author": [[], set()], "email": [[], set()]}

 class InvalidRegExpError(ValueError):
 	def __init__(self, msg):
@ -37,56 +36,98 @@ def get():
 	return __filters__

 def add(string):
-	__filters__.append(string)
+	global __filters__
+	for i in __filters__:
+		if (i + ":").lower() == string[0:len(i) + 1].lower():
+			__filters__[i][0].append(string[len(i) + 1:])
+			return
+	__filters__["file"][0].append(string)

 def clear():
 	global __filters__
-	__filters__ = []
+	for i in __filters__:
+		__filters__[i][0] = []

-def get_filered():
-	return __filtered_files__
+def get_filered(filter_type="file"):
+	return __filters__[filter_type][1]

-def set_filtered(file_name):
-	string = file_name.strip()
+def has_filtered():
+	for i in __filters__:
+		if __filters__[i][1]:
+			return True
+	return False
+
+def set_filtered(string, filter_type="file"):
+	string = string.strip()

 	if len(string) > 0:
-		for i in __filters__:
+		for i in __filters__[filter_type][0]:
 			try:
 				if re.search(i, string) != None:
-					__filtered_files__.add(string)
+					__filters__[filter_type][1].add(string)
 					return True
 			except:
 				raise InvalidRegExpError(_("invalid regular expression specified"))
 	return False

 FILTERING_INFO_TEXT = N_("The following files were excluded from the statistics due to the specified exclusion patterns")
+FILTERING_AUTHOR_INFO_TEXT = N_("The following authors were excluded from the statistics due to the specified exclusion patterns")
+FILTERING_EMAIL_INFO_TEXT = N_("The authors with the following emails were excluded from the statistics due to the specified exclusion patterns")

 class Filtering(Outputable):
-	def output_html(self):
-		if __filtered_files__:
-			filtering_xml = "<div><div class=\"box\">"
-			filtering_xml += "<p>" + _(FILTERING_INFO_TEXT) + "."+ "</p>"
+	@staticmethod
+	def __output_html_section__(info_string, filtered):
+		filtering_xml = ""

-			for i in __filtered_files__:
+		if filtered:
+			filtering_xml += "<p>" + info_string + "."+ "</p>"
+
+			for i in filtered:
 				filtering_xml += "<p>" + i + "</p>"

+		return filtering_xml
+
+	def output_html(self):
+		if has_filtered():
+			filtering_xml = "<div><div class=\"box\">"
+			Filtering.__output_html_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1]);
+			Filtering.__output_html_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1]);
+			Filtering.__output_html_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1]);
 			filtering_xml += "</div></div>"
+
 			print(filtering_xml)

-	def output_text(self):
-		if __filtered_files__:
-			print("\n" + textwrap.fill(_(FILTERING_INFO_TEXT) + ":", width=terminal.get_size()[0]))
+	@staticmethod
+	def __output_text_section__(info_string, filtered):
+		if filtered:
+			print("\n" + textwrap.fill(info_string + ":", width=terminal.get_size()[0]))

-			for i in __filtered_files__:
+			for i in filtered:
 				(width, _unused) = terminal.get_size()
 				print("...%s" % i[-width+3:] if len(i) > width else i)

-	def output_xml(self):
-		if __filtered_files__:
-			message_xml = "\t\t<message>" + _(FILTERING_INFO_TEXT) + "</message>\n"
+	def output_text(self):
+		Filtering.__output_text_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1]);
+		Filtering.__output_text_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1]);
+		Filtering.__output_text_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1]);
+
+	@staticmethod
+	def __output_xml_section__(info_string, filtered, container_tagname):
+		if filtered:
+			message_xml = "\t\t\t<message>" +info_string + "</message>\n"
 			filtering_xml = ""

-			for i in __filtered_files__:
-				filtering_xml += "\t\t\t<file>" + i + "</file>\n"
+			for i in filtered:
+				filtering_xml += "\t\t\t\t<entry>".format(container_tagname) + i + "</entry>\n".format(container_tagname)

-			print("\t<filering>\n" + message_xml + "\t\t<files>\n" + filtering_xml + "\t\t</files>\n\t</filtering>")
+			print("\t\t<{0}>".format(container_tagname))
+			print(message_xml + "\t\t\t<entries>\n" + filtering_xml + "\t\t\t</entries>\n")
+			print("\t\t</{0}>".format(container_tagname))
+
+	def output_xml(self):
+		if has_filtered():
+			print("\t<filtering>")
+			Filtering.__output_xml_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1], "files");
+			Filtering.__output_xml_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1], "authors");
+			Filtering.__output_xml_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1], "emails");
+			print("\t</filtering>")
--- a/gitinspector/responsibilities.py
+++ b/gitinspector/responsibilities.py
@ -36,7 +36,7 @@ class Responsibilities:
 	def get(hard, author_name):
 		author_blames = {}

-		for i in blame.get(hard).blames.items():
+		for i in blame.get(hard, changes.get(hard)).blames.items():
 			if (author_name == i[0][0]):
 				total_rows = i[1].rows - i[1].comments
 				if total_rows > 0:
@ -58,7 +58,7 @@ class ResponsibilitiesOutput(Outputable):
 	def output_text(self):
 		print("\n" + textwrap.fill(_(RESPONSIBILITIES_INFO_TEXT) + ":", width=terminal.get_size()[0]))

-		for i in sorted(set(i[0] for i in blame.get(self.hard).blames)):
+		for i in sorted(set(i[0] for i in blame.get(self.hard, self.changes).blames)):
 			responsibilities = sorted(((i[1], i[0]) for i in Responsibilities.get(self.hard, i)), reverse=True)
 			if responsibilities:
 				print("\n" + i, _(MOSTLY_RESPONSIBLE_FOR_TEXT) + ":")
@ -77,13 +77,13 @@ class ResponsibilitiesOutput(Outputable):
 		resp_xml = "<div><div class=\"box\" id=\"responsibilities\">"
 		resp_xml += "<p>" + _(RESPONSIBILITIES_INFO_TEXT) + ".</p>"

-		for i in sorted(set(i[0] for i in blame.get(self.hard).blames)):
+		for i in sorted(set(i[0] for i in blame.get(self.hard, self.changes).blames)):
 			responsibilities = sorted(((i[1], i[0]) for i in Responsibilities.get(self.hard, i)), reverse=True)
 			if responsibilities:
 				resp_xml += "<div>"

 				if format.get_selected() == "html":
-					author_email = self.changes.get_author_email(i)
+					author_email = self.changes.get_latest_email_by_author(i)
 					resp_xml += "<h3><img src=\"{0}\"/>{1} {2}</h3>".format(gravatar.get_url(author_email, size=32),
 					            i, _(MOSTLY_RESPONSIBLE_FOR_TEXT))
 				else:
@ -103,10 +103,10 @@ class ResponsibilitiesOutput(Outputable):
 		message_xml = "\t\t<message>" + _(RESPONSIBILITIES_INFO_TEXT) + "</message>\n"
 		resp_xml = ""

-		for i in sorted(set(i[0] for i in blame.get(self.hard).blames)):
+		for i in sorted(set(i[0] for i in blame.get(self.hard, self.changes).blames)):
 			responsibilities = sorted(((i[1], i[0]) for i in Responsibilities.get(self.hard, i)), reverse=True)
 			if responsibilities:
-				author_email = self.changes.get_author_email(i)
+				author_email = self.changes.get_latest_email_by_author(i)

 				resp_xml += "\t\t\t<author>\n"
 				resp_xml += "\t\t\t\t<name>" + i + "</name>\n"
--- a/gitinspector/timeline.py
+++ b/gitinspector/timeline.py
@ -70,7 +70,7 @@ class TimelineData:
 		return self.total_changes_by_period[period]

 	def get_authors(self):
-		return sorted(set([(i[0][0], self.changes.get_author_email(i[0][0])) for i in self.entries.items()]))
+		return sorted(set([(i[0][0], self.changes.get_latest_email_by_author(i[0][0])) for i in self.entries.items()]))

 	def get_author_signs_in_period(self, author, period, multiplier):
 		authorinfo = self.entries.get((author, period), None)