From d315829e9d0dd649a0560b58f7961862da098446 Mon Sep 17 00:00:00 2001 From: Adam Waldenberg Date: Thu, 27 Nov 2014 08:39:17 +0100 Subject: [PATCH] Added commit filtering by revision (Fixes issue 33). Specific commits can now be filtered by using; -x, --exclude="revision:" Like with all other filtering in gitinspector, regular expressions are supported. Likewise, multiple revisions can be specified using a comma or by simply supplying multiple exclude parameters. As usual, both the commit module and blame module take this filtering into account. --- gitinspector/blame.py | 114 +++++++++++++++++++++----------------- gitinspector/changes.py | 3 +- gitinspector/filtering.py | 6 +- 3 files changed, 71 insertions(+), 52 deletions(-) diff --git a/gitinspector/blame.py b/gitinspector/blame.py index 08e8a9b..b589afe 100644 --- a/gitinspector/blame.py +++ b/gitinspector/blame.py @@ -61,44 +61,67 @@ class BlameThread(threading.Thread): self.blames = blames self.filename = filename + def __clear_blamechunk_information__(self): + self.blamechunk_email = None + self.blamechunk_is_last = False + self.blamechunk_is_prior = False + self.blamechunk_revision = None + self.blamechunk_time = None + + def __handle_blamechunk_content__(self, content): + author = None + (comments, self.is_inside_comment) = comment.handle_comment_block(self.is_inside_comment, self.extension, content) + + if self.blamechunk_is_prior and interval.get_since(): + return + + try: + author = self.changes.get_latest_author_by_email(self.blamechunk_email) + except KeyError: + return + + __blame_lock__.acquire() # Global lock used to protect calls from here... 
+ + if not filtering.set_filtered(author, "author") and not filtering.set_filtered(self.blamechunk_email, "email") and not \ + filtering.set_filtered(self.blamechunk_revision, "revision"): + if self.blames.get((author, self.filename), None) == None: + self.blames[(author, self.filename)] = BlameEntry() + + self.blames[(author, self.filename)].comments += comments + self.blames[(author, self.filename)].rows += 1 + + if (self.blamechunk_time - self.changes.first_commit_date).days > 0: + self.blames[(author, self.filename)].skew += ((self.changes.last_commit_date - self.blamechunk_time).days / + (7.0 if self.useweeks else AVG_DAYS_PER_MONTH)) + + __blame_lock__.release() # ...to here. + def run(self): git_blame_r = subprocess.Popen(self.blame_string, shell=True, bufsize=1, stdout=subprocess.PIPE).stdout - is_inside_comment = False - - for j in git_blame_r.readlines(): - j = j.decode("utf-8", "replace") - if Blame.is_blame_line(j): - content = Blame.get_content(j) - (comments, is_inside_comment) = comment.handle_comment_block(is_inside_comment, self.extension, content) - - if Blame.is_prior(j) and interval.get_since(): - continue - - email = Blame.get_author_email(j) - try: - author = self.changes.get_latest_author_by_email(email) - except KeyError: - continue - - __blame_lock__.acquire() # Global lock used to protect calls from here... 
- - if not filtering.set_filtered(author, "author") and not filtering.set_filtered(email, "email"): - if self.blames.get((author, self.filename), None) == None: - self.blames[(author, self.filename)] = BlameEntry() - - self.blames[(author, self.filename)].comments += comments - self.blames[(author, self.filename)].rows += 1 - - time = Blame.get_time(j) - time = datetime.date(int(time[0:4]), int(time[5:7]), int(time[8:10])) - - if (time - self.changes.first_commit_date).days > 0: - self.blames[(author, self.filename)].skew += ((self.changes.last_commit_date - time).days / - (7.0 if self.useweeks else AVG_DAYS_PER_MONTH)) - - __blame_lock__.release() # ...to here. - + rows = git_blame_r.readlines() git_blame_r.close() + + self.is_inside_comment = False + self.__clear_blamechunk_information__() + + for j in range(0, len(rows)): + row = rows[j].decode("utf-8", "replace").strip() + lr = row.split(" ", 2) + + if self.blamechunk_is_last: + self.__handle_blamechunk_content__(row) + self.__clear_blamechunk_information__() + elif lr[0] == "boundary": + self.blamechunk_is_prior = True + elif lr[0] == "author-mail": + self.blamechunk_email = lr[1].lstrip("<").rstrip(">") + elif lr[0] == "author-time": + self.blamechunk_time = datetime.date.fromtimestamp(int(lr[1])) + elif lr[0] == "filename": + self.blamechunk_is_last = True + elif Blame.is_revision(lr[0]): + self.blamechunk_revision = lr[0] + __thread_lock__.release() # Lock controlling the number of threads running PROGRESS_TEXT = N_("Checking how many rows belong to each author (Progress): {0:.0f}%") @@ -116,7 +139,7 @@ class Blame: row = row.decode("utf-8", "replace").strip("\"").strip("'").strip() if FileDiff.is_valid_extension(row) and not filtering.set_filtered(FileDiff.get_filename(row)): - blame_string = "git blame -e -w {0} ".format("-C -C -M" if hard else "") + \ + blame_string = "git blame --line-porcelain -w {0} ".format("-C -C -M" if hard else "") + \ interval.get_since() + interval.get_ref() + " -- \"" + row + 
"\"" thread = BlameThread(useweeks, changes, blame_string, FileDiff.get_extension(row), self.blames, row.strip()) thread.daemon = True @@ -137,22 +160,13 @@ class Blame: sys.stdout.flush() @staticmethod - def is_blame_line(string): - return string.find(" (") != -1 + def is_revision(string): + revision = re.search("([0-9a-f]{40})", string) - @staticmethod - def is_prior(string): - return string[0] == "^" + if revision == None: + return False - @staticmethod - def get_author_email(string): - author_email = re.search("\(<([^>]*)", string) - return author_email.group(1) - - @staticmethod - def get_content(string): - content = re.search(" \d+\)(.*)", string) - return content.group(1).lstrip() + return revision.group(1).strip() @staticmethod def get_stability(author, blamed_rows, changes): diff --git a/gitinspector/changes.py b/gitinspector/changes.py index 7d5bf78..675807d 100644 --- a/gitinspector/changes.py +++ b/gitinspector/changes.py @@ -132,7 +132,8 @@ class Changes: commit = Commit(j) if FileDiff.is_filediff_line(j) and not filtering.set_filtered(FileDiff.get_filename(j)) and not \ - filtering.set_filtered(commit.author, "author") and not filtering.set_filtered(commit.email, "email"): + filtering.set_filtered(commit.author, "author") and not filtering.set_filtered(commit.email, "email") and not \ + filtering.set_filtered(commit.sha, "revision"): extensions.add_located(FileDiff.get_extension(j)) if FileDiff.is_valid_extension(j): diff --git a/gitinspector/filtering.py b/gitinspector/filtering.py index 7e8f338..9b74e0c 100644 --- a/gitinspector/filtering.py +++ b/gitinspector/filtering.py @@ -25,7 +25,7 @@ import re import terminal import textwrap -__filters__ = {"file": [[], set()], "author": [[], set()], "email": [[], set()]} +__filters__ = {"file": [[], set()], "author": [[], set()], "email": [[], set()], "revision": [[], set()]} class InvalidRegExpError(ValueError): def __init__(self, msg): @@ -76,6 +76,7 @@ def set_filtered(string, filter_type="file"): 
FILTERING_INFO_TEXT = N_("The following files were excluded from the statistics due to the specified exclusion patterns") FILTERING_AUTHOR_INFO_TEXT = N_("The following authors were excluded from the statistics due to the specified exclusion patterns") FILTERING_EMAIL_INFO_TEXT = N_("The authors with the following emails were excluded from the statistics due to the specified exclusion patterns") +FILTERING_REVISION_INFO_TEXT = N_("The following commit revisions were excluded from the statistics due to the specified exclusion patterns") class Filtering(Outputable): @staticmethod @@ -96,6 +97,7 @@ class Filtering(Outputable): Filtering.__output_html_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1]) Filtering.__output_html_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1]) Filtering.__output_html_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1]) + Filtering.__output_html_section__(_(FILTERING_REVISION_INFO_TEXT), __filters__["revision"][1]) filtering_xml += "" print(filtering_xml) @@ -113,6 +115,7 @@ class Filtering(Outputable): Filtering.__output_text_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1]) Filtering.__output_text_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1]) Filtering.__output_text_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1]) + Filtering.__output_text_section__(_(FILTERING_REVISION_INFO_TEXT), __filters__["revision"][1]) @staticmethod def __output_xml_section__(info_string, filtered, container_tagname): @@ -133,4 +136,5 @@ class Filtering(Outputable): Filtering.__output_xml_section__(_(FILTERING_INFO_TEXT), __filters__["file"][1], "files") Filtering.__output_xml_section__(_(FILTERING_AUTHOR_INFO_TEXT), __filters__["author"][1], "authors") Filtering.__output_xml_section__(_(FILTERING_EMAIL_INFO_TEXT), __filters__["email"][1], "emails") + Filtering.__output_xml_section__(_(FILTERING_REVISION_INFO_TEXT), __filters__["revision"][1], "revisions") print("\t")