gitinspector/gitinspector/blame.py

312 lines
11 KiB
Python

# coding: utf-8
#
# Copyright © 2012-2015 Ejwa Software. All rights reserved.
#
# This file is part of gitinspector.
#
# gitinspector is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gitinspector is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gitinspector. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
from __future__ import unicode_literals
from localization import N_
from outputable import Outputable
from changes import FileDiff
import comment
import datetime
import filtering
import format
import gravatar
import interval
import json
import multiprocessing
import re
import subprocess
import sys
import terminal
import textwrap
import threading
NUM_THREADS = multiprocessing.cpu_count()
class BlameEntry:
rows = 0
skew = 0 # Used when calculating average code age.
comments = 0
__thread_lock__ = threading.BoundedSemaphore(NUM_THREADS)
__blame_lock__ = threading.Lock()
AVG_DAYS_PER_MONTH = 30.4167
class BlameThread(threading.Thread):
def __init__(self, useweeks, changes, blame_command, extension, blames, filename):
__thread_lock__.acquire() # Lock controlling the number of threads running
threading.Thread.__init__(self)
self.useweeks = useweeks
self.changes = changes
self.blame_command = blame_command
self.extension = extension
self.blames = blames
self.filename = filename
self.is_inside_comment = False
def __clear_blamechunk_info__(self):
self.blamechunk_email = None
self.blamechunk_is_last = False
self.blamechunk_is_prior = False
self.blamechunk_revision = None
self.blamechunk_time = None
def __handle_blamechunk_content__(self, content):
author = None
(comments, self.is_inside_comment) = comment.handle_comment_block(self.is_inside_comment, self.extension, content)
if self.blamechunk_is_prior and interval.get_since():
return
try:
author = self.changes.get_latest_author_by_email(self.blamechunk_email)
except KeyError:
return
if not filtering.set_filtered(author, "author") and not \
filtering.set_filtered(self.blamechunk_email, "email") and not \
filtering.set_filtered(self.blamechunk_revision, "revision"):
__blame_lock__.acquire() # Global lock used to protect calls from here...
if self.blames.get((author, self.filename), None) == None:
self.blames[(author, self.filename)] = BlameEntry()
self.blames[(author, self.filename)].comments += comments
self.blames[(author, self.filename)].rows += 1
if (self.blamechunk_time - self.changes.first_commit_date).days > 0:
self.blames[(author, self.filename)].skew += ((self.changes.last_commit_date - self.blamechunk_time).days /
(7.0 if self.useweeks else AVG_DAYS_PER_MONTH))
__blame_lock__.release() # ...to here.
def run(self):
git_blame_r = subprocess.Popen(self.blame_command, bufsize=1, stdout=subprocess.PIPE).stdout
rows = git_blame_r.readlines()
git_blame_r.close()
self.__clear_blamechunk_info__()
#pylint: disable=W0201
for j in range(0, len(rows)):
row = rows[j].decode("utf-8", "replace").strip()
keyval = row.split(" ", 2)
if self.blamechunk_is_last:
self.__handle_blamechunk_content__(row)
self.__clear_blamechunk_info__()
elif keyval[0] == "boundary":
self.blamechunk_is_prior = True
elif keyval[0] == "author-mail":
self.blamechunk_email = keyval[1].lstrip("<").rstrip(">")
elif keyval[0] == "author-time":
self.blamechunk_time = datetime.date.fromtimestamp(int(keyval[1]))
elif keyval[0] == "filename":
self.blamechunk_is_last = True
elif Blame.is_revision(keyval[0]):
self.blamechunk_revision = keyval[0]
__thread_lock__.release() # Lock controlling the number of threads running
PROGRESS_TEXT = N_("Checking how many rows belong to each author (Progress): {0:.0f}%")
class Blame:
def __init__(self, hard, useweeks, changes):
self.blames = {}
ls_tree_r = subprocess.Popen(["git", "ls-tree", "--name-only", "-r", interval.get_ref()], bufsize=1,
stdout=subprocess.PIPE).stdout
lines = ls_tree_r.readlines()
ls_tree_r.close()
for i, row in enumerate(lines):
row = row.strip().decode("unicode_escape", "ignore")
row = row.encode("latin-1", "replace")
row = row.decode("utf-8", "replace").strip("\"").strip("'").strip()
if FileDiff.is_valid_extension(row) and not filtering.set_filtered(FileDiff.get_filename(row)):
blame_command = filter(None, ["git", "blame", "--line-porcelain", "-w"] + \
(["-C", "-C", "-M"] if hard else []) +
[interval.get_since(), interval.get_ref(), "--", row])
thread = BlameThread(useweeks, changes, blame_command, FileDiff.get_extension(row), self.blames, row.strip())
thread.daemon = True
thread.start()
if hard:
Blame.output_progress(i, len(lines))
# Make sure all threads have completed.
for i in range(0, NUM_THREADS):
__thread_lock__.acquire()
@staticmethod
def output_progress(pos, length):
if sys.stdout.isatty() and format.is_interactive_format():
terminal.clear_row()
print(_(PROGRESS_TEXT).format(100 * pos / length), end="")
sys.stdout.flush()
@staticmethod
def is_revision(string):
revision = re.search("([0-9a-f]{40})", string)
if revision == None:
return False
return revision.group(1).strip()
@staticmethod
def get_stability(author, blamed_rows, changes):
if author in changes.get_authorinfo_list():
author_insertions = changes.get_authorinfo_list()[author].insertions
return 100 if author_insertions == 0 else 100.0 * blamed_rows / author_insertions
return 100
@staticmethod
def get_time(string):
time = re.search(" \(.*?(\d\d\d\d-\d\d-\d\d)", string)
return time.group(1).strip()
def get_summed_blames(self):
summed_blames = {}
for i in self.blames.items():
if summed_blames.get(i[0][0], None) == None:
summed_blames[i[0][0]] = BlameEntry()
summed_blames[i[0][0]].rows += i[1].rows
summed_blames[i[0][0]].skew += i[1].skew
summed_blames[i[0][0]].comments += i[1].comments
return summed_blames
__blame__ = None
def get(hard, useweeks, changes):
global __blame__
if __blame__ == None:
__blame__ = Blame(hard, useweeks, changes)
return __blame__
BLAME_INFO_TEXT = N_("Below are the number of rows from each author that have survived and are still "
"intact in the current revision")
class BlameOutput(Outputable):
def __init__(self, changes, hard, useweeks):
if format.is_interactive_format():
print("")
self.changes = changes
self.hard = hard
self.useweeks = useweeks
get(self.hard, self.useweeks, self.changes)
Outputable.__init__(self)
def output_html(self):
blame_xml = "<div><div class=\"box\">"
blame_xml += "<p>" + _(BLAME_INFO_TEXT) + ".</p><div><table id=\"blame\" class=\"git\">"
blame_xml += "<thead><tr> <th>{0}</th> <th>{1}</th> <th>{2}</th> <th>{3}</th> <th>{4}</th> </tr></thead>".format(
_("Author"), _("Rows"), _("Stability"), _("Age"), _("% in comments"))
blame_xml += "<tbody>"
chart_data = ""
blames = sorted(__blame__.get_summed_blames().items())
total_blames = 0
for i in blames:
total_blames += i[1].rows
for i, entry in enumerate(blames):
work_percentage = str("{0:.2f}".format(100.0 * entry[1].rows / total_blames))
blame_xml += "<tr " + ("class=\"odd\">" if i % 2 == 1 else ">")
if format.get_selected() == "html":
author_email = self.changes.get_latest_email_by_author(entry[0])
blame_xml += "<td><img src=\"{0}\"/>{1}</td>".format(gravatar.get_url(author_email), entry[0])
else:
blame_xml += "<td>" + entry[0] + "</td>"
blame_xml += "<td>" + str(entry[1].rows) + "</td>"
blame_xml += "<td>" + ("{0:.1f}".format(Blame.get_stability(entry[0], entry[1].rows, self.changes)) + "</td>")
blame_xml += "<td>" + "{0:.1f}".format(float(entry[1].skew) / entry[1].rows) + "</td>"
blame_xml += "<td>" + "{0:.2f}".format(100.0 * entry[1].comments / entry[1].rows) + "</td>"
blame_xml += "<td style=\"display: none\">" + work_percentage + "</td>"
blame_xml += "</tr>"
chart_data += "{{label: {0}, data: {1}}}".format(json.dumps(entry[0]), work_percentage)
if blames[-1] != entry:
chart_data += ", "
blame_xml += "<tfoot><tr> <td colspan=\"5\">&nbsp;</td> </tr></tfoot></tbody></table>"
blame_xml += "<div class=\"chart\" id=\"blame_chart\"></div></div>"
blame_xml += "<script type=\"text/javascript\">"
blame_xml += " blame_plot = $.plot($(\"#blame_chart\"), [{0}], {{".format(chart_data)
blame_xml += " series: {"
blame_xml += " pie: {"
blame_xml += " innerRadius: 0.4,"
blame_xml += " show: true,"
blame_xml += " combine: {"
blame_xml += " threshold: 0.01,"
blame_xml += " label: \"" + _("Minor Authors") + "\""
blame_xml += " }"
blame_xml += " }"
blame_xml += " }, grid: {"
blame_xml += " hoverable: true"
blame_xml += " }"
blame_xml += " });"
blame_xml += "</script></div></div>"
print(blame_xml)
def output_text(self):
if sys.stdout.isatty() and format.is_interactive_format():
terminal.clear_row()
print(textwrap.fill(_(BLAME_INFO_TEXT) + ":", width=terminal.get_size()[0]) + "\n")
terminal.printb(terminal.ljust(_("Author"), 21) + terminal.rjust(_("Rows"), 10) + terminal.rjust(_("Stability"), 15) +
terminal.rjust(_("Age"), 13) + terminal.rjust(_("% in comments"), 20))
for i in sorted(__blame__.get_summed_blames().items()):
print(terminal.ljust(i[0], 20)[0:20 - terminal.get_excess_column_count(i[0])], end=" ")
print(str(i[1].rows).rjust(10), end=" ")
print("{0:.1f}".format(Blame.get_stability(i[0], i[1].rows, self.changes)).rjust(14), end=" ")
print("{0:.1f}".format(float(i[1].skew) / i[1].rows).rjust(12), end=" ")
print("{0:.2f}".format(100.0 * i[1].comments / i[1].rows).rjust(19))
def output_xml(self):
message_xml = "\t\t<message>" + _(BLAME_INFO_TEXT) + "</message>\n"
blame_xml = ""
for i in sorted(__blame__.get_summed_blames().items()):
author_email = self.changes.get_latest_email_by_author(i[0])
name_xml = "\t\t\t\t<name>" + i[0] + "</name>\n"
gravatar_xml = "\t\t\t\t<gravatar>" + gravatar.get_url(author_email) + "</gravatar>\n"
rows_xml = "\t\t\t\t<rows>" + str(i[1].rows) + "</rows>\n"
stability_xml = ("\t\t\t\t<stability>" + "{0:.1f}".format(Blame.get_stability(i[0], i[1].rows,
self.changes)) + "</stability>\n")
age_xml = ("\t\t\t\t<age>" + "{0:.1f}".format(float(i[1].skew) / i[1].rows) + "</age>\n")
percentage_in_comments_xml = ("\t\t\t\t<percentage-in-comments>" + "{0:.2f}".format(100.0 * i[1].comments / i[1].rows) +
"</percentage-in-comments>\n")
blame_xml += ("\t\t\t<author>\n" + name_xml + gravatar_xml + rows_xml + stability_xml + age_xml +
percentage_in_comments_xml + "\t\t\t</author>\n")
print("\t<blame>\n" + message_xml + "\t\t<authors>\n" + blame_xml + "\t\t</authors>\n\t</blame>")