2012-05-04 11:40:30 +02:00
|
|
|
# coding: utf-8
|
|
|
|
#
|
2015-09-19 04:32:02 +02:00
|
|
|
# Copyright © 2012-2015 Ejwa Software. All rights reserved.
|
2012-05-04 11:40:30 +02:00
|
|
|
#
|
|
|
|
# This file is part of gitinspector.
|
|
|
|
#
|
|
|
|
# gitinspector is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# gitinspector is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with gitinspector. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2013-05-20 01:48:17 +02:00
|
|
|
from __future__ import unicode_literals
|
2014-01-27 03:11:15 +01:00
|
|
|
import datetime
|
2015-09-19 04:32:02 +02:00
|
|
|
import multiprocessing
|
2012-05-28 16:35:47 +02:00
|
|
|
import os
|
|
|
|
import subprocess
|
2015-09-19 04:32:02 +02:00
|
|
|
import threading
|
|
|
|
|
2015-10-20 18:40:08 +02:00
|
|
|
from . import extensions
|
|
|
|
from . import filtering
|
|
|
|
from . import interval
|
|
|
|
|
|
|
|
|
2015-09-19 04:32:02 +02:00
|
|
|
# Number of revisions handed to a single ChangesThread (one "git log" call).
CHANGES_PER_THREAD = 200

# Upper bound on concurrently running ChangesThread workers.
# multiprocessing.cpu_count() is documented to raise NotImplementedError when
# the count cannot be determined; fall back to one worker instead of failing
# at import time.
try:
    NUM_THREADS = multiprocessing.cpu_count()
except NotImplementedError:
    NUM_THREADS = 1

# Counting semaphore: one permit is taken per worker in ChangesThread.__init__
# and handed back at the end of ChangesThread.run().
__thread_lock__ = threading.BoundedSemaphore(NUM_THREADS)
# Mutex held by ChangesThread.run() while it parses output and updates the
# shared Changes object.
__changes_lock__ = threading.Lock()
|
2012-05-04 11:40:30 +02:00
|
|
|
|
2015-10-12 03:15:30 +02:00
|
|
|
class FileDiff(object):
    """One per-file entry of a commit's ``git log --stat`` output.

    A stat line is expected to look like ``<file name> | <count> <+/- graph>``,
    i.e. exactly two fields separated by a single ``|``.
    """

    def __init__(self, string):
        """Parse a stat line into ``name``, ``insertions`` and ``deletions``.

        ``insertions`` / ``deletions`` are the number of ``+`` / ``-``
        characters found after the ``|``. When *string* does not consist of
        exactly two ``|``-separated fields the attributes keep their empty
        defaults instead of being left undefined.
        """
        self.name = ""
        self.insertions = 0
        self.deletions = 0

        fields = string.split("|")

        if len(fields) == 2:
            self.name = fields[0].strip()
            self.insertions = fields[1].count("+")
            self.deletions = fields[1].count("-")

    @staticmethod
    def is_filediff_line(string):
        """Return True if *string* is a textual (non-binary) stat line.

        The line must have exactly two ``|``-separated fields, the second one
        must not contain ``Bin`` and must contain at least one ``+`` or ``-``.
        """
        fields = string.split("|")

        if len(fields) != 2:
            return False

        graph = fields[1]
        return "Bin" not in graph and ("+" in graph or "-" in graph)

    @staticmethod
    def get_extension(string):
        """Return the file extension (without the dot) of a stat line's file name.

        Returns an empty string when the file name has no extension.
        """
        return os.path.splitext(FileDiff.get_filename(string))[1][1:]

    @staticmethod
    def get_filename(string):
        """Return the file name part of a stat line.

        Surrounding whitespace, braces, double quotes and single quotes are
        stripped, in that order.
        """
        return string.split("|")[0].strip().strip("{}").strip("\"").strip("'")

    @staticmethod
    def is_valid_extension(string):
        """Return True if the stat line's extension matches ``extensions.get()``.

        An entry matches when it equals the extension, when it is ``*`` and
        the file has no extension, or when it is ``**`` (matches everything).
        """
        extension = FileDiff.get_extension(string)

        return any((extension == "" and pattern == "*") or extension == pattern or pattern == "**"
                   for pattern in extensions.get())
|
|
|
|
|
2015-10-12 03:15:30 +02:00
|
|
|
class Commit(object):
    """A commit header line plus the FileDiff entries collected for it.

    The header line is expected to have four ``|``-separated fields:
    ``date|sha|author|email`` (the ``--pretty=%cd|%H|%aN|%aE`` format used by
    ChangesThread).
    """

    def __init__(self, string):
        """Parse a commit header line.

        ``date``, ``sha``, ``author`` and ``email`` are set to None when
        *string* does not have exactly four fields, so the attributes always
        exist. ``filediffs`` always starts out empty.
        """
        self.filediffs = []
        self.date = None
        self.sha = None
        self.author = None
        self.email = None

        fields = string.split("|")

        if len(fields) == 4:
            self.date = fields[0]
            self.sha = fields[1]
            self.author = fields[2].strip()
            self.email = fields[3].strip()

    def add_filediff(self, filediff):
        """Append *filediff* to this commit's list of file changes."""
        self.filediffs.append(filediff)

    def get_filediffs(self):
        """Return the list of file changes (the live list, not a copy)."""
        return self.filediffs

    @staticmethod
    def get_author_and_email(string):
        """Return ``(author, email)`` from a commit header line.

        Returns None when *string* is not a four-field commit line; callers
        are expected to check ``is_commit_line`` first.
        """
        fields = string.split("|")

        if len(fields) == 4:
            return (fields[2].strip(), fields[3].strip())

        return None

    @staticmethod
    def is_commit_line(string):
        """Return True if *string* has exactly four ``|``-separated fields."""
        return len(string.split("|")) == 4
|
2012-05-04 11:40:30 +02:00
|
|
|
|
2015-10-12 03:15:30 +02:00
|
|
|
class AuthorInfo(object):
    """Accumulated statistics for one author.

    The values below are class-level defaults; an instance gets its own copy
    of a counter the first time that counter is incremented on it.
    """
    # E-mail address; nothing in this class assigns it.
    email = None
    # Line and commit counters, all starting at zero.
    insertions = deletions = commits = 0
|
|
|
|
|
2015-09-19 04:32:02 +02:00
|
|
|
class ChangesThread(threading.Thread):
    """Worker that runs ``git log --stat`` for one revision range and parses it.

    The resulting list of Commit objects is stored in
    ``changes.commits[offset // CHANGES_PER_THREAD]``. The number of workers
    alive at once is bounded by ``__thread_lock__``: a permit is taken in
    ``__init__`` and returned when ``run`` finishes, whether or not it
    succeeded.
    """

    def __init__(self, hard, changes, first_hash, second_hash, offset):
        """Reserve a worker permit and remember the job parameters.

        hard        -- when true, ``-C -C -M`` is added to the git command line.
        changes     -- the shared Changes object that receives the results.
        first_hash  -- start of the revision range (``"<sha>.."``) or ``""``.
        second_hash -- end of the revision range.
        offset      -- revision index used to select the result slot.

        Blocks until a permit is available.
        """
        __thread_lock__.acquire() # Lock controlling the number of threads running
        threading.Thread.__init__(self)

        self.hard = hard
        self.changes = changes
        self.first_hash = first_hash
        self.second_hash = second_hash
        self.offset = offset

    @staticmethod
    def create(hard, changes, first_hash, second_hash, offset):
        """Construct a worker, mark it as a daemon thread and start it."""
        thread = ChangesThread(hard, changes, first_hash, second_hash, offset)
        thread.daemon = True
        thread.start()

    @staticmethod
    def _decode_line(raw_line):
        """Turn one raw (bytes) output line into stripped text.

        Backslash escapes in the line are expanded first ("unicode_escape"),
        the result is mapped back to bytes through latin-1 and finally decoded
        as UTF-8, replacing anything that cannot be decoded.
        """
        text = raw_line.strip().decode("unicode_escape", "ignore")
        return text.encode("latin-1", "replace").decode("utf-8", "replace")

    def _read_git_log(self):
        """Run ``git log`` for this worker's range and return its raw lines."""
        arguments = ["git", "log", "--reverse", "--pretty=%cd|%H|%aN|%aE",
                     "--stat=100000,8192", "--no-merges", "-w", interval.get_since(),
                     interval.get_until(), "--date=short"]
        arguments += ["-C", "-C", "-M"] if self.hard else []
        arguments += [self.first_hash + self.second_hash]

        # Empty arguments (e.g. an unset since/until) must not reach git.
        git_log = subprocess.Popen([arg for arg in arguments if arg], bufsize=1,
                                   stdout=subprocess.PIPE)
        try:
            return git_log.stdout.readlines()
        finally:
            git_log.stdout.close()
            git_log.wait() # Reap the child so it does not linger as a zombie.

    def _collect_commits(self, lines):
        """Parse raw ``git log --stat`` lines into a list of Commit objects.

        Only commits that received at least one file change with a valid
        extension are returned. Must be called with ``__changes_lock__`` held:
        it updates the shared author/e-mail maps and calls into the
        ``filtering`` and ``extensions`` modules.
        """
        commits = []
        commit = None
        found_valid_extension = False
        is_filtered = False

        for raw_line in lines:
            line = ChangesThread._decode_line(raw_line)

            if Commit.is_commit_line(line):
                (author, email) = Commit.get_author_and_email(line)
                self.changes.emails_by_author[author] = email
                self.changes.authors_by_email[email] = author

                # A new header ends the previous commit; keep it if it had
                # any relevant file changes.
                if found_valid_extension:
                    commits.append(commit)

                found_valid_extension = False
                commit = Commit(line)
                is_filtered = bool(filtering.set_filtered(commit.author, "author") or
                                   filtering.set_filtered(commit.email, "email") or
                                   filtering.set_filtered(commit.sha, "revision") or
                                   filtering.set_filtered(commit.sha, "message"))

            # The file name filter is consulted before is_filtered is tested,
            # exactly as before, so set_filtered() sees every file name.
            if FileDiff.is_filediff_line(line) and not \
               filtering.set_filtered(FileDiff.get_filename(line)) and not is_filtered:
                extensions.add_located(FileDiff.get_extension(line))

                if FileDiff.is_valid_extension(line):
                    found_valid_extension = True
                    commit.add_filediff(FileDiff(line))

        # End of output ends the last commit. Flushing here (rather than while
        # looking at the last line) keeps a trailing file line with its commit.
        if found_valid_extension:
            commits.append(commit)

        return commits

    def run(self):
        """Fetch and parse this worker's revision range, then store the result.

        The worker permit is always returned, and ``__changes_lock__`` is
        always released, even when git or the parsing fails; otherwise the
        thread waiting for all permits would block forever.
        """
        try:
            lines = self._read_git_log()

            with __changes_lock__: # Global lock protecting the shared state
                commits = self._collect_commits(lines)
                self.changes.commits[self.offset // CHANGES_PER_THREAD] = commits
        finally:
            __thread_lock__.release() # Lock controlling the number of threads running
|
|
|
|
|
2015-10-12 03:15:30 +02:00
|
|
|
class Changes(object):
    """All commits of the current repository, gathered by worker threads.

    Attributes:
    commits           -- flat list of Commit objects in ``git rev-list --reverse`` order.
    authors           -- author name -> AuthorInfo (filled lazily).
    authors_dateinfo  -- (date, author name) -> AuthorInfo (filled lazily).
    authors_by_email  -- e-mail -> author name, written by the worker threads.
    emails_by_author  -- author name -> e-mail, written by the worker threads.
    first_commit_date, last_commit_date
                      -- datetime.date of the first/last collected commit;
                         only set when at least one commit was collected.
    """

    def __init__(self, hard):
        """Collect the commits reachable from HEAD within the active interval.

        hard -- forwarded to every ChangesThread.

        The revision list is cut into chunks of CHANGES_PER_THREAD revisions;
        each chunk is parsed by its own ChangesThread. Blocks until every
        worker has finished.
        """
        self.commits = []
        # Per-instance maps, so that separate Changes objects never share state.
        self.authors = {}
        self.authors_dateinfo = {}
        self.authors_by_email = {}
        self.emails_by_author = {}

        arguments = ["git", "rev-list", "--reverse", "--no-merges",
                     interval.get_since(), interval.get_until(), "HEAD"]
        # Empty arguments (e.g. an unset since/until) must not reach git.
        git_rev_list = subprocess.Popen([arg for arg in arguments if arg], bufsize=1,
                                        stdout=subprocess.PIPE)
        try:
            lines = git_rev_list.stdout.readlines()
        finally:
            git_rev_list.stdout.close()
            git_rev_list.wait() # Reap the child so it does not linger as a zombie.

        hashes = [entry.decode("utf-8", "replace").strip() for entry in lines]
        ranges = Changes._revision_ranges(hashes, CHANGES_PER_THREAD)

        if ranges:
            # One result slot per worker; each worker fills slot
            # offset // CHANGES_PER_THREAD.
            self.commits = [None] * len(ranges)

            for (first_hash, second_hash, offset) in ranges:
                ChangesThread.create(hard, self, first_hash, second_hash, offset)

        # Make sure all threads have completed: once every permit can be
        # taken, no worker is holding one any more.
        for _ in range(0, NUM_THREADS):
            __thread_lock__.acquire()

        # Hand the permits back so that workers can be created again later.
        for _ in range(0, NUM_THREADS):
            __thread_lock__.release()

        self.commits = [item for sublist in self.commits for item in sublist]

        if self.commits:
            if interval.has_interval():
                interval.set_ref(self.commits[-1].sha)

            self.first_commit_date = Changes._parse_date(self.commits[0].date)
            self.last_commit_date = Changes._parse_date(self.commits[-1].date)

    @staticmethod
    def _revision_ranges(hashes, chunk_size):
        """Split *hashes* into ``(first_hash, second_hash, offset)`` jobs.

        Every job covers at most *chunk_size* consecutive revisions.
        ``first_hash`` is ``""`` for the first job and ``"<previous end>.."``
        afterwards, ``second_hash`` is the last revision of the chunk and
        ``offset`` is that revision's index in *hashes*. Exactly one job is
        produced per chunk, also when ``len(hashes)`` is a multiple of
        *chunk_size*.
        """
        ranges = []
        first_hash = ""
        last_index = len(hashes) - 1

        for index, sha in enumerate(hashes):
            if index % chunk_size == chunk_size - 1 or index == last_index:
                ranges.append((first_hash, sha, index))
                first_hash = sha + ".."

        return ranges

    @staticmethod
    def _parse_date(string):
        """Convert a ``YYYY-MM-DD`` string into a datetime.date."""
        return datetime.date(int(string[0:4]), int(string[5:7]), int(string[8:10]))

    def get_commits(self):
        """Return the flat list of collected Commit objects."""
        return self.commits

    @staticmethod
    def modify_authorinfo(authors, key, commit):
        """Add *commit*'s statistics to ``authors[key]``.

        An AuthorInfo is created for *key* when missing. The commit counter
        only grows when the commit has at least one file change; insertions
        and deletions are summed over all of its file changes.
        """
        if authors.get(key) is None:
            authors[key] = AuthorInfo()

        info = authors[key]
        filediffs = commit.get_filediffs()

        if filediffs:
            info.commits += 1

        for filediff in filediffs:
            info.insertions += filediff.insertions
            info.deletions += filediff.deletions

    def get_authorinfo_list(self):
        """Return the author name -> AuthorInfo map, building it on first use."""
        if not self.authors:
            for commit in self.commits:
                Changes.modify_authorinfo(self.authors, commit.author, commit)

        return self.authors

    def get_authordateinfo_list(self):
        """Return the (date, author name) -> AuthorInfo map, building it on first use."""
        if not self.authors_dateinfo:
            for commit in self.commits:
                Changes.modify_authorinfo(self.authors_dateinfo, (commit.date, commit.author), commit)

        return self.authors_dateinfo

    def get_latest_author_by_email(self, name):
        """Return the author name most recently recorded for e-mail *name*.

        *name* may be text or bytes; backslash escapes in it are expanded
        before the lookup. Raises KeyError for an unknown e-mail.
        """
        if not hasattr(name, "decode"):
            name = str.encode(name)

        name = name.decode("unicode_escape", "ignore")
        return self.authors_by_email[name]

    def get_latest_email_by_author(self, name):
        """Return the e-mail most recently recorded for author *name*.

        Raises KeyError for an unknown author.
        """
        return self.emails_by_author[name]
|
2012-05-04 11:40:30 +02:00
|
|
|
|
2012-05-04 15:15:41 +02:00
|
|
|
# Module-wide cache holding the single Changes instance created by get().
__changes__ = None

def get(hard):
    """Return the shared Changes instance, creating it on the first call.

    hard -- passed to Changes() when the instance is created; ignored on
            later calls, which return the cached instance unchanged.
    """
    global __changes__

    if __changes__ is None:
        __changes__ = Changes(hard)

    return __changes__
|
2012-05-04 11:40:30 +02:00
|
|
|
|