From 1844de085c6e4ec8bd3e84611e61b8d6bb359aed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radovan=20Garab=C3=ADk?=
Date: Tue, 29 Dec 2020 16:13:59 +0100
Subject: [PATCH] do not crash on invalid utf-8

Read input through myreadline() and write output through myprint(), so
that bytes which are not valid UTF-8 no longer raise a Unicode error.
---
Review notes (this section is not part of the commit message):

* This revision differs from the original commit in the fallback branch
  (interpreters without TextIOWrapper.reconfigure): myreadline() calls
  readline() instead of iterating over the file, and myprint() prints the
  encoded byte string on python2. The commit id on the first line and the
  post-image blob id on the "index" line still refer to the original
  revision.
* The leading whitespace of the context lines was reconstructed; verify it
  against grcat (or apply with --ignore-whitespace).

 grcat | 47 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/grcat b/grcat
index 4611c72..3a0d054 100755
--- a/grcat
+++ b/grcat
@@ -4,6 +4,47 @@ from __future__ import print_function
 
 import sys, os, string, re, signal, errno
 
+
+# Read and write lines so that invalidly encoded (non-UTF-8) bytes do not
+# crash the program and are passed through unchanged where possible.
+# myreadline() returns one decoded line, or '' at end of input;
+# myprint(x) prints one line of text.
+if hasattr(sys.stdin, 'reconfigure') and hasattr(sys.stdout, 'reconfigure'):
+    # at least python3.7: undecodable bytes become surrogate escapes on
+    # input and are turned back into the original bytes on output
+    sys.stdin.reconfigure(errors='surrogateescape')
+    sys.stdout.reconfigure(errors='surrogateescape')
+    myreadline = sys.stdin.readline
+    myprint = print
+else:
+    if hasattr(sys.stdin, 'buffer'):
+        # python3
+        buffer_reader = sys.stdin.buffer
+    else:
+        buffer_reader = sys.stdin
+    def myreadline():
+        # readline() and not iteration over the file: python2 file iterators
+        # use a hidden read-ahead buffer which would delay piped input
+        line = buffer_reader.readline()
+        try:
+            return line.decode('utf-8', errors='surrogateescape')
+        except (UnicodeDecodeError, LookupError):
+            # python2 has no surrogateescape handler (LookupError),
+            # drop the invalid bytes instead
+            return line.decode('utf-8', errors='ignore')
+    def myprint(x):
+        try:
+            print(x)
+        except UnicodeEncodeError:
+            # lossy fallback: what cannot be encoded is replaced
+            encoded = x.encode('utf-8', errors='replace')
+            if isinstance(encoded, str):
+                # python2: print the byte string as it is, decoding it
+                # would give the same unprintable unicode string again
+                print(encoded)
+            else:
+                print(encoded.decode('utf-8'))
+
 #some default definitions
 colours = {
             'none'       :    "",
@@ -175,9 +216,9 @@ while not is_last:
 prevcolour = colours['default']
 prevcount = "more"
 blockflag = 0
-freadline = sys.stdin.readline
+
 while 1:
-    line = freadline()
+    line = myreadline()
     if line == "" :
         break
     if line[-1] in '\r\n':
@@ -275,7 +316,7 @@ while 1:
         clineprev = cline[i]
     nline = nline + colours['default']
     try:
-        print(nline)
+        myprint(nline)
     except IOError as e:
         if e.errno == errno.EPIPE:
             break