do not crash on invalid utf-8

This commit is contained in:
Radovan Garabík 2020-12-29 16:13:59 +01:00
parent 4e796fbb8d
commit 1844de085c
1 changed files with 31 additions and 3 deletions

34
grcat
View File

@ -4,6 +4,34 @@ from __future__ import print_function
import sys, os, string, re, signal, errno
# redefine readline to pass invalidly encoded characters unchanged, if possible
if hasattr(sys.stdin, 'reconfigure') and hasattr(sys.stdout, 'reconfigure'):
# at least python3.7
sys.stdin.reconfigure(errors='surrogateescape')
sys.stdout.reconfigure(errors='surrogateescape')
myreadline = sys.stdin.readline
myprint = print
else:
if hasattr(sys.stdin, 'buffer'):
# python3
buffer_reader = sys.stdin.buffer
else:
buffer_reader = sys.stdin
def myreadline():
for line in buffer_reader:
try:
decoded = line.decode('utf-8', errors='surrogateescape')
except (UnicodeDecodeError, LookupError):
decoded = line.decode('utf-8', errors='ignore')
return decoded
return ''
def myprint(x):
try:
print(x)
except UnicodeEncodeError:
print(x.encode('utf-8', errors='replace').decode('utf-8'))
#some default definitions
colours = {
'none' : "",
@ -175,9 +203,9 @@ while not is_last:
prevcolour = colours['default']
prevcount = "more"
blockflag = 0
freadline = sys.stdin.readline
while 1:
line = freadline()
line = myreadline()
if line == "" :
break
if line[-1] in '\r\n':
@ -275,7 +303,7 @@ while 1:
clineprev = cline[i]
nline = nline + colours['default']
try:
print(nline)
myprint(nline)
except IOError as e:
if e.errno == errno.EPIPE:
break