Ensure files written with AtomicFile are read in UTF8

With some locale configurations (not that uncommon on CentOS), open() may
default to a non-UTF8 encoding (e.g. ANSI_X3.4-1968).

This is usually not an issue, because we use open() both for writing and
for reading, so both sides use the same default encoding. However, AtomicFile
implicitly enforces UTF8, which needs to be mirrored when reading.
This commit is contained in:
Valentin Lorentz 2022-09-27 23:16:21 +02:00
parent b1cfb87e71
commit a6aa5530dd
5 changed files with 13 additions and 11 deletions

View File

@ -209,7 +209,7 @@ class SqliteKarmaDB(object):
def load(self, channel, filename):
filename = conf.supybot.directories.data.dirize(filename)
fd = open(filename)
fd = open(filename, encoding='utf8')
reader = csv.reader(fd)
db = self._getDb(channel)
cursor = db.cursor()

View File

@ -150,7 +150,7 @@ class FlatfileMapping(MappingInterface):
def __init__(self, filename, maxSize=10**6):
self.filename = filename
try:
fd = open(self.filename)
fd = open(self.filename, encoding='utf8')
strId = fd.readline().rstrip()
self.maxSize = len(strId)
try:
@ -175,7 +175,7 @@ class FlatfileMapping(MappingInterface):
def _incrementCurrentId(self, fd=None):
fdWasNone = fd is None
if fdWasNone:
fd = open(self.filename, 'a')
fd = open(self.filename, 'a', encoding='utf8')
fd.seek(0)
self.currentId += 1
fd.write(self._canonicalId(self.currentId))
@ -193,7 +193,7 @@ class FlatfileMapping(MappingInterface):
def add(self, s):
line = self._joinLine(self.currentId, s)
fd = open(self.filename, 'r+')
fd = open(self.filename, 'r+', encoding='utf8')
try:
fd.seek(0, 2) # End.
fd.write(line)
@ -205,7 +205,7 @@ class FlatfileMapping(MappingInterface):
def get(self, id):
strId = self._canonicalId(id)
try:
fd = open(self.filename)
fd = open(self.filename, encoding='utf8')
fd.readline() # First line, nextId.
for line in fd:
(lineId, s) = self._splitLine(line)
@ -221,7 +221,7 @@ class FlatfileMapping(MappingInterface):
def set(self, id, s):
strLine = self._joinLine(id, s)
try:
fd = open(self.filename, 'r+')
fd = open(self.filename, 'r+', encoding='utf8')
self.remove(id, fd)
fd.seek(0, 2) # End.
fd.write(strLine)
@ -233,7 +233,7 @@ class FlatfileMapping(MappingInterface):
strId = self._canonicalId(id)
try:
if fdWasNone:
fd = open(self.filename, 'r+')
fd = open(self.filename, 'r+', encoding='utf8')
fd.seek(0)
fd.readline() # First line, nextId
pos = fd.tell()
@ -262,7 +262,7 @@ class FlatfileMapping(MappingInterface):
fd.close()
def vacuum(self):
infd = open(self.filename)
infd = open(self.filename, encoding='utf8')
outfd = utils.file.AtomicFile(self.filename,makeBackupIfSmaller=False)
outfd.write(infd.readline()) # First line, nextId.
for line in infd:

View File

@ -84,7 +84,7 @@ def open_registry(filename, clear=False):
global _lastModified
if clear:
_cache.clear()
_fd = open(filename)
_fd = open(filename, encoding='utf8')
fd = utils.file.nonCommentNonEmptyLines(_fd)
acc = ''
slashEnd = re.compile(r'\\*$')

View File

@ -70,7 +70,7 @@ class Reader(object):
return s.lower()
def readFile(self, filename):
self.read(open(filename))
self.read(open(filename, encoding='utf8'))
def read(self, fd):
lineno = 0

View File

@ -130,7 +130,9 @@ def chunks(fd, size):
class AtomicFile(object):
"""Used for files that need to be atomically written -- i.e., if there's a
failure, the original file remains, unmodified. mode must be 'w' or 'wb'"""
failure, the original file remains, unmodified. mode must be 'w' or 'wb'.
If ``encoding`` is None (or not provided), files are open in `utf8` regardless
of the system locale."""
class default(object): # Holder for values.
# Callables?
tmpDir = None