3
0
mirror of https://github.com/ergochat/ergo.git synced 2024-11-15 00:19:29 +01:00

Merge pull request #1863 from slingamn/importer_utf8

anope2json, atheme2json: handle non-UTF8 data
This commit is contained in:
Shivaram Lingamneni 2021-12-10 01:20:35 -05:00 committed by GitHub
commit 7bc5bfaa5c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 6 deletions

18
distrib/anope/anope2json.py Normal file → Executable file
View File

@ -46,8 +46,17 @@ def to_unixnano(timestamp):
def file_to_objects(infile):
result = []
obj = None
for line in infile:
pieces = line.rstrip('\r\n').split(' ', maxsplit=2)
while True:
line = infile.readline()
if not line:
break
line = line.rstrip(b'\r\n')
try:
line = line.decode('utf-8')
except UnicodeDecodeError:
line = line.decode('utf-8', 'replace')
logging.warning("line contained invalid utf8 data " + line)
pieces = line.split(' ', maxsplit=2)
if len(pieces) == 0:
logging.warning("skipping blank line in db")
continue
@ -58,6 +67,9 @@ def file_to_objects(infile):
obj = AnopeObject(pieces[1], {})
elif pieces[0] == 'DATA':
obj.kv[pieces[1]] = pieces[2]
elif pieces[0] == 'ID':
# not sure what these do?
continue
else:
raise ValueError("unknown command found in anope db", pieces[0])
return result
@ -167,7 +179,7 @@ def convert(infile):
def main():
    """Command-line entry point: convert an Anope database to JSON.

    Expects exactly two command-line arguments: the path of the Anope
    database to read and the path of the JSON file to write.

    Raises:
        Exception: if the number of command-line arguments is wrong.
    """
    if len(sys.argv) != 3:
        raise Exception("Usage: anope2json.py anope.db output.json")
    # Open in binary mode: the reading code decodes each line itself and
    # falls back to replacement characters when a line is not valid UTF-8,
    # so a text-mode open (which would raise on such data) must not be used.
    with open(sys.argv[1], 'rb') as infile:
        output = convert(infile)
    with open(sys.argv[2], 'w') as outfile:
        json.dump(output, outfile)

View File

@ -31,8 +31,16 @@ def convert(infile):
channel_to_founder = defaultdict(lambda: (None, None))
for line in infile:
line = line.rstrip('\r\n')
while True:
line = infile.readline()
if not line:
break
line = line.rstrip(b'\r\n')
try:
line = line.decode('utf-8')
except UnicodeDecodeError:
line = line.decode('utf-8', 'replace')
logging.warning("line contained invalid utf8 data " + line)
parts = line.split(' ')
category = parts[0]
@ -177,7 +185,7 @@ def convert(infile):
def main():
    """Command-line entry point: convert an Atheme database to JSON.

    Expects exactly two command-line arguments: the path of the Atheme
    database to read and the path of the JSON file to write.

    Raises:
        Exception: if the number of command-line arguments is wrong.
    """
    if len(sys.argv) != 3:
        raise Exception("Usage: atheme2json.py atheme_db output.json")
    # Open in binary mode: convert() reads raw bytes and decodes each line
    # itself, falling back to replacement characters on invalid UTF-8, so a
    # text-mode open (which would raise on such data) must not be used.
    with open(sys.argv[1], 'rb') as infile:
        output = convert(infile)
    with open(sys.argv[2], 'w') as outfile:
        json.dump(output, outfile)