3
0
mirror of https://github.com/ergochat/ergo.git synced 2024-11-22 03:49:27 +01:00

anope2json, atheme2json: handle non-UTF8 data

Also ignore an unrecognized field type in anope
This commit is contained in:
Shivaram Lingamneni 2021-12-09 22:11:24 -05:00
parent 3e32e3f19e
commit 4f7356f19a
2 changed files with 26 additions and 6 deletions

18
distrib/anope/anope2json.py Normal file → Executable file
View File

@ -46,8 +46,17 @@ def to_unixnano(timestamp):
def file_to_objects(infile): def file_to_objects(infile):
result = [] result = []
obj = None obj = None
for line in infile: while True:
pieces = line.rstrip('\r\n').split(' ', maxsplit=2) line = infile.readline()
if not line:
break
line = line.rstrip(b'\r\n')
try:
line = line.decode('utf-8')
except UnicodeDecodeError:
line = line.decode('utf-8', 'replace')
logging.warning("line contained invalid utf8 data " + line)
pieces = line.split(' ', maxsplit=2)
if len(pieces) == 0: if len(pieces) == 0:
logging.warning("skipping blank line in db") logging.warning("skipping blank line in db")
continue continue
@ -58,6 +67,9 @@ def file_to_objects(infile):
obj = AnopeObject(pieces[1], {}) obj = AnopeObject(pieces[1], {})
elif pieces[0] == 'DATA': elif pieces[0] == 'DATA':
obj.kv[pieces[1]] = pieces[2] obj.kv[pieces[1]] = pieces[2]
elif pieces[0] == 'ID':
# not sure what these do?
continue
else: else:
raise ValueError("unknown command found in anope db", pieces[0]) raise ValueError("unknown command found in anope db", pieces[0])
return result return result
@ -167,7 +179,7 @@ def convert(infile):
def main(): def main():
if len(sys.argv) != 3: if len(sys.argv) != 3:
raise Exception("Usage: anope2json.py anope.db output.json") raise Exception("Usage: anope2json.py anope.db output.json")
with open(sys.argv[1]) as infile: with open(sys.argv[1], 'rb') as infile:
output = convert(infile) output = convert(infile)
with open(sys.argv[2], 'w') as outfile: with open(sys.argv[2], 'w') as outfile:
json.dump(output, outfile) json.dump(output, outfile)

14
atheme2json.py
View File

@ -31,8 +31,16 @@ def convert(infile):
channel_to_founder = defaultdict(lambda: (None, None)) channel_to_founder = defaultdict(lambda: (None, None))
for line in infile: while True:
line = line.rstrip('\r\n') line = infile.readline()
if not line:
break
line = line.rstrip(b'\r\n')
try:
line = line.decode('utf-8')
except UnicodeDecodeError:
line = line.decode('utf-8', 'replace')
logging.warning("line contained invalid utf8 data " + line)
parts = line.split(' ') parts = line.split(' ')
category = parts[0] category = parts[0]
@ -177,7 +185,7 @@ def convert(infile):
def main(): def main():
if len(sys.argv) != 3: if len(sys.argv) != 3:
raise Exception("Usage: atheme2json.py atheme_db output.json") raise Exception("Usage: atheme2json.py atheme_db output.json")
with open(sys.argv[1]) as infile: with open(sys.argv[1], 'rb') as infile:
output = convert(infile) output = convert(infile)
with open(sys.argv[2], 'w') as outfile: with open(sys.argv[2], 'w') as outfile:
json.dump(output, outfile) json.dump(output, outfile)