From a08b2b7550345a5316e8c1ef48734edc2b924fb1 Mon Sep 17 00:00:00 2001 From: remittor Date: Mon, 15 Nov 2021 14:43:28 +0300 Subject: [PATCH] Update PO and LMO tools Replace prefix "msgkey" with "msgid". Replace field "val_id" with "plural" (to support the new version of the LMO translator). --- lmo2po.py | 35 ++++++++++++++++++----------------- po2lmo.py | 47 +++++++++++++++++++++-------------------------- 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/lmo2po.py b/lmo2po.py index 1ea285c..1a2cdba 100644 --- a/lmo2po.py +++ b/lmo2po.py @@ -10,9 +10,9 @@ def die(msg): class LmoEntry: - def __init__(self, key_id = 0, val_id = 0, offset = 0, length = 0, val = None): + def __init__(self, key_id = 0, plural = 0, offset = 0, length = 0, val = None): self.key_id = key_id - self.val_id = val_id + self.plural = plural self.offset = offset self.length = length self.val = val @@ -26,10 +26,10 @@ class Lmo: def __init__(self): self.options = "" self.entries = [] - self.use_plural_num = None # value of entry.val_id def load_from_bin(self, filename): self.entries = [] + use_plural_num = False with open(filename, "rb") as file: data = file.read() table_offset = int.from_bytes(data[-4:], byteorder='big') @@ -40,18 +40,21 @@ class Lmo: break entry = LmoEntry() entry.key_id = int.from_bytes(data[off :off+4] , byteorder='big') - entry.val_id = int.from_bytes(data[off+4 :off+8] , byteorder='big') + entry.plural = int.from_bytes(data[off+4 :off+8] , byteorder='big') - 1 entry.offset = int.from_bytes(data[off+8 :off+12], byteorder='big') entry.length = int.from_bytes(data[off+12:off+16], byteorder='big') entry.val = data[entry.offset:entry.offset+entry.length] - #print("%08X %d %08X %d" % (entry.key_id, entry.val_id, entry.offset, entry.length)) + #print("%08X %d %08X %d" % (entry.key_id, entry.plural, entry.offset, entry.length)) + if off == table_offset: + if entry.key_id == 0 and entry.plural == -1: + use_plural_num = True + if use_plural_num: + if entry.plural >= 10 or (off != 
table_offset and entry.plural < 0): + die("Too many plural forms") + else: + entry.plural = 0 # older version LMO-files contain hash of value self.entries.append(entry) off += 16 - if self.use_plural_num is None: - self.use_plural_num = True - ent = next((ent for ent in self.entries if ent.val_id > 10), None) - if ent: - self.use_plural_num = False self.entries = sorted(self.entries, key=lambda x: x.offset) #self.dup_search() @@ -76,7 +79,7 @@ class Lmo: val = ent.val.decode('utf-8') val = val.replace('\\', '\\\\') val = val.replace('"', r'\"') - if ent.key_id == 0 and ent.val_id == 0 and ent.offset == 0: + if ent.key_id == 0 and ent.plural == -1: val = val.replace('\n', r'\n') txt += 'msgid ""' + '\n' txt += 'msgstr ""' + '\n' @@ -90,11 +93,11 @@ class Lmo: txt += '\n' continue prefix = '' - if self.use_plural_num and ent.val_id != 1: - prefix = '[%d]' % (ent.val_id - 1) + #if ent.plural != 0: + # prefix = '[%d]' % ent.plural if ent.dup: txt += '# DUP' + '\n' - txt += 'msgkey 0x{}'.format("%08X" % ent.key_id) + '\n' + txt += 'msgid 0x{}'.format("%08X" % ent.key_id) + '\n' line_limit = 77 val = val.replace('\r', '') if val.find('\n') >= 0: @@ -143,7 +146,7 @@ if __name__ == "__main__": if not ('m' in lmo.options): lmo.save_to_text(fn_out) print('\nPO-file saved to "{}"'.format(fn_out)) - sys.exit(1) + sys.exit(0) # Merge 2 lmo-files fn_inp2 = sys.argv[4] @@ -158,8 +161,6 @@ if __name__ == "__main__": if not dup: lmo.entries.append(ent) lmo.options = 'k' - if not lmo2.use_plural_num: - lmo.use_plural_num = False lmo.save_to_text(fn_out) print('\nMerged PO-file saved to "{}"'.format(fn_out)) diff --git a/po2lmo.py b/po2lmo.py index 8988915..ecdbd72 100644 --- a/po2lmo.py +++ b/po2lmo.py @@ -36,9 +36,9 @@ def sfh_hash(data): if isinstance(data, str): data = data.encode("utf-8") size = len(data) - hash = u32_t(size) if size <= 0: return 0 + hash = u32_t(size) rem = size & 3 length = size // 4 for i in range(length): @@ -80,20 +80,20 @@ class Msg: def __init__(self): 
self.init() - def init(self): - self.plural_num = -1 + def init(self, plural_num = 0): + self.plural_num = plural_num self.ctxt = None self.id = None self.id_plural = None - self.val = [ None ] # list of string + self.val = [ None ] * 10 # list of string self.cur = MSG_UNSPEC self.key = None class LmoEntry: - def __init__(self, key_id = 0, val_id = 0, offset = 0, length = 0, val = None): + def __init__(self, key_id = 0, plural = 0, offset = 0, length = 0, val = None): self.key_id = key_id - self.val_id = val_id + self.plural = plural self.offset = offset self.length = length self.val = val @@ -109,10 +109,10 @@ class Lmo: self.entries = [] self.msg = Msg() - def add_entry(self, key_id, val_id, val): + def add_entry(self, key_id, plural, val): entry = LmoEntry() entry.key_id = key_id - entry.val_id = val_id + entry.plural = plural entry.offset = len(self.entries) entry.length = len(val) entry.val = val @@ -129,14 +129,14 @@ class Lmo: msg = self.msg if not msg.id and not msg.val[0]: return + if not msg.val[0]: + self.msg.init() + return if msg.key is not None: - val = msg.val[msg.plural_num] - self.add_entry(msg.key, msg.plural_num + 1, val) - elif msg.id and msg.val[0]: - for i in range(msg.plural_num + 1): - if i >= len(msg.val): - continue - val = msg.val[i] + val = msg.val[0] + self.add_entry(msg.key, 0, val) + elif msg.id and msg.plural_num >= 0: + for i, val in enumerate(msg.val): if val is None: continue if (msg.ctxt and msg.id_plural): @@ -150,8 +150,8 @@ class Lmo: key_id = sfh_hash(key) val_id = sfh_hash(val) if key_id != val_id: - self.add_entry(key_id, msg.plural_num + 1, val) - elif msg.val[0]: + self.add_entry(key_id, msg.plural_num, val) + else: val = msg.val[0] prefix = b'\\nPlural-Forms: ' x = val.find(prefix) @@ -159,7 +159,7 @@ class Lmo: x += len(prefix) x2 = val.find(b'\\n', x) if x2 > 0: - self.add_entry(0, 0, val[x:x2]) + self.add_entry(0, -1, val[x:x2]) # reinit object msg self.msg.init() @@ -207,10 +207,6 @@ class Lmo: msg.plural_num = 
int(line[x1+1:x2]) if msg.plural_num >= 10: die("Too many plural forms") - if len(msg.val) <= msg.plural_num: - x = msg.plural_num - len(msg.val) + 1 - for i in range(x): - msg.val.append(None) msg.val[msg.plural_num] = b'' msg.cur = MSG_STR # read text data @@ -228,7 +224,7 @@ class Lmo: def load_from_text(self, filename): self.entries = [] - self.msg.init() + self.msg.init(-1) with open(filename, "r", encoding='UTF-8') as file: for line in file: self.process_line(line.rstrip()) @@ -248,8 +244,7 @@ class Lmo: val = val.encode('utf-8') length = len(val) buf[offset:offset+length] = val - val_id = ent.val_id - ek = LmoEntry(ent.key_id, val_id, offset, length, val) + ek = LmoEntry(ent.key_id, ent.plural, offset, length, val) ek.dup = ent.dup elst.append(ek) offset += length @@ -261,7 +256,7 @@ class Lmo: table_offset = offset for i, ent in enumerate(elst): buf[offset :offset+4] = ent.key_id.to_bytes(4, byteorder='big') - buf[offset+4 :offset+8] = ent.val_id.to_bytes(4, byteorder='big') + buf[offset+4 :offset+8] = (ent.plural + 1).to_bytes(4, byteorder='big') buf[offset+8 :offset+12] = ent.offset.to_bytes(4, byteorder='big') buf[offset+12:offset+16] = ent.length.to_bytes(4, byteorder='big') if self.verbose and ent.dup: