#! /usr/bin/env python3 # # Reduce a translation file -- generally, a Timezone translation -- by # dropping untranslated strings. An untranslated string is one that # has an empty translation **and** is marked unfinished. # # This is mostly useful to cut down the size of the source file: # far and away most of the zones are not translated, and it's just a # handful of places that get special treatment. from xml.dom.minidom import parse import sys valid = True dom = parse(sys.argv[1]) for n in dom.getElementsByTagName("translation"): attrs = n.attributes.keys() drop = True if "type" not in attrs: drop = False elif "type" in attrs and n.attributes["type"].value != "unfinished": # In the samples I've seen, only "unfinished" is a valid type; # once something has been translated, the attribute vanishes (see # the if branch, above). print("WARNING ''{!s}'' unknown type".format(n.attributes["type"].value)) drop = False valid = False else: t = n.firstChild if t is None: # Unfinished and empty drop = True else: drop = bool(t.data) if drop: message = n.parentNode message.parentNode.removeChild(message) message.unlink() if valid: for line in dom.toxml().split("\n"): if line.strip(): print(line)