From 632445a431c696f776005a4c6d1c5715cc240a62 Mon Sep 17 00:00:00 2001
From: Adriaan de Groot
Date: Tue, 6 Oct 2020 11:44:00 +0200
Subject: [PATCH 1/6] [unpackfs] Give entries a weight

When there are multiple entries, the overall weight of the module is
divided between the entries: currently each entry takes an equal amount
of space in the overall progress.

When there are multiple entries which take wildly different amounts of
time (e.g. a squash-fs and a single file) then the progress overall
looks weird: the squash-fs gets half of this module's weight, and the
single file does too.

With the new *weight* key for entries, that division can be tweaked so
that progress looks more "even".
---
 src/modules/unpackfs/main.py       | 26 ++++++++++++++++++++++++--
 src/modules/unpackfs/unpackfs.conf |  8 ++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/modules/unpackfs/main.py b/src/modules/unpackfs/main.py
index d89607465..e0152dc44 100644
--- a/src/modules/unpackfs/main.py
+++ b/src/modules/unpackfs/main.py
@@ -48,8 +48,8 @@ class UnpackEntry:
     :param sourcefs:
     :param destination:
     """
-    __slots__ = ['source', 'sourcefs', 'destination', 'copied', 'total', 'exclude', 'excludeFile',
-                 'mountPoint']
+    __slots__ = ('source', 'sourcefs', 'destination', 'copied', 'total', 'exclude', 'excludeFile',
+                 'mountPoint', 'weight', 'accumulated_weight')

     def __init__(self, source, sourcefs, destination):
         """
@@ -71,6 +71,8 @@ class UnpackEntry:
         self.copied = 0
         self.total = 0
         self.mountPoint = None
+        self.weight = 1
+        self.accumulated_weight = 0  # That's weight **before** this entry

     def is_file(self):
         return self.sourcefs == "file"
@@ -395,6 +397,24 @@ def repair_root_permissions(root_mount_point):
         # But ignore it


+def extract_weight(entry):
+    """
+    Given @p entry, a dict representing a single entry in
+    the *unpack* list, returns its weight (1, or whatever is
+    set if it is sensible).
+    """
+    w = entry.get("weight", None)
+    if w:
+        try:
+            wi = int(w)
+            return wi if wi > 0 else 1
+        except ValueError:
+            utils.warning("*weight* setting {!r} is not valid.".format(w))
+        except TypeError:
+            utils.warning("*weight* setting {!r} must be number.".format(w))
+    return 1
+
+
 def run():
     """
     Unsquash filesystem.
@@ -461,6 +481,8 @@ def run():
             unpack[-1].exclude = entry["exclude"]
         if entry.get("excludeFile", None):
             unpack[-1].excludeFile = entry["excludeFile"]
+        unpack[-1].weight = extract_weight(entry)
+        unpack[-1].accumulated_weight = sum([e.weight for e in unpack[:-1]])

         is_first = False

diff --git a/src/modules/unpackfs/unpackfs.conf b/src/modules/unpackfs/unpackfs.conf
index 2c4a25a80..d12110b60 100644
--- a/src/modules/unpackfs/unpackfs.conf
+++ b/src/modules/unpackfs/unpackfs.conf
@@ -32,6 +32,12 @@
 #   - *excludeFile* is a single file that is passed to rsync as an
 #     --exclude-file argument. This should be a full pathname
 #     inside the **host** filesystem.
+#   - *weight* is useful when the entries take wildly different
+#     times to unpack (e.g. with a squashfs, and one single file)
+#     and the total weight of this module should be distributed
+#     differently between the entries. (This is only relevant when
+#     there is more than one entry; by default all the entries
+#     have the same weight, 1)
 #
 # EXAMPLES
 #
@@ -85,8 +91,10 @@ unpack:
     -   source: ../CHANGES
         sourcefs: file
        destination: "/tmp/changes.txt"
+        weight: 1   # Single file
     -   source: src/qml/calamares/slideshow
         sourcefs: file
         destination: "/tmp/slideshow/"
         exclude: [ "*.qmlc", "qmldir" ]
+        weight: 5   # Lots of files
 #       excludeFile: /etc/calamares/modules/unpackfs/exclude-list.txt
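
As a rough illustration of how the *weight* values divide the module's
progress (a standalone sketch, not taken from the patch; the weights 1
and 5 mirror the unpackfs.conf example above):

    # Standalone sketch: how per-entry weights divide the module's progress.
    weights = [1, 5]
    total_weight = sum(weights)

    for index, weight in enumerate(weights):
        share = float(weight) / total_weight
        print("entry {} takes {:.0%} of the progress".format(index, share))
    # entry 0 takes 17% of the progress
    # entry 1 takes 83% of the progress
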
From 8173b68a71cd15298aa31ca78dc4cfc9f688f391 Mon Sep 17 00:00:00 2001
From: Adriaan de Groot
Date: Tue, 6 Oct 2020 11:54:39 +0200
Subject: [PATCH 2/6] [unpackfs] Debug-log the weights of the modules

---
 src/modules/unpackfs/main.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/modules/unpackfs/main.py b/src/modules/unpackfs/main.py
index e0152dc44..0657f7b85 100644
--- a/src/modules/unpackfs/main.py
+++ b/src/modules/unpackfs/main.py
@@ -486,6 +486,11 @@ def run():

         is_first = False

+    # Show the entry, weight and accumulated_weight before each entry,
+    # to allow tweaking the weights.
+    for e in unpack:
+        utils.debug(".. {!s} w={!s} aw={!s}".format(e.source, e.weight, e.accumulated_weight))
+
     repair_root_permissions(root_mount_point)
     try:
         unpackop = UnpackOperation(unpack)

From bc591f9bc143eecadfa43dd2b8622de02ffbd5a8 Mon Sep 17 00:00:00 2001
From: Adriaan de Groot
Date: Tue, 6 Oct 2020 12:15:51 +0200
Subject: [PATCH 3/6] [unpackfs] Re-vamp progress reporting

- simplify calculation of progress
---
 src/modules/unpackfs/main.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/modules/unpackfs/main.py b/src/modules/unpackfs/main.py
index 0657f7b85..f1e96886f 100644
--- a/src/modules/unpackfs/main.py
+++ b/src/modules/unpackfs/main.py
@@ -262,6 +262,7 @@ class UnpackOperation:
     def __init__(self, entries):
         self.entries = entries
         self.entry_for_source = dict((x.source, x) for x in self.entries)
+        self.total_weight = entries[-1].accumulated_weight + entries[-1].weight

     def report_progress(self):
         """
@@ -269,30 +270,29 @@
         """
         progress = float(0)

-        done = 0  # Done and total apply to the entry now-unpacking
-        total = 0
-        complete = 0  # This many are already finished
+        current_total = 0
+        current_done = 0  # Files count in the current entry
+        complete_count = 0
+        complete_weight = 0  # This much weight already finished
         for entry in self.entries:
             if entry.total == 0:
                 # Total 0 hasn't counted yet
                 continue
             if entry.total == entry.copied:
-                complete += 1
+                complete_weight += entry.weight
+                complete_count += 1
             else:
                 # There is at most *one* entry in-progress
-                total = entry.total
-                done = entry.copied
+                current_total = entry.total
+                current_done = entry.copied
+                complete_weight += ( 1.0 * current_done ) / current_total
                 break

-        if total > 0:
-            # Pretend that each entry represents an equal amount of work;
-            # the complete ones count as 100% of their own fraction
-            # (and have *not* been counted in total or done), while
-            # total/done represents the fraction of the current fraction.
-            progress = ( ( 1.0 * complete ) / len(self.entries) ) + ( ( 1.0 / len(self.entries) ) * ( 1.0 * done / total ) )
+        if current_total > 0:
+            progress = ( 1.0 * complete_weight ) / self.total_weight

             global status
-            status = _("Unpacking image {}/{}, file {}/{}").format((complete+1),len(self.entries),done, total)
+            status = _("Unpacking image {}/{}, file {}/{}").format((complete_count+1), len(self.entries), current_done, current_total)
             job.setprogress(progress)

     def run(self):
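
For reference, the total_weight computed above from the last entry is
the same as summing every entry's weight, since accumulated_weight is
the weight of everything *before* that entry. A small standalone sketch
of that equivalence (Entry here is a minimal stand-in, not the module's
UnpackEntry):

    # Standalone sketch: the last entry's accumulated_weight + weight
    # equals the sum of all weights.
    class Entry:
        def __init__(self, weight, accumulated_weight):
            self.weight = weight
            self.accumulated_weight = accumulated_weight

    entries = []
    for w in (1, 5, 3):  # hypothetical weights
        entries.append(Entry(w, sum(e.weight for e in entries)))

    total_weight = entries[-1].accumulated_weight + entries[-1].weight
    assert total_weight == sum(e.weight for e in entries)  # 9 here
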
From 57fa51ecd9af9f67c6ec42fd73a9234de99e8485 Mon Sep 17 00:00:00 2001
From: Adriaan de Groot
Date: Tue, 6 Oct 2020 12:25:22 +0200
Subject: [PATCH 4/6] [unpackfs] Simplify progress reporting more

If there are thousands of files in a squashfs (e.g. 400000 like on some
ArcoLinux ISOs) then progress would only be reported every 4000 files,
which can take quite some time to write. Reduce file_count_chunk so that
progress is reported more often, even if that doesn't lead to a visible
change in the percentage progress: instead we **do** get a change in the
files-transferred numbers.

- The total weight is only needed by the UnpackOperation, not by each
  entry.
- Use a chunk size of 107 so that the reported number of copied files
  keeps changing visibly; whether the chunk is a little larger or
  smaller doesn't really matter.
- The progress report was missing the weight of the current entry, so
  it would report far too low a value if that weight > 1. This affects
  ArcoLinux configurations where one entry is huge and one is a single
  file, so weights of 50 and 1 are appropriate.
---
 src/modules/unpackfs/main.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/src/modules/unpackfs/main.py b/src/modules/unpackfs/main.py
index f1e96886f..8bbe63861 100644
--- a/src/modules/unpackfs/main.py
+++ b/src/modules/unpackfs/main.py
@@ -49,7 +49,7 @@ class UnpackEntry:
     :param destination:
     """
     __slots__ = ('source', 'sourcefs', 'destination', 'copied', 'total', 'exclude', 'excludeFile',
-                 'mountPoint', 'weight', 'accumulated_weight')
+                 'mountPoint', 'weight')

     def __init__(self, source, sourcefs, destination):
         """
@@ -72,7 +72,6 @@
         self.total = 0
         self.mountPoint = None
         self.weight = 1
-        self.accumulated_weight = 0  # That's weight **before** this entry

     def is_file(self):
         return self.sourcefs == "file"
@@ -193,11 +192,12 @@ def file_copy(source, entry, progress_cb):
                                stdout=subprocess.PIPE, close_fds=ON_POSIX
                                )
     # last_num_files_copied trails num_files_copied, and whenever at least 100 more
-    # files have been copied, progress is reported and last_num_files_copied is updated.
+    # files (file_count_chunk) have been copied, progress is reported and
+    # last_num_files_copied is updated. Pick a chunk size that isn't "tidy"
+    # so that all the digits of the progress-reported number change.
+    #
     last_num_files_copied = 0
-    file_count_chunk = entry.total / 100
-    if file_count_chunk < 100:
-        file_count_chunk = 100
+    file_count_chunk = 107

     for line in iter(process.stdout.readline, b''):
         # rsync outputs progress in parentheses. Each line will have an
@@ -262,7 +262,7 @@ class UnpackOperation:
     def __init__(self, entries):
         self.entries = entries
         self.entry_for_source = dict((x.source, x) for x in self.entries)
-        self.total_weight = entries[-1].accumulated_weight + entries[-1].weight
+        self.total_weight = sum([e.weight for e in entries])

     def report_progress(self):
         """
@@ -285,7 +285,7 @@
                 # There is at most *one* entry in-progress
                 current_total = entry.total
                 current_done = entry.copied
-                complete_weight += ( 1.0 * current_done ) / current_total
+                complete_weight += entry.weight * ( 1.0 * current_done ) / current_total
                 break

         if current_total > 0:
@@ -482,15 +482,9 @@ def run():
         if entry.get("excludeFile", None):
             unpack[-1].excludeFile = entry["excludeFile"]
         unpack[-1].weight = extract_weight(entry)
-        unpack[-1].accumulated_weight = sum([e.weight for e in unpack[:-1]])

         is_first = False

-    # Show the entry, weight and accumulated_weight before each entry,
-    # to allow tweaking the weights.
-    for e in unpack:
-        utils.debug(".. {!s} w={!s} aw={!s}".format(e.source, e.weight, e.accumulated_weight))
-
     repair_root_permissions(root_mount_point)
     try:
         unpackop = UnpackOperation(unpack)
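
A worked example of the corrected weighted-progress calculation (a
standalone sketch with made-up numbers, in the spirit of the 50-and-1
case above): completed entries contribute their full weight, and the
in-progress entry contributes its weight scaled by copied/total.

    # Standalone sketch: entry 0 (weight 1) is complete,
    # entry 1 (weight 50) is half copied.
    entries = [
        {"weight": 1, "copied": 1, "total": 1},
        {"weight": 50, "copied": 200000, "total": 400000},
    ]
    total_weight = sum(e["weight"] for e in entries)

    complete_weight = 0.0
    for e in entries:
        if e["copied"] == e["total"]:
            complete_weight += e["weight"]
        else:
            complete_weight += e["weight"] * (1.0 * e["copied"]) / e["total"]
            break

    progress = complete_weight / total_weight
    print("{:.1%}".format(progress))  # 51.0%, i.e. (1 + 50 * 0.5) / 51
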
From 672e27564ed8bb1de3dcad2eddc29878ed07f6b2 Mon Sep 17 00:00:00 2001
From: Adriaan de Groot
Date: Tue, 6 Oct 2020 13:29:15 +0200
Subject: [PATCH 5/6] [unpackfs] Also report progress every half-second, if possible

This still won't help if there is one really huge file that takes
several seconds to write, but if a bunch of files together amounts to
fewer than file_count_chunk files yet takes more than half a second to
write, update anyway.
---
 src/modules/unpackfs/main.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/modules/unpackfs/main.py b/src/modules/unpackfs/main.py
index 8bbe63861..a573cf6e7 100644
--- a/src/modules/unpackfs/main.py
+++ b/src/modules/unpackfs/main.py
@@ -163,6 +163,8 @@ def file_copy(source, entry, progress_cb):
     :param progress_cb: A callback function for progress reporting.
                         Takes a number and a total-number.
     """
+    import time
+
     dest = entry.destination

     # Environment used for executing rsync properly
@@ -191,12 +193,13 @@ def file_copy(source, entry, progress_cb):
                                args, env=at_env,
                                stdout=subprocess.PIPE, close_fds=ON_POSIX
                                )
-    # last_num_files_copied trails num_files_copied, and whenever at least 100 more
+    # last_num_files_copied trails num_files_copied, and whenever at least 107 more
     # files (file_count_chunk) have been copied, progress is reported and
-    # last_num_files_copied is updated. Pick a chunk size that isn't "tidy"
+    # last_num_files_copied is updated. The chunk size isn't "tidy"
     # so that all the digits of the progress-reported number change.
     #
     last_num_files_copied = 0
+    last_timestamp_reported = time.time()
     file_count_chunk = 107

     for line in iter(process.stdout.readline, b''):
@@ -222,9 +225,10 @@
             # adjusting the offset so that progressbar can be continuesly drawn
             num_files_copied = num_files_total_local - num_files_remaining

-            # Update about once every 1% of this entry
-            if num_files_copied - last_num_files_copied >= file_count_chunk:
+            now = time.time()
+            if (num_files_copied - last_num_files_copied >= file_count_chunk) or (now - last_timestamp_reported > 0.5):
                 last_num_files_copied = num_files_copied
+                last_timestamp_reported = now
                 progress_cb(num_files_copied, num_files_total_local)

     process.wait()
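
The two triggers above (at least file_count_chunk more files copied, or
more than half a second elapsed) amount to a simple report throttle. A
standalone sketch of the same idea, outside the rsync loop; the names
here are hypothetical, not the module's own:

    import time

    # Report when either enough files have been copied or enough time passed.
    def make_throttle(file_count_chunk=107, interval=0.5):
        state = {"count": 0, "time": time.time()}

        def should_report(num_files_copied):
            now = time.time()
            if (num_files_copied - state["count"] >= file_count_chunk
                    or now - state["time"] > interval):
                state["count"] = num_files_copied
                state["time"] = now
                return True
            return False

        return should_report

    should_report = make_throttle()
    for copied in range(1, 1000):
        if should_report(copied):
            print("report progress at", copied, "files")
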
From 7125012a357d23a95a36ec7121becf235f675cc6 Mon Sep 17 00:00:00 2001
From: Adriaan de Groot
Date: Tue, 6 Oct 2020 13:42:17 +0200
Subject: [PATCH 6/6] Changes: document unpackfs

---
 CHANGES | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/CHANGES b/CHANGES
index ec6ad4b61..a96335e6e 100644
--- a/CHANGES
+++ b/CHANGES
@@ -22,8 +22,16 @@ This release contains contributions from (alphabetically by first name):
 - The *machineid* module, which generates UUIDs for systemd and dbus
   and can generate entropy files (filled from `/dev/urandom` in the
   host system) now supports more than one entropy file; generate them as needed
-  (or copy a fixed value to all, depending on *entropy-copy*). Deprecate
-  *entropy* (which generates a specific output file) as too inflexible.
+  (or copy a fixed value to all, depending on *entropy-copy*). Deprecate
+  *entropy* (which generates a specific output file) as too inflexible.
+ - Progress reporting from the *unpackfs* module has been revamped:
+   it reports more often now, so that it is more obvious that files
+   are being transferred even when the percentage progress does not
+   change.
+ - The *unpackfs* module now supports a *weight* setting for each
+   of the unpack entries. For a single entry this does not matter,
+   but if there are multiple entries it allows tweaking the relative
+   progress between each entry.

 # 3.2.30 (2020-09-03) #