Add support for nested progress bars to TracInCP even when tqdm is not available. (#1046)

cyrjano · facebook-github-bot · commit 2689f21a2d21 · 2022-11-02T13:13:52.000-07:00
Summary: This pull request adds support for nested progress bars in `SimpleProgress` (i.e. when tqdm is not installed) by leveraging Python context managers (`with` statements), which tqdm also implements. To keep `SimpleProgress` simple, the behaviour is as follows: - Add a new line for each update of the parent progress bar. - Guarantee that each update of the parent progress bar is refreshed. - Do not write a refresh when the parent progress bar ends (to avoid duplicate lines). This support is now used in the TracInCP and TracInCPFast methods. ![Screen Shot 2022-10-11 at 12 43 23 PM](https://user-images.githubusercontent.com/3238673/195717879-1ffc3e4a-a8d4-4f4f-a661-4c11fd93252c.png) Pull Request resolved: #1046 Reviewed By: aobo-y Differential Revision: D40397776 Pulled By: cyrjano fbshipit-source-id: 26316255296a50fc4c80a22658be386769877c5e
diff --git a/captum/_utils/progress.py b/captum/_utils/progress.py
@@ -5,6 +5,8 @@
 from time import time
 from typing import cast, Iterable, Sized, TextIO
 
+from captum._utils.typing import Literal
+
 try:
     from tqdm.auto import tqdm
 except ImportError:
@@ -40,6 +42,38 @@ def flush(self, *args, **kwargs):
         return self._wrapped_run(self._wrapped.flush, *args, **kwargs)
 
 
+class NullProgress:
+    """Passthrough class that implements the progress API.
+
+    This class implements the tqdm and SimpleProgressBar api but
+    does nothing. This class can be used as a stand-in for an
+    optional progressbar, most commonly in the case of nested
+    progress bars.
+    """
+
+    def __init__(self, iterable: Iterable = None, *args, **kwargs):
+        del args, kwargs
+        self.iterable = iterable
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback) -> Literal[False]:
+        return False
+
+    def __iter__(self):
+        if not self.iterable:
+            return
+        for it in self.iterable:
+            yield it
+
+    def update(self, amount: int = 1):
+        pass
+
+    def close(self):
+        pass
+
+
 class SimpleProgress:
     def __init__(
         self,
@@ -51,10 +85,13 @@ def __init__(
     ) -> None:
         """
         Simple progress output used when tqdm is unavailable.
-        Same as tqdm, output to stderr channel
+        Same as tqdm, output to stderr channel.
+        If you want to do nested Progressbars with simple progress
+        the parent progress bar should be used as a context
+        (i.e. with statement) and the nested progress bar should be
+        created inside this context.
         """
         self.cur = 0
-
         self.iterable = iterable
         self.total = total
         if total is None and hasattr(iterable, "__len__"):
@@ -69,6 +106,16 @@ def __init__(
         self.mininterval = mininterval
         self.last_print_t = 0.0
         self.closed = False
+        self._is_parent = False
+
+    def __enter__(self):
+        self._is_parent = True
+        self._refresh()
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback) -> Literal[False]:
+        self.close()
+        return False
 
     def __iter__(self):
         if self.closed or not self.iterable:
@@ -87,8 +134,8 @@ def _refresh(self):
         else:
             # e.g., progress: .....
             progress_str += "." * self.cur
-
-        print("\r" + progress_str, end="", file=self.file)
+        end = "\n" if self._is_parent else ""
+        print("\r" + progress_str, end=end, file=self.file)
 
     def update(self, amount: int = 1):
         if self.closed:
@@ -101,7 +148,7 @@ def update(self, amount: int = 1):
             self.last_print_t = cur_t
 
     def close(self):
-        if not self.closed:
+        if not self.closed and not self._is_parent:
             self._refresh()
             print(file=self.file)  # end with new line
             self.closed = True
diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py
@@ -23,7 +23,7 @@
     _compute_jacobian_wrt_params,
     _compute_jacobian_wrt_params_with_sample_wise_trick,
 )
-from captum._utils.progress import progress
+from captum._utils.progress import NullProgress, progress
 from captum.influence._core.influence import DataInfluence
 from captum.influence._utils.common import (
     _format_inputs_dataset,
@@ -1006,13 +1006,6 @@ def _self_influence_by_checkpoints(
         # If `show_progress` is true, create an outer progress bar that keeps track of
         # how many checkpoints have been processed
         if show_progress:
-            checkpoints_progress = progress(
-                desc=(
-                    f"Using {self.get_name()} to compute self "
-                    "influence. Processing checkpoint"
-                ),
-                total=len(self.checkpoints),
-            )
             # Try to determine length of inner progress bar if possible, with a default
             # of `None`.
             inputs_dataset_len = None
@@ -1090,17 +1083,29 @@ def get_checkpoint_contribution(checkpoint):
             # We concatenate the contributions from each batch into a single 1D tensor,
             # which represents the contributions for all batches in `inputs_dataset`
 
-            if show_progress:
-                checkpoints_progress.update()
-
             return torch.cat(checkpoint_contribution, dim=0)
 
-        batches_self_tracin_scores = get_checkpoint_contribution(self.checkpoints[0])
-
-        # The self influence score for all examples is the sum of contributions from
-        # each checkpoint
-        for checkpoint in self.checkpoints[1:]:
-            batches_self_tracin_scores += get_checkpoint_contribution(checkpoint)
+        if show_progress:
+            checkpoints_progress = progress(
+                desc=(
+                    f"Using {self.get_name()} to compute self "
+                    "influence. Processing checkpoint"
+                ),
+                total=len(self.checkpoints),
+                mininterval=0.0,
+            )
+        else:
+            checkpoints_progress = NullProgress()
+        with checkpoints_progress:
+            batches_self_tracin_scores = get_checkpoint_contribution(
+                self.checkpoints[0]
+            )
+            checkpoints_progress.update()
+            # The self influence score for all examples is the sum of contributions from
+            # each checkpoint
+            for checkpoint in self.checkpoints[1:]:
+                batches_self_tracin_scores += get_checkpoint_contribution(checkpoint)
+                checkpoints_progress.update()
 
         return batches_self_tracin_scores
 
diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py
@@ -8,7 +8,7 @@
 import torch
 from captum._utils.common import _format_inputs, _get_module_from_name, _sort_key_list
 from captum._utils.gradient import _gather_distributed_tensors
-from captum._utils.progress import progress
+from captum._utils.progress import NullProgress, progress
 
 from captum.influence._core.tracincp import (
     _influence_route_to_helpers,
@@ -556,13 +556,6 @@ def _self_influence_by_checkpoints(
         # If `show_progress` is true, create an outer progress bar that keeps track of
         # how many checkpoints have been processed
         if show_progress:
-            checkpoints_progress = progress(
-                desc=(
-                    f"Using {self.get_name()} to compute self "
-                    "influence. Processing checkpoint"
-                ),
-                total=len(self.checkpoints),
-            )
             # Try to determine length of inner progress bar if possible, with a default
             # of `None`.
             inputs_dataset_len = None
@@ -621,20 +614,31 @@ def get_checkpoint_contribution(checkpoint):
 
             # We concatenate the contributions from each batch into a single 1D tensor,
             # which represents the contributions for all batches in `inputs_dataset`
-
-            if show_progress:
-                checkpoints_progress.update()
-
             return torch.cat(checkpoint_contribution, dim=0)
 
-        batches_self_tracin_scores = get_checkpoint_contribution(self.checkpoints[0])
-
-        # The self influence score for all examples is the sum of contributions from
-        # each checkpoint
-        for checkpoint in self.checkpoints[1:]:
-            batches_self_tracin_scores += get_checkpoint_contribution(checkpoint)
+        if show_progress:
+            checkpoints_progress = progress(
+                desc=(
+                    f"Using {self.get_name()} to compute self "
+                    "influence. Processing checkpoint"
+                ),
+                total=len(self.checkpoints),
+                mininterval=0.0,
+            )
+        else:
+            checkpoints_progress = NullProgress()
 
-        return batches_self_tracin_scores
+        with checkpoints_progress:
+            batches_self_tracin_scores = get_checkpoint_contribution(
+                self.checkpoints[0]
+            )
+            checkpoints_progress.update()
+            # The self influence score for all examples is the sum of contributions from
+            # each checkpoint
+            for checkpoint in self.checkpoints[1:]:
+                batches_self_tracin_scores += get_checkpoint_contribution(checkpoint)
+                checkpoints_progress.update()
+            return batches_self_tracin_scores
 
     def self_influence(
         self,
diff --git a/tests/influence/_core/test_tracin_show_progress.py b/tests/influence/_core/test_tracin_show_progress.py
@@ -49,9 +49,15 @@ def _check_error_msg_multiplicity(
         output = mock_stderr.getvalue()
         actual_msg_multiplicity = output.count(msg)
         assert isinstance(actual_msg_multiplicity, int)
-        error_msg = f"Error in progress of batches with output: {repr(output)}"
+        error_msg = (
+            f"Error in progress of batches with output looking for '{msg}'"
+            f" at least {msg_multiplicity} times"
+            f"(found {actual_msg_multiplicity}) in {repr(output)}"
+        )
         if greater_than:
-            self.assertTrue(actual_msg_multiplicity - msg_multiplicity >= 0, error_msg)
+            self.assertGreaterEqual(
+                actual_msg_multiplicity, msg_multiplicity, error_msg
+            )
         else:
             self.assertEqual(
                 actual_msg_multiplicity,
@@ -124,23 +130,6 @@ def test_tracin_show_progress(
                     # `outer_loop_by_checkpoints` is True. In this case, we should see a
                     # single outer progress bar over checkpoints, and for every
                     # checkpoints, a separate progress bar over batches
-
-                    # In this case, displaying progress involves nested progress
-                    # bars, which are not currently supported by the backup
-                    # `SimpleProgress` that is used if `tqdm` is not installed.
-                    # Therefore, we skip the test in this case.
-                    # TODO: support nested progress bars for `SimpleProgress`
-                    try:
-                        import tqdm  # noqa
-                    except ModuleNotFoundError:
-                        raise unittest.SkipTest(
-                            (
-                                "Skipping self influence progress bar tests for "
-                                f"{tracin.get_name()}, because proper displaying "
-                                "requires the tqdm module, which is not installed."
-                            )
-                        )
-
                     tracin.self_influence(
                         DataLoader(train_dataset, batch_size=batch_size),
                         show_progress=True,
diff --git a/tests/utils/test_progress.py b/tests/utils/test_progress.py
@@ -4,11 +4,61 @@
 import unittest
 import unittest.mock
 
-from captum._utils.progress import progress
+from captum._utils.progress import NullProgress, progress
 from tests.helpers.basic import BaseTest
 
 
 class Test(BaseTest):
+    @unittest.mock.patch("sys.stderr", new_callable=io.StringIO)
+    def test_nullprogress(self, mock_stderr) -> None:
+        count = 0
+        with NullProgress(["x", "y", "z"]) as np:
+            for _ in np:
+                for _ in NullProgress([1, 2, 3]):
+                    count += 1
+
+        self.assertEqual(count, 9)
+        output = mock_stderr.getvalue()
+        self.assertEqual(output, "")
+
+    @unittest.mock.patch("sys.stderr", new_callable=io.StringIO)
+    def test_nested_progress_tqdm(self, mock_stderr) -> None:
+        try:
+            import tqdm  # noqa: F401
+        except ImportError:
+            raise unittest.SkipTest("Skipping tqdm test, tqdm not available.")
+
+        parent_data = ["x", "y", "z"]
+        test_data = [1, 2, 3]
+        with progress(parent_data, desc="parent progress") as parent:
+            for item in parent:
+                for _ in progress(test_data, desc=f"test progress {item}"):
+                    pass
+        output = mock_stderr.getvalue()
+        self.assertIn("parent progress:", output)
+        for item in parent_data:
+            self.assertIn(f"test progress {item}:", output)
+
+    @unittest.mock.patch("sys.stderr", new_callable=io.StringIO)
+    def test_nested_simple_progress(self, mock_stderr) -> None:
+        parent_data = ["x", "y", "z"]
+        test_data = [1, 2, 3]
+        with progress(
+            parent_data, desc="parent progress", use_tqdm=False, mininterval=0.0
+        ) as parent:
+            for item in parent:
+                for _ in progress(
+                    test_data, desc=f"test progress {item}", use_tqdm=False
+                ):
+                    pass
+
+        output = mock_stderr.getvalue()
+        self.assertEqual(
+            output.count("parent progress:"), 5, "5 'parent' progress bar expected"
+        )
+        for item in parent_data:
+            self.assertIn(f"test progress {item}:", output)
+
     @unittest.mock.patch("sys.stderr", new_callable=io.StringIO)
     def test_progress_tqdm(self, mock_stderr) -> None:
         try: