Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit bbb258a

Browse files
committed
test ci
1 parent e36c9f0 commit bbb258a

3 files changed

Lines changed: 133 additions & 4 deletions

File tree

python/mxnet/_ctypes/cached_op.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def __call__(self, *args, **kwargs):
7777
if not default_device:
7878
default_device = kwargs.pop('default_ctx', None)
7979
out = kwargs.pop('out', None)
80+
nleaf_vars = [container.data() for container in kwargs.pop('_nleaf_vars', [])]
8081
if kwargs:
8182
raise TypeError(
8283
"CachedOp.__call__ got unexpected keyword argument(s): " + \
@@ -93,7 +94,10 @@ def __call__(self, *args, **kwargs):
9394
*args,
9495
type_id,
9596
device_id,
96-
*out_arg
97+
len(out_arg),
98+
*out_arg,
99+
len(nleaf_vars),
100+
*nleaf_vars
97101
)
98102
if out is not None:
99103
return out

python/mxnet/gluon/block.py

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@
3333
import json
3434
import numpy as np
3535

36-
from ..base import mx_real_t, MXNetError, NDArrayHandle, SymbolHandle, py_str, check_call, _LIB
36+
from ..base import mx_real_t, MXNetError, NDArrayHandle, SymbolHandle, py_str, check_call, _LIB, \
37+
_as_list
3738
from .. import symbol, ndarray, initializer, autograd, _deferred_compute as dc, name as _name, \
3839
profiler as _profiler, device as _device
3940
from ..symbol.numpy import _symbol as np_symbol
4041
from ..symbol import Symbol, fromjson
4142
from ..ndarray import NDArray, get_dtype_name
42-
from .parameter import Parameter, DeferredInitializationError
43+
from .parameter import Parameter, DeferredInitializationError, Intermediate
4344
from .utils import _indent, _brief_print_list, HookHandle, shape_is_known
4445
from .utils import _check_same_symbol_type, _check_all_np_ndarrays, _check_block_input_np_ndarrays
4546
from .. import numpy_extension as _mx_npx
@@ -1091,6 +1092,7 @@ def __init__(self):
10911092
self._backend_opts = {}
10921093
self._partition_if_dynamic = True
10931094
self._first_forward = True
1095+
self._nleaf_vars = OrderedDict()
10941096

10951097
def __setattr__(self, name, value):
10961098
"""Registers parameters."""
@@ -1302,7 +1304,7 @@ def _call_cached_op(self, *args):
13021304
args_without_none = [ele for ele in args if ele is not None]
13031305
cargs = [args_without_none[i] if is_arg else i.data()
13041306
for is_arg, name, i in self._cached_op_args]
1305-
out = self._cached_op(*cargs)
1307+
out = self._cached_op(*cargs, _nleaf_vars=self._nleaf_vars.values())
13061308
if isinstance(out, NDArray):
13071309
out = [out]
13081310
return _regroup(out, self._out_format)
@@ -1678,6 +1680,92 @@ def reset_ctx(self, ctx):
16781680
self.reset_device(ctx)
16791681

16801682

1683+
def intermediate(self, names, var_arrays_inp, grad_req='write'):
    """Mark intermediate variables of the block.

    Parameters
    ----------
    names : str or tuple[str]
        Name(s) under which the intermediate variable(s) are registered.
    var_arrays_inp : ndarray or tuple[ndarray]
        The output(s) of the expression to be marked.
    grad_req : str, default 'write'
        Gradient request used when gradients are attached later
        ('write', 'add' or 'null').

    Returns
    -------
    ``var_arrays_inp`` unchanged, so the call can be used inline in an
    expression.
    """
    # Normalize once; both branches need the same list conversion.
    var_arrays = _as_list(var_arrays_inp)
    names = _as_list(names)
    if not self._active:
        self._nleaf_vars.update(
            {name: Intermediate(name, array, grad_req)
             for name, array in zip(names, var_arrays)})
    else:
        # While hybridized, the arrays must also be registered with the
        # deferred-compute graph; temporarily leave deferred-compute mode.
        prev_val = dc.set_deferred_compute(False)
        try:
            import ctypes
            # Pack the NDArray handles into a C array for the FFI call.
            var_handles = (ctypes.c_void_p * len(var_arrays))(
                *[arr.handle for arr in var_arrays])
            check_call(_LIB.MXNDArrayMarkDCVariables(
                var_handles, len(var_arrays), len(self._nleaf_vars)))
            self._nleaf_vars.update(
                {name: Intermediate(name, array, grad_req)
                 for name, array in zip(names, var_arrays)})
        finally:
            # Always restore the previous deferred-compute state, even if
            # the C API call raises.
            dc.set_deferred_compute(prev_val)
    return var_arrays_inp
1711+
1712+
def attach_grad_intermediate(self):
    """Attach a gradient buffer to every marked intermediate variable.

    Each variable's own ``grad_req`` is honored when attaching.
    """
    for intermediate in self._nleaf_vars.values():
        array = intermediate.data()
        array.attach_grad(grad_req=intermediate.grad_req)
1717+
1718+
def get_intermediate(self, names):
    """Return the marked intermediate variable(s) registered under *names*.

    A list of names yields a list of variables; a single name yields the
    single matching variable. Raises ``KeyError`` for an unknown name.
    """
    if not isinstance(names, list):
        return self._nleaf_vars[names]
    return [self._nleaf_vars[name] for name in names]
1725+
1726+
def intermediate(self, names, var_arrays_inp, grad_req='write'):
    """Mark intermediate variables of the block.

    Parameters
    ----------
    names : str or tuple[str]
        Name(s) under which the intermediate variable(s) are registered.
    var_arrays_inp : ndarray or tuple[ndarray]
        The output(s) of the expression to be marked.
    grad_req : str, default 'write'
        Gradient request used when gradients are attached later
        ('write', 'add' or 'null').

    Returns
    -------
    ``var_arrays_inp`` unchanged, so the call can be used inline in an
    expression.
    """
    # Normalize once; both branches need the same list conversion.
    var_arrays = _as_list(var_arrays_inp)
    names = _as_list(names)
    if not self._active:
        self._nleaf_vars.update(
            {name: Intermediate(name, array, grad_req)
             for name, array in zip(names, var_arrays)})
    else:
        # While hybridized, the arrays must also be registered with the
        # deferred-compute graph; temporarily leave deferred-compute mode.
        prev_val = dc.set_deferred_compute(False)
        try:
            import ctypes
            # Pack the NDArray handles into a C array for the FFI call.
            var_handles = (ctypes.c_void_p * len(var_arrays))(
                *[arr.handle for arr in var_arrays])
            check_call(_LIB.MXNDArrayMarkDCVariables(
                var_handles, len(var_arrays), len(self._nleaf_vars)))
            self._nleaf_vars.update(
                {name: Intermediate(name, array, grad_req)
                 for name, array in zip(names, var_arrays)})
        finally:
            # Always restore the previous deferred-compute state, even if
            # the C API call raises.
            dc.set_deferred_compute(prev_val)
    return var_arrays_inp
1754+
1755+
def attach_grad_intermediate(self):
    """Attach a gradient buffer to every marked intermediate variable.

    Each variable's own ``grad_req`` is honored when attaching.
    """
    for intermediate in self._nleaf_vars.values():
        array = intermediate.data()
        array.attach_grad(grad_req=intermediate.grad_req)
1760+
1761+
def get_intermediate(self, names):
    """Return the marked intermediate variable(s) registered under *names*.

    A list of names yields a list of variables; a single name yields the
    single matching variable. Raises ``KeyError`` for an unknown name.
    """
    if not isinstance(names, list):
        return self._nleaf_vars[names]
    return [self._nleaf_vars[name] for name in names]
1768+
16811769
class SymbolBlock(HybridBlock):
16821770
"""Construct block from symbol. This is useful for using pre-trained models
16831771
as feature extractors. For example, you may want to extract the output

python/mxnet/gluon/parameter.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,3 +773,40 @@ def grad_req(self, req):
773773
warnings.warn('Constant parameter "{}" does not support '
774774
'grad_req other than "null", and new value "{}" '
775775
'is ignored.'.format(self.name, req))
776+
777+
class Intermediate:
    """A container holding a marked intermediate variable of a Block.

    Parameters
    ----------
    name : str
        Name of this variable. It is used to retrieve the marked variable.
    data : NDArray, optional
        The marked intermediate array itself.
    grad_req : {'write', 'add', 'null'}, default 'write'
        Specifies how to update gradient to grad arrays.

        - ``'write'`` means everytime gradient is written to grad :py:class:`NDArray`.
        - ``'add'`` means everytime gradient is added to the grad :py:class:`NDArray`. You need
          to manually call ``zero_grad()`` to clear the gradient buffer before each
          iteration when using this option.
        - ``'null'`` means gradient is not requested for this parameter. Gradient arrays
          will not be allocated.
    """
    def __init__(self, name, data=None, grad_req='write'):
        self._name = name
        self._data = data
        self._grad_req = grad_req

    def __repr__(self):
        s = 'Intermediate name={name}'
        return s.format(name=self._name)

    def data(self):
        """Return the marked array (``None`` if no array was supplied)."""
        return self._data

    @property
    def name(self):
        """Name of this intermediate variable."""
        return self._name

    @property
    def grad_req(self):
        """Gradient request used when attaching a gradient buffer."""
        return self._grad_req

0 commit comments

Comments
 (0)