Parallel Rosenbrock

This problem minimizes a generalized Rosenbrock function in parallel. The objective function is

\[f(x) = \sum_{i=1}^{n-1} \left[ (1 - x_{i})^{2} + 100(x_{i+1} - x_{i}^{2})^{2} \right]\]

Two constraints are imposed in this problem. The first is that the point remain within a ball of radius 1/2 centered on the origin:

\[c_{1}(x) = \frac{1}{4} - \sum_{i=1}^{n} x_{i}^{2} \ge 0\]

The second is a linear constraint that the sum of all variables be greater than -10:

\[c_{2}(x) = \sum_{i=1}^{n} x_{i} + 10 \ge 0\]
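
As a quick sanity check of these definitions, the short standalone snippet below (not part of the ParOpt example; the helper names serialObjective, serialCon1, and serialCon2 are purely illustrative) evaluates the objective and both constraints at the origin, where \(f(0) = n - 1\), \(c_{1}(0) = 1/4\), and \(c_{2}(0) = 10\), so the origin is strictly feasible.

#include <cstdio>
#include <vector>

// Serial evaluation of the generalized Rosenbrock objective and the two
// constraints defined above. These helpers are illustrative only and are
// not part of the ParOpt example that follows.
static double serialObjective( const std::vector<double> &x ){
  double f = 0.0;
  for ( size_t i = 0; i + 1 < x.size(); i++ ){
    f += (1.0 - x[i])*(1.0 - x[i]) +
         100.0*(x[i+1] - x[i]*x[i])*(x[i+1] - x[i]*x[i]);
  }
  return f;
}

static double serialCon1( const std::vector<double> &x ){
  double c = 0.25;  // c1(x) = 1/4 - sum_i x_i^2
  for ( size_t i = 0; i < x.size(); i++ ){
    c -= x[i]*x[i];
  }
  return c;
}

static double serialCon2( const std::vector<double> &x ){
  double c = 10.0;  // c2(x) = sum_i x_i + 10
  for ( size_t i = 0; i < x.size(); i++ ){
    c += x[i];
  }
  return c;
}

int main(){
  std::vector<double> x(10, 0.0);  // evaluate at the origin with n = 10
  std::printf("f = %g, c1 = %g, c2 = %g\n",
              serialObjective(x), serialCon1(x), serialCon2(x));
  return 0;
}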

C++ implementation

This example further demonstrates the ParOptProblem interface class, making use of a distributed design vector, separable sparse constraints, and Hessian-vector products.

Hessian-vector products can be used to accelerate the convergence of the optimizer. They are generally most effective once the optimizer has approached a point close to the optimum. These products are implemented by the user in the ParOptProblem class, using the following prototype:

int evalHvecProduct( ParOptVec *xvec,
                     ParOptScalar *z, ParOptVec *zwvec,
                     ParOptVec *pxvec, ParOptVec *hvec );

This function provides access to Hessian-vector products of the Lagrangian. In ParOpt the Lagrangian function is defined as

\[\mathcal{L} \triangleq f(x) - z^{T} c(x) - z_{w}^{T} c_{w}(x)\]

where \(\mathcal{L}\) is the Lagrangian, \(f(x), c(x), c_{w}(x)\) are the objective, dense constraints and sparse separable constraints, respectively, and \(z, z_{w}\) are multipliers associated with the dense and sparse constraints. The Hessian-vector product is then computed as

\[h = \nabla^{2} \mathcal{L}(x, z, z_{w}) p_{x}\]
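
As a concrete instance for this problem, the linear constraint \(c_{2}\) has a vanishing Hessian, while \(\nabla^{2} c_{1}(x) = -2 I\), so the dense constraints contribute

\[-z_{1} \nabla^{2} c_{1}(x)\, p_{x} = 2 z_{1} p_{x}\]

to the product (up to the scale factor applied to the constraints in the code). This is exactly the term added at the end of evalHvecProduct in the listing below.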

The interface to the sparse separable constraint code consists of the following four functions:

void evalSparseCon( ParOptVec *x, ParOptVec *out );
void addSparseJacobian( ParOptScalar alpha, ParOptVec *x,
                        ParOptVec *px, ParOptVec *out );
void addSparseJacobianTranspose( ParOptScalar alpha, ParOptVec *x,
                                 ParOptVec *pzw, ParOptVec *out );
void addSparseInnerProduct( ParOptScalar alpha, ParOptVec *x,
                            ParOptVec *cvec, ParOptScalar *A );

These member functions provide the following mathematical operations:

\[\begin{split}\begin{align} \mathrm{out} & \leftarrow c_{w}(x) \\ \mathrm{out} & \leftarrow \mathrm{out} + \alpha A_{w}(x) p_{x} \\ \mathrm{out} & \leftarrow \mathrm{out} + \alpha A_{w}(x)^{T} p_{z_{w}} \\ A & \leftarrow A + \alpha A_{w}(x) C A_{w}(x)^{T} \\ \end{align}\end{split}\]

Here \(A_{w}(x) = \nabla c_{w}(x)\) is the Jacobian of the sparse constraints and \(C\) is a diagonal matrix.
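
In this example each sparse constraint is an affine function of a contiguous block of \(n_{w}\) local variables, so that (see evalSparseCon in the listing below)

\[c_{w,i}(x) = 1 - \sum_{k=0}^{n_{w}-1} x_{j_{i}+k}\]

where \(j_{i}\) is the starting index of block \(i\), determined by nwstart, nw, and nwskip. Each row of \(A_{w}(x)\) therefore contains \(n_{w}\) entries equal to \(-1\).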

#include "ParOptOptimizer.h"

/*
  The following is a simple implementation of a scalable Rosenbrock
  function with constraints that can be used to test the parallel
  optimizer.
*/

class Rosenbrock : public ParOptProblem {
 public:
  Rosenbrock( MPI_Comm comm, int _nvars,
              int _nwcon, int _nwstart,
              int _nw, int _nwskip ):
  ParOptProblem(comm, _nvars, 2, _nwcon, 1){
    nwcon = _nwcon;
    nwstart = _nwstart;
    nw = _nw;
    nwskip = _nwskip;
    scale = 1.0;
  }

  //! Get the variables/bounds
  void getVarsAndBounds( ParOptVec *xvec,
                         ParOptVec *lbvec,
                         ParOptVec *ubvec ){
    ParOptScalar *x, *lb, *ub;
    xvec->getArray(&x);
    lbvec->getArray(&lb);
    ubvec->getArray(&ub);

    // Set the design variable bounds
    for ( int i = 0; i < nvars; i++ ){
      x[i] = -1.0;
      lb[i] = -2.0;
      ub[i] = 1.0;
    }
  }

  //! Evaluate the objective and constraints
  int evalObjCon( ParOptVec *xvec,
                  ParOptScalar *fobj, ParOptScalar *cons ){
    ParOptScalar obj = 0.0;
    ParOptScalar *x;
    xvec->getArray(&x);

    for ( int i = 0; i < nvars-1; i++ ){
      obj += ((1.0 - x[i])*(1.0 - x[i]) +
              100.0*(x[i+1] - x[i]*x[i])*(x[i+1] - x[i]*x[i]));
    }

    ParOptScalar con[2];
    con[0] = con[1] = 0.0;
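    // Local contribution to the quadratic constraint c1(x) = 1/4 - sum_i x_i^2
    // (the constant 1/4 is added after the reduction)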
    for ( int i = 0; i < nvars; i++ ){
      con[0] -= x[i]*x[i];
    }

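    // Local contribution to the linear constraint; note that only every other
    // local variable is summed here (the constant 10 is added after the reduction)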
    for ( int i = 0; i < nvars; i += 2 ){
      con[1] += x[i];
    }

    MPI_Allreduce(&obj, fobj, 1, PAROPT_MPI_TYPE, MPI_SUM, comm);
    MPI_Allreduce(con, cons, 2, PAROPT_MPI_TYPE, MPI_SUM, comm);

    cons[0] += 0.25;
    cons[1] += 10.0;
    cons[0] *= scale;
    cons[1] *= scale;

    return 0;
  }

  //! Evaluate the objective and constraint gradients
  int evalObjConGradient( ParOptVec *xvec,
                          ParOptVec *gvec, ParOptVec **Ac ){
    ParOptScalar *x, *g, *c;
    xvec->getArray(&x);
    gvec->getArray(&g);
    gvec->zeroEntries();

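    // Gradient of the Rosenbrock objective over the local variables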
    for ( int i = 0; i < nvars-1; i++ ){
      g[i] += (-2.0*(1.0 - x[i]) +
               200.0*(x[i+1] - x[i]*x[i])*(-2.0*x[i]));
      g[i+1] += 200.0*(x[i+1] - x[i]*x[i]);
    }

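    // Jacobian of the first (scaled) dense constraint: dc1/dx_i = -2*x_i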
    Ac[0]->getArray(&c);
    for ( int i = 0; i < nvars; i++ ){
      c[i] = -2.0*scale*x[i];
    }

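    // Jacobian of the second (scaled) dense constraint: entries of 1 for
    // every other local variable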
    Ac[1]->getArray(&c);
    for ( int i = 0; i < nvars; i += 2 ){
      c[i] = scale;
    }

    return 0;
  }

  //! Evaluate the product of the Hessian with the given vector
  int evalHvecProduct( ParOptVec *xvec,
                       ParOptScalar *z, ParOptVec *zwvec,
                       ParOptVec *pxvec, ParOptVec *hvec ){
    ParOptScalar *hvals;
    hvec->zeroEntries();
    hvec->getArray(&hvals);

    ParOptScalar *px, *x;
    xvec->getArray(&x);
    pxvec->getArray(&px);

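    // Exact second derivatives of the Rosenbrock terms applied to px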
    for ( int i = 0; i < nvars-1; i++ ){
      hvals[i] += (2.0*px[i] +
                   200.0*(x[i+1] - x[i]*x[i])*(-2.0*px[i]) +
                   200.0*(px[i+1] - 2.0*x[i]*px[i])*(-2.0*x[i]));
      hvals[i+1] += 200.0*(px[i+1] - 2.0*x[i]*px[i]);
    }

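    // Contribution from the first dense constraint: c1 is quadratic with
    // Hessian -2*I, so the Lagrangian term -z[0]*scale*c1 contributes
    // 2*scale*z[0]*px. The sparse constraints are linear and contribute nothing.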
    for ( int i = 0; i < nvars; i++ ){
      hvals[i] += 2.0*scale*z[0]*px[i];
    }

    return 0;
  }

  //! Evaluate the sparse constraints
  void evalSparseCon( ParOptVec *x, ParOptVec *out ){
    ParOptScalar *xvals, *outvals;
    x->getArray(&xvals);
    out->getArray(&outvals);

    for ( int i = 0, j = nwstart; i < nwcon; i++, j += nwskip ){
      outvals[i] = 1.0;
      for ( int k = 0; k < nw; k++, j++ ){
        outvals[i] -= xvals[j];
      }
    }
  }

  //! Add the sparse Jacobian-vector product: out += alpha*J(x)*px
  void addSparseJacobian( ParOptScalar alpha, ParOptVec *x,
                          ParOptVec *px, ParOptVec *out ){
    ParOptScalar *pxvals, *outvals;
    px->getArray(&pxvals);
    out->getArray(&outvals);

    for ( int i = 0, j = nwstart; i < nwcon; i++, j += nwskip ){
      for ( int k = 0; k < nw; k++, j++ ){
        outvals[i] -= alpha*pxvals[j];
      }
    }
  }

  //! Add the transpose sparse Jacobian-vector product: out += alpha*J(x)^{T}*pzw
  void addSparseJacobianTranspose( ParOptScalar alpha, ParOptVec *x,
                                   ParOptVec *pzw, ParOptVec *out ){
    ParOptScalar *outvals, *pzwvals;
    out->getArray(&outvals);
    pzw->getArray(&pzwvals);
    for ( int i = 0, j = nwstart; i < nwcon; i++, j += nwskip ){
      for ( int k = 0; k < nw; k++, j++ ){
        outvals[j] -= alpha*pzwvals[i];
      }
    }
  }

  //! Add the inner product of the constraints to the matrix such
  //! that A += J(x)*cvec*J(x)^{T} where cvec is a diagonal matrix
  void addSparseInnerProduct( ParOptScalar alpha, ParOptVec *x,
                              ParOptVec *cvec, ParOptScalar *A ){
    ParOptScalar *cvals;
    cvec->getArray(&cvals);

    for ( int i = 0, j = nwstart; i < nwcon; i++, j += nwskip ){
      for ( int k = 0; k < nw; k++, j++ ){
        A[i] += alpha*cvals[j];
      }
    }
  }

  int nwcon;
  int nwstart;
  int nw, nwskip;
  ParOptScalar scale;
};

int main( int argc, char* argv[] ){
  MPI_Init(&argc, &argv);

  // Set the MPI communicator
  MPI_Comm comm = MPI_COMM_WORLD;

  // Get the rank
  int mpi_rank = 0;
  MPI_Comm_rank(comm, &mpi_rank);

  // Get the prefix from the input arguments
  int nvars = 100;
  const char *prefix = NULL;
  char buff[512];
  for ( int k = 0; k < argc; k++ ){
    if (sscanf(argv[k], "prefix=%s", buff) == 1){
      prefix = buff;
    }
    if (sscanf(argv[k], "nvars=%d", &nvars) == 1){
      if (nvars < 100){
        nvars = 100;
      }
    }
  }

  if (mpi_rank == 0){
    printf("prefix = %s\n", prefix);
    fflush(stdout);
  }

  // Allocate the Rosenbrock function
  int nwcon = 5, nw = 5;
  int nwstart = 1, nwskip = 1;
  Rosenbrock *rosen = new Rosenbrock(comm, nvars-1,
                                     nwcon, nwstart, nw, nwskip);
  rosen->incref();

  // Create the options class, and create default values
  ParOptOptions *options = new ParOptOptions();
  ParOptOptimizer::addDefaultOptions(options);

  options->setOption("algorithm", "tr");
  options->setOption("barrier_strategy", "mehrotra");
  options->setOption("output_level", 1);
  options->setOption("qn_type", "bfgs");
  options->setOption("qn_subspace_size", 10);
  options->setOption("abs_res_tol", 1e-6);
  options->setOption("output_file", "paropt.out");
  options->setOption("tr_output_file", "paropt.tr");
  options->setOption("mma_output_file", "paropt.mma");

  ParOptOptimizer *opt = new ParOptOptimizer(rosen, options);
  opt->incref();

  // Set the checkpoint file
  double start = MPI_Wtime();
  if (prefix){
    char output[512];
    snprintf(output, sizeof(output), "%s/rosenbrock_output.bin", prefix);
    options->setOption("ip_checkpoint_file", output);
  }
  opt->optimize();
  double diff = MPI_Wtime() - start;

  if (mpi_rank == 0){
    printf("ParOpt time: %f seconds \n", diff);
  }

  opt->decref();
  rosen->decref();

  MPI_Finalize();
  return (0);
}