ROL
ROL_TypeP_TrustRegionAlgorithm_Def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Rapid Optimization Library (ROL) Package
4//
5// Copyright 2014 NTESS and the ROL contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef ROL_TYPEP_TRUSTREGIONALGORITHM_DEF_HPP
11#define ROL_TYPEP_TRUSTREGIONALGORITHM_DEF_HPP
12
13#include <deque>
14
15namespace ROL {
16namespace TypeP {
17
18
19template<typename Real>
21 const Ptr<Secant<Real>> &secant) {
22 // Set status test
23 status_->reset();
24 status_->add(makePtr<StatusTest<Real>>(list));
25
26 ParameterList &trlist = list.sublist("Step").sublist("Trust Region");
27 // Trust-Region Parameters
28 state_->searchSize = trlist.get("Initial Radius", -1.0);
29 delMax_ = trlist.get("Maximum Radius", ROL_INF<Real>());
30 eta0_ = trlist.get("Step Acceptance Threshold", 0.05);
31 eta1_ = trlist.get("Radius Shrinking Threshold", 0.05);
32 eta2_ = trlist.get("Radius Growing Threshold", 0.9);
33 gamma0_ = trlist.get("Radius Shrinking Rate (Negative rho)", 0.0625);
34 gamma1_ = trlist.get("Radius Shrinking Rate (Positive rho)", 0.25);
35 gamma2_ = trlist.get("Radius Growing Rate", 2.5);
36 TRsafe_ = trlist.get("Safeguard Size", 100.0);
38 interpRad_ = trlist.get("Use Radius Interpolation", false);
39 verbosity_ = trlist.sublist("General").get("Output Level", 0);
40 initProx_ = trlist.get("Apply Prox to Initial Guess", false);
41 t0_ = list.sublist("Status Test").get("Proximal Gradient Parameter", 1.0);
42 // Nonmonotone Parameters
43 storageNM_ = trlist.get("Nonmonotone Storage Size", 0);
44 useNM_ = (storageNM_ <= 0 ? false : true);
45 // Algorithm-Specific Parameters
46 ROL::ParameterList &lmlist = trlist.sublist("TRN");
47 mu0_ = lmlist.get("Sufficient Decrease Parameter", 1e-2);
48 spexp_ = lmlist.get("Relative Tolerance Exponent", 1.0);
49 spexp_ = std::max(static_cast<Real>(1),std::min(spexp_,static_cast<Real>(2)));
50 redlim_ = lmlist.sublist("Cauchy Point").get("Maximum Number of Reduction Steps", 10);
51 explim_ = lmlist.sublist("Cauchy Point").get("Maximum Number of Expansion Steps", 10);
52 alpha_ = lmlist.sublist("Cauchy Point").get("Initial Step Size", 1.0);
53 normAlpha_ = lmlist.sublist("Cauchy Point").get("Normalize Initial Step Size", false);
54 interpf_ = lmlist.sublist("Cauchy Point").get("Reduction Rate", 0.1);
55 extrapf_ = lmlist.sublist("Cauchy Point").get("Expansion Rate", 10.0);
56 qtol_ = lmlist.sublist("Cauchy Point").get("Decrease Tolerance", 1e-8);
57 // Subsolver (general) parameters
58 lambdaMin_ = lmlist.sublist("Solver").get("Minimum Spectral Step Size", 1e-8);
59 lambdaMax_ = lmlist.sublist("Solver").get("Maximum Spectral Step Size", 1e8);
60 gamma_ = lmlist.sublist("Solver").get("Sufficient Decrease Tolerance", 1e-4);
61 maxSize_ = lmlist.sublist("Solver").get("Maximum Storage Size", 10);
62 maxit_ = lmlist.sublist("Solver").get("Iteration Limit", 25);
63 tol1_ = lmlist.sublist("Solver").get("Absolute Tolerance", 1e-4);
64 tol2_ = lmlist.sublist("Solver").get("Relative Tolerance", 1e-2);
65 // Subsolver (spectral projected gradient) parameters
66 useMin_ = lmlist.sublist("Solver").get("Use Smallest Model Iterate", true);
67 useNMSP_ = lmlist.sublist("Solver").get("Use Nonmonotone Search", false);
68 std::string ssname = lmlist.sublist("Solver").get("Subproblem Solver", "SPG");
70 // Subsolver (nonlinear conjugate gradient) parameters)
71 ncgType_ = lmlist.sublist("Solver").sublist("NCG").get("Nonlinear CG Type", 4);
72 etaNCG_ = lmlist.sublist("Solver").sublist("NCG").get("Truncation Parameter for HZ CG", 1e-2);
73 desPar_ = lmlist.sublist("Solver").sublist("NCG").get("Descent Parameter", 0.2);
74 // Inexactness Information
75 ParameterList &glist = list.sublist("General");
76 useInexact_.clear();
77 useInexact_.push_back(glist.get("Inexact Objective Function", false));
78 useInexact_.push_back(glist.get("Inexact Gradient", false));
79 useInexact_.push_back(glist.get("Inexact Hessian-Times-A-Vector", false));
80 // Trust-Region Inexactness Parameters
81 ParameterList &ilist = trlist.sublist("Inexact").sublist("Gradient");
82 scale0_ = ilist.get("Tolerance Scaling", static_cast<Real>(0.1));
83 scale1_ = ilist.get("Relative Tolerance", static_cast<Real>(2));
84 // Inexact Function Evaluation Information
85 ParameterList &vlist = trlist.sublist("Inexact").sublist("Value");
86 scale_ = vlist.get("Tolerance Scaling", static_cast<Real>(1.e-1));
87 omega_ = vlist.get("Exponent", static_cast<Real>(0.9));
88 force_ = vlist.get("Forcing Sequence Initial Value", static_cast<Real>(1.0));
89 updateIter_ = vlist.get("Forcing Sequence Update Frequency", static_cast<int>(10));
90 forceFactor_ = vlist.get("Forcing Sequence Reduction Factor", static_cast<Real>(0.1));
91 // Output Parameters
92 verbosity_ = list.sublist("General").get("Output Level",0);
94 // Secant Information
95 useSecantPrecond_ = list.sublist("General").sublist("Secant").get("Use as Preconditioner", false);
96 useSecantHessVec_ = list.sublist("General").sublist("Secant").get("Use as Hessian", false);
100 // Initialize trust region model
101 model_ = makePtr<TrustRegionModel_U<Real>>(list,secant,mode);
102 if (secant == nullPtr) {
103 std::string secantType = list.sublist("General").sublist("Secant").get("Type","Limited-Memory BFGS");
104 esec_ = StringToESecant(secantType);
105 }
106}
107
108template<typename Real>
110 const Vector<Real> &g,
111 Real ftol,
112 Objective<Real> &sobj,
113 Objective<Real> &nobj,
114 Vector<Real> &px,
115 Vector<Real> &dg,
116 std::ostream &outStream) {
117 // Initialize data
119 nhess_ = 0;
120 // Update approximate gradient and approximate objective function.
121 if (initProx_){
122 nobj.prox(*state_->iterateVec,x,t0_, ftol); state_->nprox++;
123 x.set(*state_->iterateVec);
124 }
125 sobj.update(x,UpdateType::Initial,state_->iter);
126 state_->svalue = sobj.value(x,ftol); state_->nsval++;
127 nobj.update(x, UpdateType::Initial,state_->iter);
128 state_->nvalue = nobj.value(x,ftol); state_->nnval++;
129 state_->value = state_->svalue + state_->nvalue;
130 computeGradient(x,*state_->gradientVec,px,dg,*state_->stepVec,state_->searchSize,sobj,nobj,true,gtol_,state_->gnorm,outStream);
131
132 state_->snorm = ROL_INF<Real>();
133 // Normalize initial CP step length
134 if (normAlpha_) alpha_ /= state_->gradientVec->norm();//change here?
135 // Compute initial trust region radius if desired.
136 if ( state_->searchSize <= static_cast<Real>(0) )
137 state_->searchSize = state_->gradientVec->norm();
138 SPiter_ = 0;
139 SPflag_ = 0;
140}
141
142template<typename Real>
144 Real &outTol,
145 Real pRed,
146 Real &fold,
147 int iter,
148 const Vector<Real> &x,
149 const Vector<Real> &xold,
150 Objective<Real> &sobj) {
151 outTol = std::sqrt(ROL_EPSILON<Real>());
152 if ( useInexact_[0] ) {
153 if (!(iter%updateIter_) && (iter!=0)) force_ *= forceFactor_;
154 const Real one(1);
155 Real eta = static_cast<Real>(0.999)*std::min(eta1_,one-eta2_);
156 outTol = scale_*std::pow(eta*std::min(pRed,force_),one/omega_);
157 if (inTol > outTol) {
158 fold = sobj.value(xold,outTol); state_->nsval++;
159 }
160 }
161 // Evaluate objective function at new iterate
163 Real fval = sobj.value(x,outTol); state_->nsval++;
164 return fval;
165}
166
167template<typename Real>
169 Vector<Real> &g,
170 Vector<Real> &px,
171 Vector<Real> &dg,
172 Vector<Real> &step,
173 Real del,
174 Objective<Real> &sobj,
175 Objective<Real> &nobj,
176 bool accept,
177 Real &gtol,
178 Real &gnorm,
179 std::ostream &outStream) const {
180 if ( useInexact_[1] ) {
181 Real gtol0 = scale0_*del;
182 if (accept) gtol = gtol0 + static_cast<Real>(1);
183 else gtol0 = scale0_*std::min(gnorm,del);
184 while ( gtol > gtol0 ) {
185 gtol = gtol0;
186 sobj.gradient(g,x,gtol); state_->ngrad++;
187 dg.set(g.dual());
188 pgstep(px, step, nobj, x, dg, t0_, gtol0); // change gtol? one or ocScale?
189 gnorm = step.norm() / t0_;
190 gtol0 = scale0_*std::min(gnorm,del);
191 }
192 }
193 else {
194 if (accept) {
195 gtol = std::sqrt(ROL_EPSILON<Real>());
196 sobj.gradient(g,x,gtol); state_->ngrad++;
197 dg.set(g.dual());
198 pgstep(px, step, nobj, x, dg, t0_, gtol);
199 gnorm = step.norm() / t0_;
200 }
201 }
202}
203
204template<typename Real>
206 const Vector<Real> &g,
207 Objective<Real> &sobj,
208 Objective<Real> &nobj,
209 std::ostream &outStream ) {
210 const Real zero(0), one(1);
211 //Real tol0 = std::sqrt(ROL_EPSILON<Real>());
212 Real inTol = static_cast<Real>(0.1)*ROL_OVERFLOW<Real>(), outTol(inTol);
213 Real strial(0), ntrial(0), smodel(0), Ftrial(0), pRed(0), rho(1);
214 // Initialize trust-region data
215 std::vector<std::string> output;
216 Ptr<Vector<Real>> gmod = g.clone();
217 Ptr<Vector<Real>> px = x.clone();
218 Ptr<Vector<Real>> dg = x.clone();
219 // Initialize Algorithm
220 initialize(x,g,inTol,sobj,nobj, *px, *dg, outStream);
221 // Initialize storage vectors
222 Ptr<Vector<Real>> pwa1 = x.clone(), pwa2 = x.clone();
223 Ptr<Vector<Real>> pwa3 = x.clone(), pwa4 = x.clone();
224 Ptr<Vector<Real>> pwa5 = x.clone(), pwa6 = x.clone();
225 Ptr<Vector<Real>> pwa7 = x.clone();
226 Ptr<Vector<Real>> dwa1 = g.clone(), dwa2 = g.clone();
227 // Initialize nonmonotone data
228 Real rhoNM(0), sigmac(0), sigmar(0);
229 Real fr(state_->value), fc(state_->value), fmin(state_->value);
230 TRUtils::ETRFlag TRflagNM;
231 int L(0);
232 // Output
233 if (verbosity_ > 0) writeOutput(outStream,true);
234
235 while (status_->check(*state_)) {
236 // Build trust-region model
237 model_->setData(sobj,*state_->iterateVec,*state_->gradientVec,gtol_);
238
239 /**** SOLVE TRUST-REGION SUBPROBLEM ****/
240 //q = state_->svalue + state_->nvalue;//q is no longer used
241 gmod->set(*state_->gradientVec);
242 smodel = state_->svalue;
243 ntrial = state_->nvalue;
244 switch (algSelect_) {
246 default:
247 // Compute Cauchy point (TRON notation: x = x[1])
248 dcauchy(*state_->stepVec,alpha_, smodel, ntrial,
249 *state_->iterateVec, *dg, state_->searchSize,
250 *model_, nobj, *px, *dwa1, *dwa2, outStream); // Solve 1D optimization problem for alpha
251 x.plus(*state_->stepVec); // Set x = x[0] + alpha*g
252 // Model gradient at s = x[1] - x[0]
253 gmod->plus(*dwa1); // hessVec from Cauchy point computation
254
255 // Apply SPG starting from the Cauchy point->change input
256 dspg(x,smodel,ntrial,*gmod,*state_->iterateVec,state_->searchSize,
257 *model_,nobj,*pwa1,*pwa2,*pwa3,*pwa4,*pwa5,*pwa6,*pwa7,*dwa1,
258 outStream);
259 pRed = state_->value - (smodel+ntrial);
260 break;
262 dspg2(x,smodel, ntrial, pRed, *gmod, *state_->iterateVec,
263 state_->searchSize, *model_, nobj,
264 *pwa1, *pwa2, *px, *dwa1, outStream);
265 break;
267 dncg(x,smodel,ntrial,*gmod,*state_->iterateVec,state_->searchSize,
268 *model_,nobj,*pwa1,*pwa2,*pwa3,*pwa4,*pwa5,*pwa6,*dwa1,
269 outStream);
270 pRed = state_->value - (smodel+ntrial);
271 break;
272 }
273
274 // Update storage and compute predicted reduction
275 //pRed = -q; // now updated in dcauchy/dspg
276 state_->stepVec->set(x); state_->stepVec->axpy(-one,*state_->iterateVec);
277 state_->snorm = state_->stepVec->norm();
278
279 // Compute trial objective value
280 strial = computeValue(inTol,outTol,pRed,state_->svalue,state_->iter,x,*state_->iterateVec,sobj);
281 Ftrial = strial + ntrial;
282
283 // Compute ratio of actual and predicted reduction
285 TRUtils::analyzeRatio<Real>(rho,TRflag_,state_->value,Ftrial,pRed,eps_,outStream,verbosity_>1);
286 if (useNM_) {
287 TRUtils::analyzeRatio<Real>(rhoNM,TRflagNM,fr,Ftrial,pRed+sigmar,eps_,outStream,verbosity_>1);
288 TRflag_ = (rho < rhoNM ? TRflagNM : TRflag_);
289 rho = (rho < rhoNM ? rhoNM : rho );
290 }
291
292 // Update algorithm state
293 state_->iter++;
294 // Accept/reject step and update trust region radius
295 if ((rho < eta0_ && TRflag_ == TRUtils::SUCCESS) || (TRflag_ >= 2)) { // Step Rejected
296 x.set(*state_->iterateVec);
297 sobj.update(x,UpdateType::Revert,state_->iter);
298 nobj.update(x,UpdateType::Revert,state_->iter);
299 if (interpRad_ && (rho < zero && TRflag_ != TRUtils::TRNAN)) {
300 // Negative reduction, interpolate to find new trust-region radius
301 state_->searchSize = TRUtils::interpolateRadius<Real>(*state_->gradientVec,*state_->stepVec,
302 state_->snorm,pRed,state_->value,Ftrial,state_->searchSize,gamma0_,gamma1_,eta2_,
303 outStream,verbosity_>1);
304 }
305 else { // Shrink trust-region radius
306 state_->searchSize = gamma1_*std::min(state_->snorm,state_->searchSize);
307 }
308 computeGradient(x,*state_->gradientVec,*px,*dg,*pwa1,state_->searchSize,sobj,nobj,false,gtol_,state_->gnorm,outStream);
309 }
310 else if ((rho >= eta0_ && TRflag_ != TRUtils::NPOSPREDNEG)
311 || (TRflag_ == TRUtils::POSPREDNEG)) { // Step Accepted
312 state_->value = Ftrial;
313 state_->svalue = strial;
314 state_->nvalue = ntrial;
315 sobj.update(x,UpdateType::Accept,state_->iter);
316 nobj.update(x,UpdateType::Accept,state_->iter);
317 inTol = outTol;
318 if (useNM_) {
319 sigmac += pRed; sigmar += pRed;
320 if (Ftrial < fmin) { fmin = Ftrial; fc = fmin; sigmac = zero; L = 0; }
321 else {
322 L++;
323 if (Ftrial > fc) { fc = Ftrial; sigmac = zero; }
324 if (L == storageNM_) { fr = fc; sigmar = sigmac; }
325 }
326 }
327 // Increase trust-region radius
328 if (rho >= eta2_) state_->searchSize = std::min(gamma2_*state_->searchSize, delMax_);
329 // Compute gradient at new iterate
330 dwa1->set(*state_->gradientVec);
331 computeGradient(x,*state_->gradientVec,*px,*dg,*pwa1,state_->searchSize,sobj,nobj,true,gtol_,state_->gnorm,outStream);
332 state_->iterateVec->set(x);
333 // Update secant information in trust-region model
334 model_->update(x,*state_->stepVec,*dwa1,*state_->gradientVec,
335 state_->snorm,state_->iter);
336 }
337
338 // Update Output
339 if (verbosity_ > 0) writeOutput(outStream,writeHeader_);
340 }
342}
343
344template<typename Real>
346 Real &alpha,
347 Real &sval,
348 Real &nval,
349 const Vector<Real> &x,
350 const Vector<Real> &g,
351 const Real del,
353 Objective<Real> &nobj,
354 Vector<Real> &px,
355 Vector<Real> &dwa,
356 Vector<Real> &dwa1,
357 std::ostream &outStream) {
358 const Real half(0.5), sold(sval), nold(nval);
359 Real tol = std::sqrt(ROL_EPSILON<Real>());
360 bool interp = false;
361 Real gs(0), snorm(0), Qk(0), pRed(0);
362 // Compute s = P(x[0] - alpha g[0]) - x[0]
363 pgstep(px, s, nobj, x, g, alpha, tol);
364 snorm = s.norm();
365 if (snorm > del) {
366 interp = true;
367 }
368 else {
369 model.hessVec(dwa,s,x,tol); nhess_++;
370 nobj.update(px, UpdateType::Trial);
371 nval = nobj.value(px, tol); state_->nnval++;
372 gs = s.dot(g);
373 sval = sold + gs + half * s.apply(dwa);
374 pRed = (sold + nold) - (sval + nval);
375 Qk = gs + nval - nold;
376 interp = (pRed < -mu0_*Qk);
377 }
378 // Either increase or decrease alpha to find approximate Cauchy point
379 int cnt = 0;
380 if (interp) {//decrease loop
381 bool search = true;
382 while (search) {
383 alpha *= interpf_;
384 pgstep(px, s, nobj, x, g, alpha, tol);
385 snorm = s.norm();
386 if (snorm <= del) {
387 model.hessVec(dwa,s,x,tol); nhess_++;
388 nobj.update(px, UpdateType::Trial);
389 nval = nobj.value(px, tol); state_->nnval++;
390 gs = s.dot(g);
391 sval = sold + gs + half * s.apply(dwa);
392 pRed = (sold + nold) - (sval + nval);
393 Qk = gs + nval - nold;
394 search = ((pRed < -mu0_*Qk) && (cnt < redlim_)) ;
395 }
396 cnt++;
397 }
398 }
399 else {
400 bool search = true;
401 Real alphas = alpha;
402 Real mvals = pRed;
403 Real svals = sval;
404 dwa1.set(dwa);
405 while (search) {
406 alpha *= extrapf_;
407 pgstep(px, s, nobj, x, g, alpha, tol);
408 snorm = s.norm();
409 if (snorm <= del && cnt < explim_){// && mnew < mold + mu0_*Qk) {
410 model.hessVec(dwa,s,x,tol); nhess_++;
411 nobj.update(px, UpdateType::Trial);
412 nval = nobj.value(px, tol); state_->nnval++;
413 gs = s.dot(g);
414 sval = sold + gs + half * s.apply(dwa);
415 pRed = (sold + nold) - (sval + nval);
416 Qk = gs + nval - nold;
417 if (pRed >= -mu0_*Qk && std::abs(pRed-mvals) > qtol_*std::abs(mvals)) {
418 dwa1.set(dwa);
419 alphas = alpha;
420 mvals = pRed;
421 svals = sval;
422 search = true;
423 }
424 else {
425 dwa.set(dwa1);
426 pRed = mvals;
427 sval = svals;
428 search = false;
429 }
430 }
431 else {
432 search = false;
433 }
434 cnt++;
435 }
436 alpha = alphas;
437 pgstep(px, s, nobj, x, g, alpha, tol);
438 snorm = s.norm();
439 }
440 if (verbosity_ > 1) {
441 outStream << " Cauchy point" << std::endl;
442 outStream << " Step length (alpha): " << alpha << std::endl;
443 outStream << " Step length (alpha*g): " << snorm << std::endl;
444 outStream << " Model decrease (pRed): " << pRed << std::endl;
445 if (!interp)
446 outStream << " Number of extrapolation steps: " << cnt << std::endl;
447 }
448 return snorm;
449}
450
451template<typename Real>
453 Real &sval,
454 Real &nval,
455 Real &pRed,
456 Vector<Real> &gmod,
457 const Vector<Real> &x,
458 Real del,
460 Objective<Real> &nobj,
461 Vector<Real> &pwa,
462 Vector<Real> &pwa1,
463 Vector<Real> &pwa2,
464 Vector<Real> &dwa,
465 std::ostream &outStream) {
466 // Use SPG to approximately solve TR subproblem:
467 // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> subject to y\in C, ||y|| \le del
468 //
469 // Inpute:
470 // y = Primal vector
471 // x = Current iterate
472 // g = Current gradient
473 const Real half(0.5), one(1), safeguard(1e2*ROL_EPSILON<Real>());
474 const Real mprev(sval+nval);
475 Real tol(std::sqrt(ROL_EPSILON<Real>()));
476 Real coeff(1), alpha(1), alphaMax(1), lambda(1), lambdaTmp(1);
477 Real gs(0), ss(0), gnorm(0), s0s0(0), ss0(0), sHs(0), snorm(0), nold(nval);
478 pwa1.zero();
479
480 // Set y = x
481 y.set(x);
482
483 // Compute initial step
484 coeff = one / gmod.norm();
485 lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
486 pgstep(pwa2, pwa, nobj, y, gmod.dual(), lambda, tol);
487 gs = gmod.apply(pwa); // gs = <step, model gradient>
488 ss = pwa.dot(pwa); // Norm squared of step
489 snorm = std::sqrt(ss); // norm(step)
490 gnorm = snorm / lambda; // norm(step) / lambda
491
492 // Compute initial projected gradient norm
493 const Real gtol = std::min(tol1_,tol2_*gnorm);
494
495 if (verbosity_ > 1)
496 outStream << " Spectral Projected Gradient" << std::endl;
497
498 SPiter_ = 0;
499 while (SPiter_ < maxit_) {
500 SPiter_++;
501
502 // Evaluate model Hessian
503 model.hessVec(dwa,pwa,x,tol); nhess_++; // dwa = H step
504 sHs = dwa.apply(pwa); // sHs = <step, H step>
505
506 // Evaluate nonsmooth term
507 nobj.update(pwa2,UpdateType::Trial);
508 nval = nobj.value(pwa2,tol); state_->nnval++;
509
510 // Perform line search
511 alphaMax = one;
512 if (snorm >= del-safeguard) { // Trust-region constraint is violated
513 ss0 = pwa1.dot(pwa);
514 alphaMax = std::min(one, (-ss0 + std::sqrt(ss0*ss0 - ss*(s0s0-del*del)))/ss);
515 }
516 alpha = (sHs <= safeguard) ? alphaMax : std::min(alphaMax, -(gs + nval - nold)/sHs);
517
518 // Update model quantities
519 if (alpha == one) {
520 y.set(pwa2);
521 sval += gs + half * sHs;
522 gmod.plus(dwa);
523 }
524 else {
525 y.axpy(alpha,pwa); // New iterate
527 nval = nobj.value(y, tol); state_->nnval++;
528 sval += alpha * (gs + half * alpha * sHs);
529 gmod.axpy(alpha,dwa);
530 }
531 nold = nval;
532 pRed = mprev - (sval+nval);
533
534 // Check trust-region constraint violation
535 pwa1.set(y); pwa1.axpy(-one,x);
536 s0s0 = pwa1.dot(pwa1);
537 snorm = std::sqrt(s0s0);
538
539 if (verbosity_ > 1) {
540 outStream << std::endl;
541 outStream << " Iterate: " << SPiter_ << std::endl;
542 outStream << " Spectral step length (lambda): " << lambda << std::endl;
543 outStream << " Step length (alpha): " << alpha << std::endl;
544 outStream << " Model decrease (pRed): " << pRed << std::endl;
545 outStream << " Optimality criterion: " << gnorm << std::endl;
546 outStream << " Step norm: " << snorm << std::endl;
547 outStream << std::endl;
548 }
549
550 if (snorm >= del - safeguard) { SPflag_ = 2; break; }
551
552 // Compute new spectral step
553 lambdaTmp = (sHs <= safeguard) ? one/gmod.norm() : ss/sHs;
554 lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
555
556 pgstep(pwa2, pwa, nobj, y, gmod.dual(), alpha, tol); // pass pwa by reference? *pwa?
557 gs = gmod.apply(pwa);
558 ss = pwa.dot(pwa);
559 gnorm = std::sqrt(ss) / lambda;
560
561 if (gnorm <= gtol) { SPflag_ = 0; break; }
562 }
563 SPflag_ = (SPiter_==maxit_) ? 1 : SPflag_;
564}
565
566template<typename Real>
568 Real &sval,
569 Real &nval,
570 Vector<Real> &gmod,
571 const Vector<Real> &x,
572 Real del,
574 Objective<Real> &nobj,
575 Vector<Real> &ymin,
576 Vector<Real> &pwa,
577 Vector<Real> &pwa1,
578 Vector<Real> &pwa2,
579 Vector<Real> &pwa3,
580 Vector<Real> &pwa4,
581 Vector<Real> &pwa5,
582 Vector<Real> &dwa,
583 std::ostream &outStream) {
584 // Use SPG to approximately solve TR subproblem:
585 // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> + phi(y) subject to ||y|| \le del
586 //
587 // Inpute:
588 // y = Cauchy step
589 // x = Current iterate
590 // g = Current gradient
591 const Real half(0.5), one(1), safeguard(1e2*ROL_EPSILON<Real>());
592 const Real mval(sval+nval);
593 Real tol(std::sqrt(ROL_EPSILON<Real>()));
594 Real mcomp(0), mval_min(0), sval_min(0), nval_min(0);
595 Real alpha(1), coeff(1), lambda(1), lambdaTmp(1);
596 Real snew(sval), nnew(nval), mnew(mval);
597 Real sold(sval), nold(nval), mold(mval);
598 Real sHs(0), ss(0), gs(0), Qk(0), gnorm(0);
599 std::deque<Real> mqueue; mqueue.push_back(mold);
600
601 if (useNMSP_ && useMin_) {
602 mval_min = mval; sval_min = sval; nval_min = nval; ymin.set(y);
603 }
604
605 // Compute initial proximal gradient norm
606 pwa1.set(gmod.dual());
607 pwa.set(y); pwa.axpy(-t0_,pwa1);
608 dprox(pwa,x,t0_,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream);
609 pwa.axpy(-one,y);
610 gnorm = pwa.norm() / t0_;
611 const Real gtol = std::min(tol1_,tol2_*gnorm);
612
613 // Compute initial spectral step size
614 coeff = one / gmod.norm();
615 lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
616
617 if (verbosity_ > 1)
618 outStream << " Spectral Projected Gradient" << std::endl;
619
620 SPiter_ = 0;
621 while (SPiter_ < maxit_) {
622 SPiter_++;
623
624 // Compuate SPG step
625 alpha = one;
626 pwa.set(y); pwa.axpy(-lambda,pwa1); // pwa = y - lambda gmod.dual()
627 dprox(pwa,x,lambda,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream); // pwa = P(y - lambda gmod.dual())
628 pwa2.set(pwa); // pwa2 = P(y - lambda gmod.dual())
629 pwa.axpy(-one,y); // pwa = P(y - lambda gmod.dual()) - y = step
630 ss = pwa.dot(pwa); // Norm squared of step
631
632 // Evaluate model Hessian
633 model.hessVec(dwa,pwa,x,tol); nhess_++; // dwa = H step
634 nobj.update(pwa2, UpdateType::Trial);
635 nnew = nobj.value(pwa2, tol); state_->nnval++;
636 sHs = dwa.apply(pwa); // sHs = <step, H step>
637 gs = gmod.apply(pwa); // gs = <step, model gradient>
638 snew = half * sHs + gs + sold;
639 mnew = snew + nnew;
640 Qk = gs + nnew - nold;
641
642 // Perform line search
643 //mcomp = useNMSP_ ? *std::max_element(mqueue.begin(),mqueue.end()) : mold;
644 //while( mnew > mcomp + mu0_*Qk ) {
645 // alpha *= interpf_;
646 // pwa2.set(y); pwa2.axpy(alpha,pwa);
647 // nobj.update(pwa2, UpdateType::Trial);
648 // nnew = nobj.value(pwa2, tol); state_->nnval++;
649 // snew = half * alpha * alpha * sHs + alpha * gs + sold;
650 // mnew = nnew + snew;
651 // Qk = alpha * gs + nnew - nold;
652 //}
653 if (useNMSP_) { // Nonmonotone
654 mcomp = *std::max_element(mqueue.begin(),mqueue.end());
655 while( mnew > mcomp + mu0_*Qk ) {
656 alpha *= interpf_;
657 pwa2.set(y); pwa2.axpy(alpha,pwa);
658 nobj.update(pwa2, UpdateType::Trial);
659 nnew = nobj.value(pwa2, tol); state_->nnval++;
660 snew = half * alpha * alpha * sHs + alpha * gs + sold;
661 mnew = nnew + snew;
662 Qk = alpha * gs + nnew - nold;
663 }
664 }
665 else {
666 alpha = (sHs <= safeguard) ? one : std::min(one,-(gs + nnew - nold)/sHs);
667 }
668
669 // Update model quantities
670 y.set(pwa2);
671 sold = snew;
672 nold = nnew;
673 mold = mnew;
674 gmod.axpy(alpha,dwa); // Update model gradient
675 nobj.update(y, UpdateType::Accept);
676
677 // Update nonmonotone line search information
678 if (useNMSP_) {
679 if (static_cast<int>(mqueue.size())==maxSize_) mqueue.pop_front();
680 mqueue.push_back(sval+nval);
681 if (useMin_ && mval <= mval_min) {
682 mval_min = mval; sval_min = sval; nval_min = nval; ymin.set(y);
683 }
684 }
685
686 // Compute projected gradient norm
687 pwa1.set(gmod.dual());
688 pwa.set(y); pwa.axpy(-t0_,pwa1);
689 dprox(pwa,x,t0_,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream);
690 pwa.axpy(-one,y);
691 gnorm = pwa.norm() / t0_;
692
693 if (verbosity_ > 1) {
694 outStream << std::endl;
695 outStream << " Iterate: " << SPiter_ << std::endl;
696 outStream << " Spectral step length (lambda): " << lambda << std::endl;
697 outStream << " Step length (alpha): " << alpha << std::endl;
698 outStream << " Model decrease (pRed): " << mval-mold << std::endl;
699 outStream << " Optimality criterion: " << gnorm << std::endl;
700 outStream << std::endl;
701 }
702 if (gnorm < gtol) break;
703
704 // Compute new spectral step
705 lambdaTmp = (sHs == 0 ? coeff : ss / sHs);
706 lambda = std::max(lambdaMin_, std::min(lambdaTmp, lambdaMax_));
707 }
708 if (useNMSP_ && useMin_) {
709 sval = sval_min; nval = nval_min; y.set(ymin);
710 }
711 else {
712 sval = sold; nval = nold;
713 }
714 SPflag_ = (SPiter_==maxit_) ? 1 : 0;
715}
716
717template<typename Real>
719 const Vector<Real> &x0,
720 Real t,
721 Real del,
722 Objective<Real> &nobj,
723 Vector<Real> &y0,
724 Vector<Real> &y1,
725 Vector<Real> &yc,
726 Vector<Real> &pwa,
727 std::ostream &outStream) const {
728 // Solve ||P(t*x0 + (1-t)*(x-x0))-x0|| = del using Brent's method
729 const Real zero(0), half(0.5), one(1), two(2), three(3);
730 const Real eps(ROL_EPSILON<Real>()), tol0(1e1*eps), fudge(1.0-1e-2*sqrt(eps));
731 Real f0(0), f1(0), fc(0), t0(0), t1(1), tc(0), d1(1), d2(1), tol(1);
732 Real p(0), q(0), r(0), s(0), m(0);
733 int cnt(state_->nprox);
734 nobj.prox(y1, x, t, tol); state_->nprox++;
735 pwa.set(y1); pwa.axpy(-one,x0);
736 f1 = pwa.norm();
737 if (f1 <= del) {
738 x.set(y1);
739 return;
740 }
741 y0.set(x0);
742 tc = t0; fc = f0; yc.set(y0);
743 d1 = t1-t0; d2 = d1;
744 int code = 0;
745 while (true) {
746 if (std::abs(fc-del) < std::abs(f1-del)) {
747 t0 = t1; t1 = tc; tc = t0;
748 f0 = f1; f1 = fc; fc = f0;
749 y0.set(y1); y1.set(yc); yc.set(y0);
750 }
751 tol = two*eps*std::abs(t1) + half*tol0;
752 m = half*(tc - t1);
753 if (std::abs(m) <= tol) { code = 1; break; }
754 if ((f1 >= fudge*del && f1 <= del)) break;
755 if (std::abs(d1) < tol || std::abs(f0-del) <= std::abs(f1-del)) {
756 d1 = m; d2 = d1;
757 }
758 else {
759 s = (f1-del)/(f0-del);
760 if (t0 == tc) {
761 p = two*m*s;
762 q = one-s;
763 }
764 else {
765 q = (f0-del)/(fc-del);
766 r = (f1-del)/(fc-del);
767 p = s*(two*m*q*(q-r)-(t1-t0)*(r-one));
768 q = (q-one)*(r-one)*(s-one);
769 }
770 if (p > zero) q = -q;
771 else p = -p;
772 s = d1;
773 d1 = d2;
774 if (two*p < three*m*q-std::abs(tol*q) && p < std::abs(half*s*q)) {
775 d2 = p/q;
776 }
777 else {
778 d1 = m; d2 = d1;
779 }
780 }
781 t0 = t1; f0 = f1; y0.set(y1);
782 if (std::abs(d2) > tol) t1 += d2;
783 else if (m > zero) t1 += tol;
784 else t1 -= tol;
785 pwa.set(x); pwa.scale(t1); pwa.axpy(one-t1,x0);
786 nobj.prox(y1, pwa, t1*t, tol); state_->nprox++;
787 pwa.set(y1); pwa.axpy(-one,x0);
788 f1 = pwa.norm();
789 if ((f1 > del && fc > del) || (f1 <= del && fc <= del)) {
790 tc = t0; fc = f0; yc.set(y0);
791 d1 = t1-t0; d2 = d1;
792 }
793 }
794 if (code==1 && f1>del) x.set(yc);
795 else x.set(y1);
796 if (verbosity_ > 1) {
797 outStream << std::endl;
798 outStream << " Trust-Region Subproblem Proximity Operator" << std::endl;
799 outStream << " Number of proxes: " << state_->nprox-cnt << std::endl;
800 if (code == 1 && f1 > del) {
801 outStream << " Transformed Multiplier: " << tc << std::endl;
802 outStream << " Dual Residual: " << fc-del << std::endl;
803 }
804 else {
805 outStream << " Transformed Multiplier: " << t1 << std::endl;
806 outStream << " Dual Residual: " << f1-del << std::endl;
807 }
808 outStream << " Exit Code: " << code << std::endl;
809 outStream << std::endl;
810 }
811}
812
813template<typename Real>
814void TrustRegionAlgorithm<Real>::dbls(Real &alpha, Real &nval, Real &pred,
815 const Vector<Real> &y,
816 const Vector<Real> &s,
817 Real lambda, Real tmax,
818 Real kappa, Real gs,
819 Objective<Real> &nobj,
820 Vector<Real> &pwa) {
821 Real tol(std::sqrt(ROL_EPSILON<Real>()));
822 const Real eps(1e-2*std::sqrt(ROL_EPSILON<Real>())), tol0(1e4*tol);
823 const Real eps0(1e2*ROL_EPSILON<Real>());
824 const unsigned maxit(50);
825 const Real zero(0), half(0.5), one(1), two(2);
826 const Real lam(0.5*(3.0-std::sqrt(5.0)));
827 const Real nold(nval);
828 const Real mu(1e-4);
829
830 // Evaluate model at initial left end point (t = 0)
831 Real tL(0), pL(0);
832 // Evaluate model at right end point (t = tmax)
833 Real tR = tmax;
834 pwa.set(y); pwa.axpy(tR, s);
835 nobj.update(pwa,UpdateType::Trial);
836 Real nR = nobj.value(pwa,tol); state_->nnval++;
837 Real pR = tR * (half * tR * kappa + gs) + nR - nold;
838
839 // Compute minimizer of quadratic upper bound
840 Real t0 = tR, n0 = nR;
841 if (tmax > lambda) {
842 t0 = lambda;
843 pwa.set(y); pwa.axpy(t0, s);
844 nobj.update(pwa,UpdateType::Trial);
845 n0 = nobj.value(pwa,tol); state_->nnval++;
846 }
847 Real ts = (kappa > 0) ? std::min(tR,(((nold - n0) / kappa) / t0) - (gs / kappa)) : tR;
848 Real t = std::min(t0,ts);
849 bool useOptT = true;
850 if (t <= tL) {
851 t = (t0 < tR ? t0 : half*tR);
852 useOptT = false;
853 }
854
855 // Evaluate model at t (t = minimizer of quadratic upper bound)
856 pwa.set(y); pwa.axpy(t, s);
857 nobj.update(pwa,UpdateType::Trial);
858 Real nt = nobj.value(pwa,tol); state_->nnval++;
859 Real Qt = t * gs + nt - nold, Qu = Qt;
860 Real pt = half * t * t * kappa + Qt;
861
862 // If phi(x) = phi(x+tmax s) = phi(x+ts), then
863 // phi(x+ts) is constant for all t, so the TR
864 // model is quadratic---use the minimizer
865 if (useOptT && nt == nold && nR == nold) {
866 alpha = ts;
867 pred = ts * (half * ts * kappa + gs);
868 nval = nold;
869 return;
870 }
871
872 // If pt >= max(pL, pR), then the model is concave
873 // and the minimum is obtained at the end points
874 if (pt >= std::max(pL, pR)) {
875 alpha = tR;
876 pred = pR;
877 nval = nR;
878 return;
879 }
880
881 // Run Brent's method (quadratic interpolation + golden section)
882 // to minimize m_k(x+ts) with respect to t
883 Real w = t, v = t, pw = pt, pv = pt, d(0), pu(0), nu(0);
884 Real u(0), p(0), q(0), r(0), etmp(0), e(0), dL(0), dR(0);
885 Real tm = half * (tL + tR);
886 Real tol1 = tol0 * std::abs(t)+eps;
887 Real tol2 = two * tol1;
888 Real tol3 = eps;
889 for (unsigned it = 0u; it < maxit; ++it) {
890 dL = tL-t; dR = tR-t;
891 if (std::abs(e) > tol1) {
892 r = (t-w)*(pt-pv);
893 q = (t-v)*(pt-pw);
894 p = (t-v)*q-(t-w)*r;
895 q = two*(q-r);
896 if (q > zero) p = -p;
897 q = std::abs(q);
898 etmp = e;
899 e = d;
900 if ( std::abs(p) >= std::abs(half*q*etmp) || p <= q*dL || p >= q*dR ) {
901 e = (t > tm ? dL : dR);
902 d = lam * e;
903 }
904 else {
905 d = p/q;
906 u = t+d;
907 if (u-tL < tol2 || tR-u < tol2) d = (tm >= t ? tol1 : -tol1);
908 }
909 }
910 else {
911 e = (t > tm ? dL : dR);
912 d = lam * e;
913 }
914 u = t + (std::abs(d) >= tol1 ? d : (d >= zero ? tol1 : -tol1));
915 pwa.set(y); pwa.axpy(u, s);
916 nobj.update(pwa,UpdateType::Trial);
917 nu = nobj.value(pwa,tol); state_->nnval++;
918 Qu = u * gs + nu - nold;
919 pu = half * u * u * kappa + Qu;
920 if (pu <= pt) {
921 if (u >= t) tL = t;
922 else tR = t;
923 v = w; w = t; t = u;
924 pv = pw; pw = pt; pt = pu;
925 nt = nu; Qt = Qu;
926 }
927 else {
928 if (u < t) tL = u;
929 else tR = u;
930 if (pu <= pw || w == t) {
931 v = w; w = u;
932 pv = pw; pw = pu;
933 }
934 else if (pu <= pv || v == t || v == w) {
935 v = u; pv = pu;
936 }
937 }
938 tm = half * (tL+tR);
939 tol1 = tol0*std::abs(t)+eps;
940 tol2 = two*tol1;
941 tol3 = eps0 * std::max(std::abs(Qt),one);
942 if (pt <= (mu*std::min(zero,Qt)+tol3) && std::abs(t-tm) <= (tol2-half*(tR-tL))) break;
943 }
944 alpha = t;
945 pred = pt;
946 nval = nt;
947}
948
949// NCG Subsolver
950template<typename Real>
952 Real &sval,
953 Real &nval,
954 Vector<Real> &gmod,
955 const Vector<Real> &x,
956 Real del,
958 Objective<Real> &nobj,
959 Vector<Real> &s,
960 Vector<Real> &pwa1,
961 Vector<Real> &pwa2,
962 Vector<Real> &pwa3,
963 Vector<Real> &pwa4,
964 Vector<Real> &pwa5,
965 Vector<Real> &dwa,
966 std::ostream &outStream) {
967 // Use NCG to approximately solve TR subproblem:
968 // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> + phi(y) subject to ||y-x|| \le del
969 //
970 // Inpute:
971 // y = computed iterate
972 // sval = smooth model value
973 // nval = nonsmooth value
974 // gmod = current gradient
975 // x = current iterate
976 // del = trust region radius
977 // model = trust region model
978 // nobj = nonsmooth objective function
979 // s = the current step
980 // pwa1 = the SPG iterate
981 // pwa2 = the "negative gradient"
982 // pwa3 = y - x
983 // pwa4 = the previous "negative gradient"
984 // pwa5 = temporary storage
985 // dwa = the Hessian applied to the step
986 const Real zero(0), half(0.5), one(1), two(2);
987 const Real del2(del*del);
988 Real tol(std::sqrt(ROL_EPSILON<Real>())), safeguard(tol);
989 Real mold(sval+nval), nold(nval);
990 Real snorm(0), snorm0(0), gnorm(0), gnorm0(0), gnorm2(0);
991 Real alpha(1), beta(1), lambdaTmp(1), lambda(1), eta(etaNCG_);
992 Real alphaMax(1), pred(0), lam1(1);
993 Real sy(0), gg(0), sHs(0), gs(0), ds(0), ss(0), ss0(0);
994 bool reset(true);
995
996 // Set y = x
997 y.set(x);
998 pwa3.zero(); // Initially y - x = 0
999
1000 // Compute initial spectral step length
1001 lambdaTmp = t0_ / gmod.norm();
1002 lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
1003 lam1 = lambda;
1004
1005 // Compute Cauchy point via SPG
1006 pgstep(pwa1, pwa2, nobj, y, gmod.dual(), lambda, tol); // pwa1 = prox(x-lambda g), pwa2 = pwa1 - x
1007 pwa2.scale(one/lambda); // pwa2 = (pwa1 - x) / lambda (for smooth: pwa2 = negative gradient)
1008 s.set(pwa2); // s = (pwa1 - x) / lambda
1009 gs = gmod.apply(s); // gs = <g, prox(x-lambda g)-x> / lambda
1010 snorm = s.norm(); // snorm = norm(prox(x-lambda g)-x) / lambda
1011 gnorm = snorm;
1012 const Real gtol = std::min(tol1_,tol2_*gnorm);
1013
1014 if (verbosity_>1) outStream << " Nonlinear Conjugate Gradient" << std::endl;
1015
1016 SPiter_ = 0;
1017 SPflag_ = 1;
1018 while (SPiter_ < maxit_) {
1019 SPiter_++;
1020
1021 // Compute the model curvature
1022 model.hessVec(dwa,s,x,tol); nhess_++; // dwa = H step
1023 sHs = dwa.apply(s); // sHs = <step, H step>
1024
1025 // Compute alpha as the 1D minimize in the s direction
1026 ss = snorm*snorm;
1027 ds = s.dot(pwa3);
1028 alphaMax = (-ds + std::sqrt(ds*ds + ss*(del2 - snorm0*snorm0)))/ss;
1029 dbls(alpha,nold,pred,y,s,lam1,alphaMax,sHs,gs,nobj,pwa5);
1030
1031 //if (sHs <= safeguard) alpha = alphaMax;
1032 //else {
1033 // pwa5.set(y); pwa5.axpy(alphaMax, s);
1034 // nobj.update(pwa5,UpdateType::Trial);
1035 // nmax = nobj.value(pwa5,tol); state_->nnval++;
1036 // alpha = alphaMax * std::min(one, -(alphaMax * gs + nmax - nold)/(alphaMax * alphaMax * sHs));
1037 // if (alpha <= safeguard) alpha = std::min(one, -(gs + nval - nold)/sHs);
1038 //}
1039
1040 // Update quantities to evaluate quadratic model value and gradient
1041 y.axpy(alpha, s);
1042 gmod.axpy(alpha, dwa); // need dual here?
1043 sval += alpha*(gs + half*alpha*sHs);
1044
1045 // Check step size
1046 pwa3.set(y); pwa3.axpy(-one,x);
1047 ss0 += alpha*(alpha*ss + two*ds);
1048 snorm0 = std::sqrt(ss0); // pwa3.norm();
1049
1050 if (snorm0 >= (one-safeguard)*del) { SPflag_ = 2; break; }
1051
1052 // Update spectral step length
1053 lambdaTmp = (sHs <= safeguard) ? t0_/gmod.norm() : ss/sHs;
1054 lambda = std::max(lambdaMin_, std::min(lambdaMax_, lambdaTmp));
1055
1056 // Compute SPG direction
1057 pwa4.set(pwa2); // store previous "negative gradient"
1058 pgstep(pwa1, pwa2, nobj, y, gmod.dual(), lambda, tol); // pwa1 = prox(x-lambda g), pwa2 = pwa1 - x
1059 pwa2.scale(one/lambda); // pwa2 = (pwa1 - x) / lambda (for smooth: pwa2 = negative gradient)
1060 gnorm0 = gnorm;
1061 gnorm = pwa2.norm();
1062 if (gnorm <= gtol) { SPflag_ = 0; break; }
1063
1064 gnorm2 = gnorm * gnorm;
1065 switch (ncgType_) {
1066 case 0: // Fletcher-Reeves
1067 beta = gnorm2/(gnorm0 * gnorm0);
1068 break;
1069 default:
1070 case 1: // Polyak-Ribiere+
1071 pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1072 beta = std::max(zero, -pwa5.dot(pwa2)/(gnorm0*gnorm0));
1073 break;
1074 case 2: // Hager-Zhang
1075 pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1076 sy = s.dot(pwa5);
1077 gg = pwa2.dot(pwa4);
1078 eta = -one/(s.norm()*std::min(etaNCG_, gnorm0));
1079 beta = std::max(eta, (gnorm2-gg-two*pwa2.dot(s)*(gnorm2-two*gg+(gnorm0*gnorm0))/sy)/sy);
1080 break;
1081 case 3: // Hestenes-Stiefel+
1082 pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1083 beta = std::max(zero, -pwa2.dot(pwa5)/s.dot(pwa5));
1084 break;
1085 case 4: // Dai-Yuan+
1086 pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1087 beta = std::max(zero, gnorm2/s.dot(pwa5));
1088 break;
1089 case 5: // Fletcher-Reeves-Polyak-Ribiere
1090 pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1091 beta = std::max(-gnorm2, std::min(gnorm2, -pwa5.dot(pwa2)))/(gnorm0*gnorm0);
1092 break;
1093 case 6: //Dai-Yuan-Hestenes-Stiefles
1094 pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1095 beta = std::max(zero, std::min(-pwa2.dot(pwa5), gnorm2)/s.dot(pwa5));
1096 break;
1097 }
1098
1099 reset = true;
1100 if (beta != zero && beta < ROL_INF<Real>()){
1101 pwa5.set(pwa2); // pwa5 = pwa2 (SPG step)
1102 pwa5.axpy(beta, s); // pwa5 = pwa2 + beta*s
1103 pwa4.set(y); // pwa4 = y
1104 pwa4.plus(pwa5); // pwa4 = y + pwa5
1105 nobj.update(pwa4,UpdateType::Trial);
1106 nval = nobj.value(pwa4,tol); state_->nnval++;
1107 gs = gmod.apply(pwa5);
1108 if (gs + nval - nold <= -(one - desPar_)*gnorm2) {
1109 s.set(pwa5);
1110 lam1 = one;
1111 reset = false;
1112 }
1113 }
1114 if (reset){ // Reset because either beta=0 or step does not produce descent
1115 s.set(pwa2);
1116 gs = gmod.apply(s);
1117 lam1 = lambda;
1118 beta = zero;
1119 }
1120 snorm = s.norm();
1121
1122 if (verbosity_ > 1) {
1123 outStream << std::endl;
1124 outStream << " Iterate: " << SPiter_ << std::endl;
1125 outStream << " Spectral step length (lambda): " << lambda << std::endl;
1126 outStream << " Step length (alpha): " << alpha << std::endl;
1127 outStream << " NCG parameter (beta): " << beta << std::endl;
1128 outStream << " Model decrease (pRed): " << mold-(sval+nold) << std::endl;
1129 outStream << " Step size: " << snorm0 << std::endl;
1130 outStream << " Optimality criterion: " << gnorm << std::endl;
1131 outStream << " Optimality tolerance: " << gtol << std::endl;
1132 outStream << std::endl;
1133 }
1134 }
1135 nval = nold;
1136}
1137
1138template<typename Real>
1139void TrustRegionAlgorithm<Real>::writeHeader( std::ostream& os ) const {
1140 std::ios_base::fmtflags osFlags(os.flags());
1141 if (verbosity_ > 1) {
1142 os << std::string(114,'-') << std::endl;
1143 switch (algSelect_) {
1144 default:
1145 case TRUSTREGION_P_SPG: os << " SPG "; break;
1146 case TRUSTREGION_P_SPG2: os << " Simplified SPG "; break;
1147 case TRUSTREGION_P_NCG: os << " NCG "; break;
1148 }
1149 os << "trust-region method status output definitions" << std::endl << std::endl;
1150 os << " iter - Number of iterates (steps taken)" << std::endl;
1151 os << " value - Objective function value" << std::endl;
1152 os << " gnorm - Norm of the gradient" << std::endl;
1153 os << " snorm - Norm of the step (update to optimization vector)" << std::endl;
1154 os << " delta - Trust-Region radius" << std::endl;
1155 os << " #sval - Number of times the smooth objective function was evaluated" << std::endl;
1156 os << " #nval - Number of times the nonsmooth objective function was evaluated" << std::endl;
1157 os << " #grad - Number of times the gradient was computed" << std::endl;
1158 os << " #hess - Number of times the Hessian was applied" << std::endl;
1159 os << " #prox - Number of times the proximal operator was computed" << std::endl;
1160 os << std::endl;
1161 os << " tr_flag - Trust-Region flag" << std::endl;
1162 for( int flag = TRUtils::SUCCESS; flag != TRUtils::UNDEFINED; ++flag ) {
1163 os << " " << NumberToString(flag) << " - "
1164 << TRUtils::ETRFlagToString(static_cast<TRUtils::ETRFlag>(flag)) << std::endl;
1165 }
1166 os << std::endl;
1167 os << " iterSP - Number of Spectral Projected Gradient iterations" << std::endl << std::endl;
1168 os << " flagSP - Trust-Region Spectral Projected Gradient flag" << std::endl;
1169 os << " 0 - Converged" << std::endl;
1170 os << " 1 - Iteration Limit Exceeded" << std::endl;
1171 os << std::string(114,'-') << std::endl;
1172 }
1173 os << " ";
1174 os << std::setw(6) << std::left << "iter";
1175 os << std::setw(15) << std::left << "value";
1176 os << std::setw(15) << std::left << "gnorm";
1177 os << std::setw(15) << std::left << "snorm";
1178 os << std::setw(15) << std::left << "delta";
1179 os << std::setw(10) << std::left << "#sval";
1180 os << std::setw(10) << std::left << "#nval";
1181 os << std::setw(10) << std::left << "#grad";
1182 os << std::setw(10) << std::left << "#hess";
1183 os << std::setw(10) << std::left << "#prox";
1184 os << std::setw(10) << std::left << "tr_flag";
1185 os << std::setw(10) << std::left << "iterSP";
1186 os << std::setw(10) << std::left << "flagSP";
1187 os << std::endl;
1188 os.flags(osFlags);
1189}
1190
1191template<typename Real>
1192void TrustRegionAlgorithm<Real>::writeName( std::ostream& os ) const {
1193 std::ios_base::fmtflags osFlags(os.flags());
1194 os << std::endl;
1195 switch (algSelect_) {
1196 default:
1197 case TRUSTREGION_P_SPG: os << "SPG "; break;
1198 case TRUSTREGION_P_SPG2: os << "Simplified SPG "; break;
1199 case TRUSTREGION_P_NCG: os << "NCG "; break;
1200 }
1201 os << "Trust-Region Method (Type P)" << std::endl;
1202 os.flags(osFlags);
1203}
1204
1205template<typename Real>
1206void TrustRegionAlgorithm<Real>::writeOutput( std::ostream& os, bool write_header ) const {
1207 std::ios_base::fmtflags osFlags(os.flags());
1208 os << std::scientific << std::setprecision(6);
1209 if ( state_->iter == 0 ) writeName(os);
1210 if ( write_header ) writeHeader(os);
1211 if ( state_->iter == 0 ) {
1212 os << " ";
1213 os << std::setw(6) << std::left << state_->iter;
1214 os << std::setw(15) << std::left << state_->value;
1215 os << std::setw(15) << std::left << state_->gnorm;
1216 os << std::setw(15) << std::left << "---";
1217 os << std::setw(15) << std::left << state_->searchSize;
1218 os << std::setw(10) << std::left << state_->nsval;
1219 os << std::setw(10) << std::left << state_->nnval;
1220 os << std::setw(10) << std::left << state_->ngrad;
1221 os << std::setw(10) << std::left << nhess_;
1222 os << std::setw(10) << std::left << state_->nprox;
1223 os << std::setw(10) << std::left << "---";
1224 os << std::setw(10) << std::left << "---";
1225 os << std::setw(10) << std::left << "---";
1226 os << std::endl;
1227 }
1228 else {
1229 os << " ";
1230 os << std::setw(6) << std::left << state_->iter;
1231 os << std::setw(15) << std::left << state_->value;
1232 os << std::setw(15) << std::left << state_->gnorm;
1233 os << std::setw(15) << std::left << state_->snorm;
1234 os << std::setw(15) << std::left << state_->searchSize;
1235 os << std::setw(10) << std::left << state_->nsval;
1236 os << std::setw(10) << std::left << state_->nnval;
1237 os << std::setw(10) << std::left << state_->ngrad;
1238 os << std::setw(10) << std::left << nhess_;
1239 os << std::setw(10) << std::left << state_->nprox;
1240 os << std::setw(10) << std::left << TRflag_;
1241 os << std::setw(10) << std::left << SPiter_;
1242 os << std::setw(10) << std::left << SPflag_;
1243 os << std::endl;
1244 }
1245 os.flags(osFlags);
1246}
1247
1248} // namespace TypeP
1249} // namespace ROL
1250
1251#endif
Objective_SerialSimOpt(const Ptr< Obj > &obj, const V &ui) z0 zero)()
virtual void initialize(const Vector< Real > &x)
Initialize temporary variables.
Provides the interface to evaluate objective functions.
virtual void prox(Vector< Real > &Pv, const Vector< Real > &v, Real t, Real &tol)
Compute the proximity operator.
virtual void gradient(Vector< Real > &g, const Vector< Real > &x, Real &tol)
Compute gradient.
virtual Real value(const Vector< Real > &x, Real &tol)=0
Compute value.
virtual void update(const Vector< Real > &x, UpdateType type, int iter=-1)
Update objective function.
Provides interface for and implements limited-memory secant operators.
Provides an interface to check status of optimization algorithms.
Provides the interface to evaluate trust-region model functions.
virtual void hessVec(Vector< Real > &hv, const Vector< Real > &v, const Vector< Real > &s, Real &tol) override
void pgstep(Vector< Real > &pgiter, Vector< Real > &pgstep, Objective< Real > &nobj, const Vector< Real > &x, const Vector< Real > &dg, Real t, Real &tol) const
const Ptr< AlgorithmState< Real > > state_
virtual void writeExitStatus(std::ostream &os) const
const Ptr< CombinedStatusTest< Real > > status_
void initialize(const Vector< Real > &x, const Vector< Real > &g)
TRUtils::ETRFlag TRflag_
Trust-region exit flag.
void dncg(Vector< Real > &y, Real &sval, Real &nval, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &s, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &pwa3, Vector< Real > &pwa4, Vector< Real > &pwa5, Vector< Real > &dwa, std::ostream &outStream=std::cout)
unsigned verbosity_
Output level (default: 0).
TrustRegionAlgorithm(ParameterList &list, const Ptr< Secant< Real > > &secant=nullPtr)
ESecant esec_
Secant type (default: Limited-Memory BFGS).
Real gamma1_
Radius decrease rate (positive rho) (default: 0.25).
void dprox(Vector< Real > &x, const Vector< Real > &x0, Real t, Real del, Objective< Real > &nobj, Vector< Real > &y0, Vector< Real > &y1, Vector< Real > &yc, Vector< Real > &pwa, std::ostream &outStream=std::cout) const
void dspg(Vector< Real > &y, Real &sval, Real &nval, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &ymin, Vector< Real > &pwa, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &pwa3, Vector< Real > &pwa4, Vector< Real > &pwa5, Vector< Real > &dwa, std::ostream &outStream=std::cout)
void initialize(Vector< Real > &x, const Vector< Real > &g, Real ftol, Objective< Real > &sobj, Objective< Real > &nobj, Vector< Real > &px, Vector< Real > &dg, std::ostream &outStream=std::cout)
void writeName(std::ostream &os) const override
Print step name.
Real gamma0_
Radius decrease rate (negative rho) (default: 0.0625).
void writeOutput(std::ostream &os, bool write_header=false) const override
Print iterate status.
bool interpRad_
Interpolate the trust-region radius if ratio is negative (default: false).
int SPiter_
Subproblem solver iteration count.
int explim_
Maximum number of Cauchy point expansion steps (default: 10).
Real computeValue(Real inTol, Real &outTol, Real pRed, Real &fold, int iter, const Vector< Real > &x, const Vector< Real > &xold, Objective< Real > &obj)
Real tol1_
Absolute tolerance for truncated CG (default: 1e-4).
Real mu0_
Sufficient decrease parameter (default: 1e-2).
Real qtol_
Relative tolerance for computed decrease in Cauchy point computation (default: 1-8).
void dbls(Real &alpha, Real &nval, Real &pred, const Vector< Real > &y, const Vector< Real > &s, Real lambda, Real tmax, Real kappa, Real gs, Objective< Real > &nobj, Vector< Real > &pwa)
Real interpf_
Backtracking rate for Cauchy point computation (default: 1e-1).
Real delMax_
Maximum trust-region radius (default: ROL_INF).
Real alpha_
Initial Cauchy point step length (default: 1.0).
Real dcauchy(Vector< Real > &s, Real &alpha, Real &sval, Real &nval, const Vector< Real > &x, const Vector< Real > &g, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &px, Vector< Real > &dwa, Vector< Real > &dwa1, std::ostream &outStream=std::cout)
bool normAlpha_
Normalize initial Cauchy point step length (default: false).
Real TRsafe_
Safeguard size for numerically evaluating ratio (default: 1e2).
void computeGradient(const Vector< Real > &x, Vector< Real > &g, Vector< Real > &px, Vector< Real > &dg, Vector< Real > &pwa, Real del, Objective< Real > &sobj, Objective< Real > &nobj, bool accept, Real &gtol, Real &gnorm, std::ostream &outStream=std::cout) const
Real extrapf_
Extrapolation rate for Cauchy point computation (default: 1e1).
Real eta2_
Radius increase threshold (default: 0.9).
void writeHeader(std::ostream &os) const override
Print iterate header.
Ptr< TrustRegionModel_U< Real > > model_
Container for trust-region model.
Real eps_
Safeguard for numerically evaluating ratio.
int redlim_
Maximum number of Cauchy point reduction steps (default: 10).
void run(Vector< Real > &x, const Vector< Real > &g, Objective< Real > &sobj, Objective< Real > &nobj, std::ostream &outStream=std::cout) override
Run algorithm on unconstrained problems (Type-U). This general interface supports the use of dual opt...
bool writeHeader_
Flag to write header at every iteration.
Real tol2_
Relative tolerance for truncated CG (default: 1e-2).
Real spexp_
Relative tolerance exponent for subproblem solve (default: 1, range: [1,2]).
Real eta1_
Radius decrease threshold (default: 0.05).
int SPflag_
Subproblem solver termination flag.
bool useSecantHessVec_
Flag to use secant as Hessian (default: false).
Real eta0_
Step acceptance threshold (default: 0.05).
int maxit_
Maximum number of CG iterations (default: 25).
bool useSecantPrecond_
Flag to use secant as a preconditioner (default: false).
Real gamma2_
Radius increase rate (default: 2.5).
int nhess_
Number of Hessian applications.
void dspg2(Vector< Real > &y, Real &sval, Real &nval, Real &pRed, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &pwa, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &dwa, std::ostream &outStream=std::cout)
Defines the linear algebra or vector space interface.
virtual Real apply(const Vector< Real > &x) const
Apply to a dual vector. This is equivalent to the call .
virtual Real norm() const =0
Returns where .
virtual void set(const Vector &x)
Set where .
virtual void scale(const Real alpha)=0
Compute where .
virtual const Vector & dual() const
Return dual representation of , for example, the result of applying a Riesz map, or change of basis,...
virtual void plus(const Vector &x)=0
Compute , where .
virtual void zero()
Set to zero vector.
virtual ROL::Ptr< Vector > clone() const =0
Clone to make a new (uninitialized) vector.
virtual void axpy(const Real alpha, const Vector &x)
Compute where .
virtual Real dot(const Vector &x) const =0
Compute where .
void analyzeRatio(Real &rho, ETRFlag &flag, const Real fold, const Real ftrial, const Real pRed, const Real epsi, std::ostream &outStream=std::cout, const bool print=false)
std::string ETRFlagToString(ETRFlag trf)
Real interpolateRadius(const Vector< Real > &g, const Vector< Real > &s, const Real snorm, const Real pRed, const Real fold, const Real ftrial, const Real del, const Real gamma0, const Real gamma1, const Real eta2, std::ostream &outStream=std::cout, const bool print=false)
ETrustRegionP StringToETrustRegionP(std::string s)
std::string NumberToString(T Number)
Definition ROL_Types.hpp:47
Real ROL_EPSILON(void)
Platform-dependent machine epsilon.
Definition ROL_Types.hpp:57
ESecant StringToESecant(std::string s)
ESecantMode
@ SECANTMODE_FORWARD
@ SECANTMODE_INVERSE
@ SECANTMODE_BOTH
Real ROL_OVERFLOW(void)
Platform-dependent maximum double.
Definition ROL_Types.hpp:68
Real ROL_INF(void)
Definition ROL_Types.hpp:71