libDAI
emalg.h
Go to the documentation of this file.
1 /* This file is part of libDAI - http://www.libdai.org/
2  *
3  * Copyright (c) 2006-2011, The libDAI authors. All rights reserved.
4  *
5  * Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
6  */
7 
8 
9 #ifndef __defined_libdai_emalg_h
10 #define __defined_libdai_emalg_h
11 
12 
13 #include <vector>
14 #include <map>
15 
16 #include <dai/factor.h>
17 #include <dai/daialg.h>
18 #include <dai/evidence.h>
19 #include <dai/index.h>
20 #include <dai/properties.h>
21 
22 
26 
27 
28 namespace dai {
29 
30 
32 
51  public:
53  typedef ParameterEstimation* (*ParamEstFactory)( const PropertySet& );
54 
56  virtual ~ParameterEstimation() {}
57 
59  virtual ParameterEstimation* clone() const = 0;
60 
62 
67  static ParameterEstimation* construct( const std::string &method, const PropertySet &p );
68 
70  static void registerMethod( const std::string &method, const ParamEstFactory &f ) {
71  if( _registry == NULL )
73  (*_registry)[method] = f;
74  }
75 
77  virtual Prob estimate(const Prob &p) { return parametersToFactor(parameters(p)); }
78 
80  virtual Prob parametersToFactor(const Prob &params) = 0;
81 
83  virtual Prob parameters(const Prob &p) = 0;
84 
86  virtual size_t probSize() const = 0;
87 
88  // Returns the name of the class of this parameter estimation
89  virtual const std::string& name() const = 0;
90 
91  virtual const PropertySet& properties() const = 0;
92  private:
94  static std::map<std::string, ParamEstFactory> *_registry;
95 
97  static void loadDefaultRegistry();
98 };
99 
100 
102 
105  private:
107  size_t _target_dim;
110 
111  static std::string _name; // = "CondProbEstimation";
112 
115 
116  public:
118 
122  CondProbEstimation( size_t target_dimension, const Prob &pseudocounts );
123 
125 
133  static ParameterEstimation* factory( const PropertySet &p );
134 
136  virtual ParameterEstimation* clone() const { return new CondProbEstimation( _target_dim, _initial_stats ); }
137 
139  virtual ~CondProbEstimation() {}
140 
142 
145  virtual Prob parameters(const Prob &p);
146 
147  // For a discrete conditional probability distribution,
148  // the parameters are equivalent to the resulting factor
149  virtual Prob parametersToFactor(const Prob &p);
150 
152  virtual size_t probSize() const { return _initial_stats.size(); }
153 
154  virtual const std::string& name() const { return _name; }
155 
156  virtual const PropertySet& properties() const { return _props; }
157 };
158 
159 
161 
172  public:
174  typedef size_t FactorIndex;
176  typedef std::map<FactorIndex, std::vector<Var> > FactorOrientations;
177 
178  private:
180  std::map<FactorIndex, VarSet> _varsets;
182  std::map<FactorIndex, Permute> _perms;
184  FactorOrientations _varorders;
191 
193 
197  static Permute calculatePermutation( const std::vector<Var> &varOrder, VarSet &outVS );
198 
201 
202  public:
204 
208  SharedParameters( const FactorOrientations &varorders, ParameterEstimation *estimation, bool ownPE=false );
209 
211 
214  SharedParameters( std::istream &is, const FactorGraph &fg );
215 
217  SharedParameters( const SharedParameters &sp ) : _varsets(sp._varsets), _perms(sp._perms), _varorders(sp._varorders), _estimation(sp._estimation), _ownEstimation(sp._ownEstimation), _expectations(NULL) {
218  // If sp owns its _estimation object, we should clone it instead of copying the pointer
219  if( _ownEstimation )
220  _estimation = _estimation->clone();
221  _expectations = new Prob(*sp._expectations);
222  }
223 
226  // If we own the _estimation object, we should delete it now
227  if( _ownEstimation )
228  delete _estimation;
229  if( _expectations != NULL)
230  delete _expectations;
231  }
232 
234 
238  void collectExpectations( InfAlg &alg );
239 
241  const Prob& currentExpectations() const { return *_expectations; }
242 
243  ParameterEstimation& getPEst() const { return *_estimation; }
244 
246 
251  void setParameters( FactorGraph &fg );
252 
254 
258  const FactorOrientations& getFactorOrientations() const { return _varorders; }
259 
261  void clear( ) { _expectations->fill(0); }
262 };
263 
264 
266 
269  private:
271  std::vector<SharedParameters> _params;
272 
273  public:
275  MaximizationStep() : _params() {}
276 
278  MaximizationStep( std::vector<SharedParameters> &maximizations ) : _params(maximizations) {}
279 
281 
283  MaximizationStep( std::istream &is, const FactorGraph &fg_varlookup );
284 
286  void addExpectations( InfAlg &alg );
287 
289  void maximize( FactorGraph &fg );
290 
292  void clear( );
293 
295 
296  typedef std::vector<SharedParameters>::iterator iterator;
299  typedef std::vector<SharedParameters>::const_iterator const_iterator;
300 
302  iterator begin() { return _params.begin(); }
304  const_iterator begin() const { return _params.begin(); }
306  iterator end() { return _params.end(); }
308  const_iterator end() const { return _params.end(); }
310 };
311 
312 
314 
331 class EMAlg {
332  private:
335 
338 
340  std::vector<MaximizationStep> _msteps;
341 
343  size_t _iters;
344 
346  std::vector<Real> _lastLogZ;
347 
349  size_t _max_iters;
350 
353 
354  public:
356  static const std::string MAX_ITERS_KEY;
358  static const size_t MAX_ITERS_DEFAULT;
360  static const std::string LOG_Z_TOL_KEY;
362  static const Real LOG_Z_TOL_DEFAULT;
363 
365 
370  EMAlg( const Evidence &evidence, InfAlg &estep, std::vector<MaximizationStep> &msteps, const PropertySet &termconditions )
371  : _evidence(evidence), _estep(estep), _msteps(msteps), _iters(0), _lastLogZ(), _max_iters(MAX_ITERS_DEFAULT), _log_z_tol(LOG_Z_TOL_DEFAULT)
372  {
373  setTermConditions( termconditions );
374  }
375 
377 
379  EMAlg( const Evidence &evidence, InfAlg &estep, std::istream &mstep_file );
380 
382 
388  void setTermConditions( const PropertySet &p );
389 
391 
397  bool hasSatisfiedTermConditions() const;
398 
400  Real logZ() const { return _lastLogZ.back(); }
401 
403  size_t Iterations() const { return _iters; }
404 
406  const InfAlg& eStep() const { return _estep; }
407 
409 
411  Real iterate();
412 
414  Real iterate( MaximizationStep &mstep );
415 
417  void run();
418 
420 
421  typedef std::vector<MaximizationStep>::iterator s_iterator;
424  typedef std::vector<MaximizationStep>::const_iterator const_s_iterator;
425 
427  s_iterator s_begin() { return _msteps.begin(); }
429  const_s_iterator s_begin() const { return _msteps.begin(); }
431  s_iterator s_end() { return _msteps.end(); }
433  const_s_iterator s_end() const { return _msteps.end(); }
435 };
436 
437 
438 } // end of namespace dai
439 
440 
446 #endif
ParameterEstimation *(* ParamEstFactory)(const PropertySet &)
Type of pointer to factory function.
Definition: emalg.h:53
void addExpectations(InfAlg &alg)
Collect the beliefs from this InfAlg as expectations for the next Maximization step.
Definition: emalg.cpp:209
const_iterator begin() const
Returns constant iterator that points to the first parameter estimation task.
Definition: emalg.h:304
FactorOrientations _varorders
Maps factor indices to the corresponding desired variable orderings.
Definition: emalg.h:184
std::vector< SharedParameters >::const_iterator const_iterator
Constant iterator over the parameter estimation tasks.
Definition: emalg.h:299
virtual Prob parametersToFactor(const Prob &params)=0
Convert a set of estimated parameters to a factor.
this_type & fill(T x)
Sets all entries to x.
Definition: prob.h:544
Defines class Evidence, which stores multiple observations of joint states of variables.
virtual size_t probSize() const
Returns the required size for arguments to estimate().
Definition: emalg.h:152
InfAlg & _estep
How to do the expectation step.
Definition: emalg.h:337
std::vector< SharedParameters >::iterator iterator
Iterator over the parameter estimation tasks.
Definition: emalg.h:297
Defines the IndexFor, multifor, Permute and State classes, which all deal with indexing multi-dimensi...
void collectExpectations(InfAlg &alg)
Collect the expected values (beliefs) according to alg.
Definition: emalg.cpp:171
PropertySet _props
PropertySet that allows reconstruction of this estimator.
Definition: emalg.h:114
void clear()
Clear the step, to be called at the begining of each step.
Definition: emalg.cpp:221
SharedParameters(const FactorOrientations &varorders, ParameterEstimation *estimation, bool ownPE=false)
Constructor.
Definition: emalg.cpp:106
ParameterEstimation * _estimation
Parameter estimation method to be used.
Definition: emalg.h:186
void setParameters(FactorGraph &fg)
Estimate and set the shared parameters.
Definition: emalg.cpp:185
const_iterator end() const
Returns constant iterator that points beyond the last parameter estimation task.
Definition: emalg.h:308
size_t _target_dim
Number of states of the variable of interest.
Definition: emalg.h:107
TProb< Real > Prob
Represents a vector with entries of type dai::Real.
Definition: prob.h:766
static const std::string LOG_Z_TOL_KEY
Key for setting likelihood termination condition.
Definition: emalg.h:360
static const std::string MAX_ITERS_KEY
Key for setting maximum iterations.
Definition: emalg.h:356
Represents a factor graph.
Definition: factorgraph.h:65
virtual Prob estimate(const Prob &p)
Estimate the factor using the provided expectations.
Definition: emalg.h:77
std::vector< MaximizationStep > _msteps
The maximization steps to take.
Definition: emalg.h:340
std::vector< SharedParameters > _params
Vector of parameter estimation tasks of which this maximization step consists.
Definition: emalg.h:271
const FactorOrientations & getFactorOrientations() const
Return a reference to the vector of factor orientations.
Definition: emalg.h:258
virtual ParameterEstimation * clone() const
Virtual copy constructor.
Definition: emalg.h:136
static void registerMethod(const std::string &method, const ParamEstFactory &f)
Register a subclass so that it can be used with construct().
Definition: emalg.h:70
static std::map< std::string, ParamEstFactory > * _registry
A static registry containing all methods registered so far.
Definition: emalg.h:94
Real _log_z_tol
Convergence tolerance.
Definition: emalg.h:352
static const size_t MAX_ITERS_DEFAULT
Default maximum iterations.
Definition: emalg.h:358
size_t _max_iters
Maximum number of iterations.
Definition: emalg.h:349
double Real
Real number (alias for double, which could be changed to long double if necessary) ...
Definition: util.h:98
std::map< FactorIndex, std::vector< Var > > FactorOrientations
Convenience label for a grouping of factor orientations.
Definition: emalg.h:176
virtual size_t probSize() const =0
Returns the size of the Prob that should be passed to estimate and parameters.
virtual ~CondProbEstimation()
Virtual destructor.
Definition: emalg.h:139
std::map< FactorIndex, VarSet > _varsets
Maps factor indices to the corresponding VarSets.
Definition: emalg.h:180
virtual Prob parametersToFactor(const Prob &p)
Convert a set of estimated parameters to a factor.
Definition: emalg.cpp:58
Base class for parameter estimation methods.
Definition: emalg.h:50
static const Real LOG_Z_TOL_DEFAULT
Default likelihood tolerance.
Definition: emalg.h:362
EMAlg performs Expectation Maximization to learn factor parameters.
Definition: emalg.h:331
EMAlg(const Evidence &evidence, InfAlg &estep, std::vector< MaximizationStep > &msteps, const PropertySet &termconditions)
Construct an EMAlg from several objects.
Definition: emalg.h:370
Real logZ() const
Return the last calculated log likelihood.
Definition: emalg.h:400
Stores a data set consisting of multiple samples, where each sample is the observed joint state of so...
Definition: evidence.h:30
static ParameterEstimation * construct(const std::string &method, const PropertySet &p)
General factory method that constructs the desired ParameterEstimation subclass.
Definition: emalg.cpp:26
Prob * _expectations
The accumulated expectations.
Definition: emalg.h:190
Represents a single factor or set of factors whose parameters should be estimated.
Definition: emalg.h:171
~SharedParameters()
Destructor.
Definition: emalg.h:225
Represents a set of variables.
Definition: varset.h:94
static Permute calculatePermutation(const std::vector< Var > &varOrder, VarSet &outVS)
Calculates the permutation that permutes the canonical ordering into the desired ordering.
Definition: emalg.cpp:84
const_s_iterator s_end() const
Returns constant iterator that points beyond the last maximization step.
Definition: emalg.h:433
s_iterator s_end()
Returns iterator that points beyond the last maximization step.
Definition: emalg.h:431
size_t Iterations() const
Returns number of iterations done so far.
Definition: emalg.h:403
std::vector< MaximizationStep >::const_iterator const_s_iterator
Constant iterator over the maximization steps.
Definition: emalg.h:424
const Evidence & _evidence
All the data samples used during learning.
Definition: emalg.h:334
std::vector< Real > _lastLogZ
History of likelihoods.
Definition: emalg.h:346
std::map< FactorIndex, Permute > _perms
Maps factor indices to the corresponding Permute objects that permute the canonical ordering into the...
Definition: emalg.h:182
Prob _initial_stats
Initial pseudocounts.
Definition: emalg.h:109
void run()
Iterate until termination conditions are satisfied.
Definition: emalg.cpp:317
virtual Prob parameters(const Prob &p)=0
Return parameters for the estimated factor, in a format specific to the parameter estimation...
MaximizationStep()
Default constructor.
Definition: emalg.h:275
static void loadDefaultRegistry()
Registers default ParameterEstimation subclasses (currently, only CondProbEstimation).
Definition: emalg.cpp:20
void maximize(FactorGraph &fg)
Using all of the currently added expectations, make new factors with maximized parameters and set the...
Definition: emalg.cpp:215
std::vector< MaximizationStep >::iterator s_iterator
Iterator over the maximization steps.
Definition: emalg.h:422
Defines the Property and PropertySet classes, which are mainly used for managing parameters of infere...
bool _ownEstimation
Indicates whether *this gets ownership of _estimation.
Definition: emalg.h:188
size_t size() const
Returns length of the vector (i.e., the number of entries)
Definition: prob.h:311
InfAlg is an abstract base class, defining the common interface of all inference algorithms in libDAI...
Definition: daialg.h:35
virtual Prob parameters(const Prob &p)
Returns an estimate of the conditional probability distribution.
Definition: emalg.cpp:65
Represents a set of properties, mapping keys (of type PropertyKey) to values (of type PropertyValue) ...
Definition: properties.h:73
const InfAlg & eStep() const
Get the E-step method used.
Definition: emalg.h:406
void clear()
Reset the current expectations.
Definition: emalg.h:261
const_s_iterator s_begin() const
Returns constant iterator that points to the first maximization step.
Definition: emalg.h:429
s_iterator s_begin()
Returns iterator that points to the first maximization step.
Definition: emalg.h:427
virtual ~ParameterEstimation()
Virtual destructor for deleting pointers to derived classes.
Definition: emalg.h:56
iterator begin()
Returns iterator that points to the first parameter estimation task.
Definition: emalg.h:302
Namespace for libDAI.
Definition: alldai.cpp:16
static ParameterEstimation * factory(const PropertySet &p)
Virtual constructor, using a PropertySet.
Definition: emalg.cpp:39
Tool for calculating permutations of linear indices of multi-dimensional arrays.
Definition: index.h:137
A MaximizationStep groups together several parameter estimation tasks (SharedParameters objects) into...
Definition: emalg.h:268
CondProbEstimation(size_t target_dimension, const Prob &pseudocounts)
Constructor.
Definition: emalg.cpp:48
Defines the general interface for inference methods in libDAI (classes InfAlg, DaiAlg<>, DaiAlgFG and DaiAlgRG).
void setTermConditions(const PropertySet &p)
Change the conditions for termination.
Definition: emalg.cpp:244
size_t FactorIndex
Convenience label for an index of a factor in a FactorGraph.
Definition: emalg.h:174
Estimates the parameters of a conditional probability table, using pseudocounts.
Definition: emalg.h:104
Defines TFactor<> and Factor classes which represent factors in probability distributions.
const Prob & currentExpectations() const
Return the current accumulated expectations.
Definition: emalg.h:241
size_t _iters
Number of iterations done.
Definition: emalg.h:343
Real iterate()
Iterate once over all maximization steps.
Definition: emalg.cpp:307
MaximizationStep(std::vector< SharedParameters > &maximizations)
Construct MaximizationStep from a vector of parameter estimation tasks.
Definition: emalg.h:278
iterator end()
Returns iterator that points beyond the last parameter estimation task.
Definition: emalg.h:306
SharedParameters(const SharedParameters &sp)
Copy constructor.
Definition: emalg.h:217
bool hasSatisfiedTermConditions() const
Determine if the termination conditions have been met.
Definition: emalg.cpp:252
virtual ParameterEstimation * clone() const =0
Virtual copy constructor.
void setPermsAndVarSetsFromVarOrders()
Initializes _varsets and _perms from _varorders and checks whether their state spaces correspond with...
Definition: emalg.cpp:90