libeblearn
|
#include <datasource.h>
Public Member Functions | |
class_datasource () | |
CAUTION: This empty constructor requires a subsequent call to init(). | |
class_datasource (midx< Tdata > &data, idx< Tlabel > &labels, vector< string * > *lblstr=NULL, const char *name=NULL) | |
class_datasource (idx< Tdata > &data, idx< Tlabel > &labels, vector< string * > *lblstr=NULL, const char *name=NULL) | |
class_datasource (midx< Tdata > &data, idx< Tlabel > &labels, idx< ubyte > &classes, const char *name=NULL) | |
class_datasource (idx< Tdata > &data, idx< Tlabel > &labels, idx< ubyte > &classes, const char *name=NULL) | |
class_datasource (const char *data_name, const char *labels_name, const char *jitters_name=NULL, const char *scales_name=NULL, const char *classes_name=NULL, const char *name=NULL) | |
class_datasource (const class_datasource< Tnet, Tdata, Tlabel > &ds) | |
(Shallow) copy constructor (only the class strings are deeply copied). | |
virtual | ~class_datasource () |
Destructor. | |
void | defaults () |
Initialize variables to their default value. | |
virtual void | init_strings (idx< ubyte > &classes) |
Allocate class strings vector from ubyte matrix 'classes'. | |
void | init_local (vector< string * > *lblstr) |
void | init (midx< Tdata > &data, idx< Tlabel > &labels, vector< string * > *lblstr, const char *name) |
Initialize from matrices where the data is a multi-matrix matrix. | |
void | init (idx< Tdata > &data, idx< Tlabel > &labels, vector< string * > *lblstr, const char *name) |
Initialize from matrices. | |
void | init (const char *data_fname, const char *labels_fname, const char *jitters_fname=NULL, const char *scales_fname=NULL, const char *classes_fname=NULL, const char *name=NULL, uint max_size=0) |
void | init_root (const char *root, const char *data_fname, const char *labels_fname, const char *jitters_fname=NULL, const char *scales_fname=NULL, const char *classes_fname=NULL, const char *name=NULL) |
Initialize from root and partial matrix filenames. | |
void | init_root (const char *root_dsname, const char *name=NULL) |
virtual void | init_class_labels () |
virtual Tlabel | get_label () |
virtual bool | included_sample (intg index) |
This returns true if sample with 'index' is considered for training. | |
virtual intg | count_included_samples () |
This returns the number of samples actually considered for training. | |
virtual void | seek_begin () |
virtual void | seek_begin_train () |
virtual bool | next () |
virtual bool | next_train () |
virtual void | next_balanced_class () |
virtual void | reset_class_order () |
virtual void | set_random_class_order (bool ran) |
virtual void | limit_classes (intg n, intg offset=0, bool random=false) |
virtual void | set_balanced (bool bal=true) |
virtual bool | epoch_done () |
virtual void | init_epoch () |
virtual void | normalize_all_probas () |
virtual void | normalize_probas (int classid=-1) |
virtual intg | get_nclasses () |
Return the number of classes. | |
virtual int | get_class_id (const char *name) |
Return the label id corresponding to name, or -1 if not found. | |
virtual string & | get_class_name (int id) |
Return the label string for index id. | |
virtual vector< string * > & | get_label_strings () |
Returns a reference to a vector of each label string. | |
virtual intg | get_lowest_common_size () |
virtual void | save_pickings (const char *name=NULL) |
template<typename T > | |
void | write_classed_pickings (idx< T > &m, idx< ubyte > &correct, string &name_, const char *name2_=NULL, bool plot_correct=true, const char *ylabel="") |
Write plot of m organized by class and correctness. | |
virtual void | save_state () |
virtual void | restore_state () |
Restore previously saved internal iterators. | |
virtual void | pretty () |
Print info about the datasource on the standard output. | |
virtual void | pretty_scales () |
Print info about the scales data on the standard output. | |
virtual void | pretty_progress (bool newline=true) |
Protected Member Functions | |
virtual bool | pick_current () |
Protected Attributes | |
intg | nclasses |
Number of classes. | |
vector< string * > * | lblstr |
Name of each class. | |
vector< string * > | clblstr |
Name of each class, may differ. | |
bool | bexclusion |
Exclusion is used. | |
vector< bool > | excluded |
Vector of excluded classes. | |
intg | included |
Number of included classes. | |
idx< Tlabel > | olabels |
Original class labels (may differ from labels). | |
bool | balance |
Balance iterating or not. | |
vector< vector< intg > > | bal_indices |
Balanced iterating indices. | |
vector< uint > | bal_it |
Sample iterators for each class. | |
vector< uint > | class_order |
Order in which to balance classes. | |
bool | random_class_order |
uint | class_it |
Iterator on classes. | |
uint | class_it_it |
Iterator on classes iterator. | |
bool | perclass_norm |
Normalize probas per class. | |
vector< vector< intg > > | bal_indices_saved |
vector< uint > | bal_it_saved |
uint | class_it_saved |
uint | class_it_it_saved |
Friends | |
class | class_datasource_gui |
class | supervised_trainer |
class_datasource A datasource associating samples with a discrete class. The order of samples distributed to training can be balanced to present the same amount of samples for each class if these are unbalanced.
ebl::class_datasource< Tnet, Tdata, Tlabel >::class_datasource | ( | midx< Tdata > & | data, |
idx< Tlabel > & | labels, | ||
vector< string * > * | lblstr = NULL , |
||
const char * | name = NULL |
||
) |
Construct dataset with 'data' and its corresponding 'labels', where data is a multi-matrix matrix (midx type). This allows for dynamic loading of data and avoids the need to fit all data in memory.
lblstr | An optional vector of strings describing each class. |
name | An optional name for this dataset. |
ebl::class_datasource< Tnet, Tdata, Tlabel >::class_datasource | ( | idx< Tdata > & | data, |
idx< Tlabel > & | labels, | ||
vector< string * > * | lblstr = NULL , |
||
const char * | name = NULL |
||
) |
Construct dataset with 'data' and its corresponding 'labels'.
lblstr | An optional vector of strings describing each class. |
name | An optional name for this dataset. |
ebl::class_datasource< Tnet, Tdata, Tlabel >::class_datasource | ( | midx< Tdata > & | data, |
idx< Tlabel > & | labels, | ||
idx< ubyte > & | classes, | ||
const char * | name = NULL |
||
) |
Construct dataset with 'data' and its corresponding 'labels', where data is a multi-matrix matrix (midx type). This allows for dynamic loading of data and avoids the need to fit all data in memory.
classes | A vector of strings describing each class. |
name | An optional name for this dataset. |
ebl::class_datasource< Tnet, Tdata, Tlabel >::class_datasource | ( | idx< Tdata > & | data, |
idx< Tlabel > & | labels, | ||
idx< ubyte > & | classes, | ||
const char * | name = NULL |
||
) |
Construct dataset with 'data' and its corresponding 'labels'.
classes | A vector of strings describing each class. |
name | An optional name for this dataset. |
ebl::class_datasource< Tnet, Tdata, Tlabel >::class_datasource | ( | const char * | data_name, |
const char * | labels_name, | ||
const char * | jitters_name = NULL , |
||
const char * | scales_name = NULL , |
||
const char * | classes_name = NULL , |
||
const char * | name = NULL |
||
) |
Constructor from full names for each dataset file. Note: the jitters, scales and classes files are optional.
name | An optional name for this dataset. |
bool ebl::class_datasource< Tnet, Tdata, Tlabel >::epoch_done | ( | ) | [virtual] |
Return true if current epoch is finished. Call init_epoch() to restart a new epoch.
Reimplemented from ebl::datasource< Tnet, Tdata >.
Tlabel ebl::class_datasource< Tnet, Tdata, Tlabel >::get_label | ( | ) | [virtual] |
Return the value contained in the current sample's label. This assumes that a label consists of exactly 1 element and will produce an error otherwise.
Reimplemented in ebl::hierarchy_datasource< Tnet, Tdata, Tlabel >.
intg ebl::class_datasource< Tnet, Tdata, Tlabel >::get_lowest_common_size | ( | ) | [virtual] |
Return the lowest (non-zero) size per class, multiplied by the number of classes. e.g. if a dataset has 10 classes with 100 examples and 5 classes with 50 examples, it will return 50 * (10 + 5) = 750, whereas size() will return 1250. This is useful to keep iterations to a meaningful size when a class has many more examples than another.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::init | ( | const char * | data_fname, |
const char * | labels_fname, | ||
const char * | jitters_fname = NULL , |
||
const char * | scales_fname = NULL , |
||
const char * | classes_fname = NULL , |
||
const char * | name = NULL , |
||
uint | max_size = 0 |
||
) |
Initialize from matrix filenames.
max_size | If > 0, limit the number of samples to this value. |
void ebl::class_datasource< Tnet, Tdata, Tlabel >::init_class_labels | ( | ) | [virtual] |
Fills 'clabels' and 'clblstr' which are the true class labels if labels are not consecutive values starting from 0. If they are, then 'clabels' is equivalent to 'labels', same for 'clblstr' and 'lblstr'.
Reimplemented in ebl::hierarchy_datasource< Tnet, Tdata, Tlabel >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::init_epoch | ( | ) | [virtual] |
Restarts a new epoch, i.e. resets counters but does not reset iterator positions.
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::init_local | ( | vector< string * > * | lblstr | ) |
Initialize things specific to this class. The rest can be initialized with parent init methods.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::init_root | ( | const char * | root_dsname, |
const char * | name = NULL |
||
) |
Initialize from directory 'root' and individual names for data, labels, jitters and classes files (appending names and extension of each subfile of a dataset created with dscompile tool).
Reimplemented from ebl::labeled_datasource< Tnet, Tdata, Tlabel >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::limit_classes | ( | intg | n, |
intg | offset = 0 , |
||
bool | random = false |
||
) | [virtual] |
Excludes use of classes other than ones in [offset, offset + n] range.
random | If true, keep only n classes at random. |
bool ebl::class_datasource< Tnet, Tdata, Tlabel >::next | ( | ) | [virtual] |
Move to the next datum (in the original order of the dataset). Returns false if we reached the end. This should be used during testing. It will always return the data in the same order with the same probability of 1. See next_train() for data returned with variable probability, balance, etc. (used for training only).
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::next_balanced_class | ( | ) | [virtual] |
Moves the class iterator 'class_it' to the next class. If the end of the classes is reached, the class order is shuffled and the iterator starts at the first class again.
bool ebl::class_datasource< Tnet, Tdata, Tlabel >::next_train | ( | ) | [virtual] |
Move to the next datum in a way suited for training (_not_ for testing, for testing see next()). If balance is activated (see set_balanced()) this will return samples in a class-balanced way, i.e. showing each class sequentially, with different probabilities based on each sample's difficulty, and/or in a random order after each pass. When all samples of a class have been shown, it loops back to the first sample of that class. This should be used during training only. If a sample was not selected because of a low probability, this will return false; if it was selected, it returns true. In any case, internal iterators will always be set to the next sample, regardless of whether it was selected or not. It is up to the caller to train on the sample if selected, or only test and update its energy if not selected.
Reimplemented from ebl::datasource< Tnet, Tdata >.
Reimplemented in ebl::hierarchy_datasource< Tnet, Tdata, Tlabel >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::normalize_all_probas | ( | ) | [virtual] |
Normalize picking probabilities by maximum probability for all classes if perclass_norm is true, or globally otherwise.
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::normalize_probas | ( | int | classid = -1 | ) | [virtual] |
Normalize picking probabilities by maximum probability of classid if perclass_norm is true, or globally otherwise.
bool ebl::class_datasource< Tnet, Tdata, Tlabel >::pick_current | ( | ) | [protected, virtual] |
Draw a random number between 0 and 1 and return true if higher than current sample's probability.
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::pretty_progress | ( | bool | newline = true | ) | [virtual] |
Print the progress of the current training epoch.
newline | If true, end the output with a new line. |
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::reset_class_order | ( | ) | [virtual] |
If 'random_class_order' is true, this will randomly change the order in which classes are presented for balanced training, otherwise it will do nothing and leave them in the original order.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::save_pickings | ( | const char * | name = NULL | ) | [virtual] |
Output statistics of samples picking, i.e. the number of times each sample has been picked for training.
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::save_state | ( | ) | [virtual] |
Save internal iterators. Calling restore_state() will return to the current sample.
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::seek_begin | ( | ) | [virtual] |
Move to the beginning of the data, for the test iterators only, i.e. only next() is affected, next_train() is unaffected.
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::seek_begin_train | ( | ) | [virtual] |
Move to the beginning of the data, for the train iterators only, i.e. only next_train() is affected, next() is unaffected.
Reimplemented from ebl::datasource< Tnet, Tdata >.
void ebl::class_datasource< Tnet, Tdata, Tlabel >::set_balanced | ( | bool | bal = true | ) | [virtual] |
If 'bal' is true, make the next_train() method call sequentially one sample of each class instead of following the dataset's distribution. This is important to use when the dataset is unbalanced. This is set to true by default. Balance is used only by next_train(), not by next().
void ebl::class_datasource< Tnet, Tdata, Tlabel >::set_random_class_order | ( | bool | ran | ) | [virtual] |
If 'ran' is true, then the order in which classes are presented during balanced training is randomized.