libeblearn
ebl::detector< T, Tstate > Class Template Reference

List of all members.

Public Member Functions

 detector (module_1_1< T, Tstate > &thenet, vector< string > &labels, answer_module< T, T, T, Tstate > *answer=NULL, resizepp_module< T, Tstate > *resize=NULL, const char *background=NULL, std::ostream &out=std::cout, std::ostream &err=std::cerr, bool adapt_scales=false)
virtual ~detector ()
 Destructor.
void set_scaling_original ()
 Set the multi-scale to 1 scale only: the image's original scale.
void set_scaling_type (t_scaling type)
 Set the scaling type.
void set_resolutions (const midxdim &scales)
 Set all scales manually.
void set_resolutions (const vector< double > &factors)
 Set scales as factors of the input sizes.
void set_resolution (double factor)
 Set 1 scale only as a factor of the input sizes.
void set_resolutions (int resolutions)
void set_resolutions (idx< uint > &resolutions)
void set_zpads (float hzpad, float wzpad)
void set_resolutions (double scales_steps, double max_scale=1.0, double min_scale=1.0)
int get_class_id (const string &name)
 Return the id of the class 'name' or -1 if not found.
void set_bgclass (const char *bg=NULL)
 set background class (which will be ignored).
bool set_mask_class (const char *mask)
void set_silent ()
 set detector to silent: do not print results on std output
void set_min_resolution (uint min_size)
void set_max_resolution (uint max_size)
void set_raw_thresholds (vector< float > &t)
 Set different thresholds for each scale during raw extraction.
void set_nms (t_nms type=nms_overlap, float pre_threshold=0.0, float post_threshold=0.0, float pre_hfact=1.0, float pre_wfact=1.0, float post_hfact=1.0, float post_wfact=1.0, float woverh=1.0, float max_overlap=1.0, float max_hcenter_dist=0.0, float max_wcenter_dist=0.0, float vote_max_overlap=1.0, float vote_max_hcenter_dist=0.0, float vote_max_wcenter_dist=0.0)
void set_scaler_mode (bool set)
 Enable or disable scaler mode, i.e. using scale prediction for boxes.
void set_smoothing (uint type)
 Set output smoothing type. 0: none, 1: 3x3 kernel.
void set_mem_optimization (Tstate &in, Tstate &out, bool keep_inputs=false)
void set_netdim (idxdim &d)
 Set by hand the minimum network input and fix it.
void set_outputs_dumping (const char *name)
void set_bboxes_off ()
 Turn off the extraction of bounding boxes.
vector< string > & get_labels ()
 Returns the vector of label strings.
void set_ignore_outsiders ()
 Ignore bboxes that overlap with the outside of the image.
void set_corners_inference (uint type)
void set_bbox_decision (uint type)
void set_bbox_scalings (mfidxdim &scalings)
template<class Tin >
bboxes & fprop (idx< Tin > &img, const char *fname=NULL)
void fprop_nms (bboxes &in, bboxes &out)
 Run non-maximum suppression on 'in' and put result in 'out'.
vector< idx< T > > & get_originals ()
midx< T > get_preprocessed (const bbox &b)
svector< midx< T > > & get_preprocessed (bboxes &out, uint n=0, bool diverse=false, uint pre_diverse_max=100)
svector< midx< T > > & get_preprocessed (bboxes &in, bboxes &out, uint n=0, bool diverse=false, uint pre_diverse_max=100)
idx< T > get_mask (string &classname)
uint get_total_saved ()
 Returns the number of bboxes saved so far.
string & set_save (const string &directory, uint nmax=0, bool diversity=false)
void init (idxdim &dinput, const char *frame_name=NULL)
 initialize dimensions and multi-resolution buffers.

Protected Member Functions

void compute_scales (midxdim &scales, idxdim &netdim, idxdim &mindim, idxdim &maxdim, idxdim &indim, t_scaling type, uint nscales, double scales_step, const char *frame_name=NULL)
void compute_resolutions (midxdim &scales, idxdim &mindim, idxdim &maxdim, uint nscales)
void compute_resolutions (midxdim &scales, idxdim &indim, vector< double > &scale_factors)
void compute_resolutions (midxdim &scales, idxdim &mindim, idxdim &maxdim, double scales_step)
void compute_resolutions_up (midxdim &scales, idxdim &indim, idxdim &mindim, idxdim &maxdim, double scales_step)
void validate_resolutions ()
void smooth_outputs ()
 Smooth outputs.
void update_merge_alignment ()
void get_corners (mstate< Tstate > &outputs)
void extract_bboxes (T threshold, bboxes &bboxes)
void save_bboxes (bboxes &bboxes, const string &dir, const char *frame_name=NULL)
void add_class (const char *name)
template<class Tin >
void prepare (idx< Tin > &img, const char *fname=NULL)
void prepare_scale (uint i)
void multi_res_fprop ()
 do a fprop on thenet with multiple rescaled inputs

Protected Attributes

module_1_1< T, Tstate > & thenet
 The network.
resizepp_module< T, Tstate > * resizepp
 Resize module for multi-scaling.
bool resizepp_delete
 We are responsible for deleting.
idx< T > image
double contrast
double brightness
idx< float > sizes
fstate_idx< T > finput
Tstate * input
 A forward buffer containing input image.
mstate< Tstate > output
 output buffer
Tstate * tmp
 tmp.
Tstate * minput
 input buffer, used with mem optim.
svector< mstate< Tstate > > ppinputs
 Preprocessed inputs of all scales.
svector< mstate< Tstate > > outputs
 Output buffers of all scales.
vector< string > labels
 String label of each class.
idxdim indim
 Input dimensions.
idxdim netdim
 network's input dimensions
bool netdim_fixed
 Do not update netdim if true.
vector< rect< int > > original_bboxes
 Bboxes in image after resizing.
int bgclass
int mask_class
idx< T > mask
nms * pnms
 Non-maximum suppression object.
midxdim scales
 Multi-scale (ideal) scales.
midxdim actual_scales
 Actually used scales.
midxdim manual_scales
 Scales set manually.
vector< double > scale_factors
 A list of scale factors.
uint nscales
 Number of scales if set by hand.
double scales_step
double min_scale
 Minimum scale as factor of original res.
double max_scale
 Maximum scale as factor of original res.
t_scaling restype
 resolution type
bool silent
 print results on std output if not silent
bool save_mode
 save detected windows or not
string save_dir
 directory where to save detections
vector< uint > save_counts
 file counter for each class
bboxes raw_bboxes
 raw bboxes extracted from outputs
bboxes pruned_bboxes
 scale-pruned bboxes
uint min_size
 minimum input size to network
uint max_size
 maximum input size to network
vector< idx< T > > odetections
 original windows yielding detection
svector< midx< T > > ppdetections
 preprocessed wins yielding detection
bool bodetections
 odetections is up-to-date or not
bool bppdetections
 ppdetections is up-to-date or not
uint save_max_per_frame
 max number of region saved
bool diverse_ordering
 Saved samples diverse ordering.
bool mem_optimization
 optimize memory or not.
bool optimization_swap
 swap buffers or not.
bool keep_inputs
 optimize input buffers or not.
uint hzpad
 Zero-pad on height (each side).
uint wzpad
 Zero-pad on width (each side).
std::ostream & mout
 output stream.
std::ostream & merr
 error output stream.
uint smoothing_type
idx< T > smoothing_kernel
bool initialized
string outputs_dump
 Outputs dumping name.
bool bboxes_off
 Do not extract bboxes if true.
bool adapt_scales
 Adapt scales to network structure.
bool scaler_mode
answer_module< T, T, T, Tstate > * answer
mstate< Tstate > answers
 Buffers holding last answers.
bool ignore_outsiders
 Ignore bbs overlapping outside.
uint corners_inference
 0: from net 1: from net + save 2: load
bool corners_infered
 Allows to infer only once.
mfidxdim itl
mfidxdim itr
mfidxdim ibl
mfidxdim ibr
 4 corners in input space.
mfidxdim pptl
mfidxdim pptr
mfidxdim ppbl
mfidxdim ppbr
 4 corners in pp input space.
float pre_threshold
 Threshold for initial bbox extraction.
vector< float > raw_thresholds
 Thresholds for each scale.
vector< uint > scale_indices
 Input scales indices for each output.
uint bbox_decision
 Decision type, 0: regular, 1: corners only.
mfidxdim bbox_scalings

Friends

class detector_gui
class detection_thread
class bootstrapping

template<typename T, class Tstate = fstate_idx<T>>
class ebl::detector< T, Tstate >


Constructor & Destructor Documentation

template<typename T , class Tstate >
ebl::detector< T, Tstate >::detector ( module_1_1< T, Tstate > &  thenet,
vector< string > &  labels,
answer_module< T, T, T, Tstate > *  answer = NULL,
resizepp_module< T, Tstate > *  resize = NULL,
const char *  background = NULL,
std::ostream &  out = std::cout,
std::ostream &  err = std::cerr,
bool  adapt_scales = false 
)

Constructor. Default resolutions are 1, 2 and 4 times the network's size. Resolutions can be set using set_resolutions(). Background class name default "bg" will be searched in the list of class names. To specify another background class, pass a non NULL background parameter.

Parameters:
labels: A vector of label strings.
resize: An optional resizing (and preprocessing) module, e.g. resizepp_module. If null, use resize_module by default.
background: The name of the background class. Default is "bg". If given, positive answers for this class are ignored.
adapt_scales: If true, adapt each scale so that they are valid input sizes for 'thenet' network. Otherwise, the network must crop inputs itself (see 'crop' attribute of modules).

Member Function Documentation

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::add_class ( const char *  name) [protected]

Add a name to the vector of class names. This can be useful when generating intermediate classes from existing classes.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::compute_resolutions ( midxdim &  scales,
idxdim &  indim,
vector< double > &  scale_factors 
) [protected]

Compute each scale as a factor of 'indim' for each element of 'scale_factors' and put them into 'scales' vector.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::compute_resolutions ( midxdim &  scales,
idxdim &  mindim,
idxdim &  maxdim,
double  scales_step 
) [protected]

Compute each scale with a step of 'scales_step' starting from 'maxdim' down to 'mindim'.

Parameters:
mindim: The minimum scale size.
maxdim: The maximum scale size.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::compute_resolutions ( midxdim &  scales,
idxdim &  mindim,
idxdim &  maxdim,
uint  nscales 
) [protected]

Compute 'nscales' scales between 'mindim' and 'maxdim' resolutions and push them into 'scales' vector.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::compute_resolutions_up ( midxdim &  scales,
idxdim &  indim,
idxdim &  mindim,
idxdim &  maxdim,
double  scales_step 
) [protected]

Compute each scale with a step of 'scales_step' starting from 'mindim' up to 'maxdim'.

Parameters:
mindim: The minimum scale size.
maxdim: The maximum scale size.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::compute_scales ( midxdim &  scales,
idxdim &  netdim,
idxdim &  mindim,
idxdim &  maxdim,
idxdim &  indim,
t_scaling  type,
uint  nscales,
double  scales_step,
const char *  frame_name = NULL 
) [protected]

Compute all scales based on minimum, maximum and input dimensions, and scaling type.

Parameters:
netdim: The network's minimal input size.
mindim: The minimum scale size.
maxdim: The maximum scale size.
indim: The original input dimensions.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::extract_bboxes ( T  threshold,
bboxes &  bboxes 
) [protected]

Extract bounding boxes with higher confidence than 'threshold' from internal 'outputs' into 'bboxes'.

template<typename T , class Tstate >
template<class Tin >
bboxes & ebl::detector< T, Tstate >::fprop ( idx< Tin > &  img,
const char *  fname = NULL 
)

fprop input image through the network. If the image's and network's types differ, cast the image into the network's type through an idx_copy (avoid for better performance).

Parameters:
fname: Optional name for the frame being processed, used in the output files to be saved.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::get_corners ( mstate< Tstate > &  outputs) [protected]

Fills internal buffers with 4 image corners coordinates in input and preprocessing space given the output sizes of 'outputs'.

template<typename T , class Tstate >
idx< T > ebl::detector< T, Tstate >::get_mask ( string &  classname)

Return a mask of output maps with the same size as the input. The mask is a max of all output resolutions.

template<typename T , class Tstate >
vector< idx< T > > & ebl::detector< T, Tstate >::get_originals ( )

Return a reference to a vector of windows in the original image that yielded a detection.

template<typename T , class Tstate >
svector< midx< T > > & ebl::detector< T, Tstate >::get_preprocessed ( bboxes &  out,
uint  n = 0,
bool  diverse = false,
uint  pre_diverse_max = 100 
)

Return a reference to a vector of windows in the preprocessed/scaled image that yielded a detection.

Parameters:
out: A vector of boxes filled with returned samples boxes.
n: Limit number of samples to 'n'. If n equals 0, return all.
diverse: If true, order samples by diversity.
pre_diverse_max: Limit the number of samples if diverse is enabled because it is an expensive process with complexity O(n^2).
template<typename T , class Tstate >
svector< midx< T > > & ebl::detector< T, Tstate >::get_preprocessed ( bboxes &  in,
bboxes &  out,
uint  n = 0,
bool  diverse = false,
uint  pre_diverse_max = 100 
)

Return a reference to a vector of windows in the preprocessed/scaled image that yielded a detection.

Parameters:
in: Input boxes of samples to return.
out: A vector of boxes filled with returned samples boxes.
n: Limit number of samples to 'n'. If n equals 0, return all.
diverse: If true, order samples by diversity.
pre_diverse_max: Limit the number of samples if diverse is enabled because it is an expensive process with complexity O(n^2).
template<typename T , class Tstate >
midx< T > ebl::detector< T, Tstate >::get_preprocessed ( const bbox &  b)

Return the preprocessed input corresponding to bounding box 'b', or throw an exception if it is out of bounds.

template<typename T , class Tstate >
template<class Tin >
void ebl::detector< T, Tstate >::prepare ( idx< Tin > &  img,
const char *  fname = NULL 
) [protected]

Prepare image and resolutions. This should be called before prepare_scale(). This mostly involves casting the image into the network's type and computing each scale's dimensions (no resizing) based on the image's size.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::prepare_scale ( uint  i) [protected]

Do preprocessing (resizing and channel/edge processing) for a particular resolution. This will set 'input' and 'output' buffers, that can then be used to fprop the network. This uses the 'image' member prepared by prepare() and should therefore be called after prepare().

Parameters:
i: Index of the resolution (scale) to be preprocessed.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::save_bboxes ( bboxes &  bboxes,
const string &  dir,
const char *  frame_name = NULL 
) [protected]

save all bounding boxes of original (in original resolution) and preprocessed (resized and filtered) input into directory dir.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_bbox_decision ( uint  type)

Select the bbox extraction decision type. 0: decision based on confidence threshold. 1: only extract output corners.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_corners_inference ( uint  type)

The image corners transformation from outputs to input is inferred back through the network (type is 0 or 1), and saved into "corners.mat" (type 1), or loaded from "corners.mat" only (type 2).

template<typename T , class Tstate >
bool ebl::detector< T, Tstate >::set_mask_class ( const char *  mask)

Set the mask class, which is ignored by bounding box detection. Instead, call get_mask() to retrieve a mask map of values above a given threshold. This is useful for continuous classes rather than discrete classes.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_max_resolution ( uint  max_size)

Set the maximum size of each side of an input to use as input to the network, i.e. an input to the network will be at most max_size * max_size big.

Parameters:
max_size: The maximum width or height in pixels.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_mem_optimization ( Tstate &  in,
Tstate &  out,
bool  keep_inputs = false 
)

Enable memory optimization by using only 2 buffers (in and out) for entire flow. Those same buffers must have been passed to the network's constructor.

Parameters:
keep_inputs: If false, re-use input buffers for optimization. If true, some operations may be unavailable, such as saving the pre-processed detected windows.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_min_resolution ( uint  min_size)

Set the minimum size of each side of an input to use as input to the network, i.e. an input to the network will be at least min_size * min_size big.

Parameters:
min_size: The minimum width or height in pixels.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_nms ( t_nms  type = nms_overlap,
float  pre_threshold = 0.0,
float  post_threshold = 0.0,
float  pre_hfact = 1.0,
float  pre_wfact = 1.0,
float  post_hfact = 1.0,
float  post_wfact = 1.0,
float  woverh = 1.0,
float  max_overlap = 1.0,
float  max_hcenter_dist = 0.0,
float  max_wcenter_dist = 0.0,
float  vote_max_overlap = 1.0,
float  vote_max_hcenter_dist = 0.0,
float  vote_max_wcenter_dist = 0.0 
)

Enable nms of type 'type'. Refer to the t_nms declaration for the different types. The default type is nms_overlap.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_outputs_dumping ( const char *  name)

Enables dumping of all outputs using the base name 'name', to which the idx's size and '.mat' are appended. Each resolution will be dumped as a separate matrix file. Dumping will be called at the end of each fprop call for each resolution.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_resolutions ( double  scales_steps,
double  max_scale = 1.0,
double  min_scale = 1.0 
)

Specify resolutions by the factor step, starting from factor 1 (network's size), adding scales_steps until reaching the original resolution.

Parameters:
max_scale: The maximum scale factor of the original resolution, 1.0 by default, i.e. the original resolution.
min_scale: The minimum scale factor of the smallest network size, 1.0 by default, meaning the minimum network input size.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_resolutions ( int  resolutions)

Use the given number of resolutions between the maximum resolution and the minimum resolution.

Parameters:
resolutions: The number of resolutions to use.
template<typename T , class Tstate = fstate_idx<T>>
void ebl::detector< T, Tstate >::set_resolutions ( idx< uint > &  resolutions)

Specify resolutions by hand in an nx2 idx (heightxwidth), e.g. 240x320, 120x160.

Parameters:
resolutions: A uint idx containing resolutions (of size nx2).
template<typename T , class Tstate >
string & ebl::detector< T, Tstate >::set_save ( const string &  directory,
uint  nmax = 0,
bool  diversity = false 
)

Enable saving of each (preprocessed) window inducing a positive detection into directory. All detections except for the background class are dumped into a directory corresponding to the class' name. This returns the directory string used.

Parameters:
nmax: Limit the number of windows saved per frame.
diversity: If true, order samples to be saved by diversity.
template<typename T , class Tstate >
void ebl::detector< T, Tstate >::set_zpads ( float  hzpad,
float  wzpad 
)

Add zero padding of (hzpad * the network's minimum input height) on each vertical side and (wzpad * min width) on each horizontal side.

template<typename T , class Tstate >
void ebl::detector< T, Tstate >::update_merge_alignment ( ) [protected]

If a merge module was found in the network, update its parameters so that merging is aligned on top left corner of all inputs.

template<typename T , class Tstate = fstate_idx<T>>
void ebl::detector< T, Tstate >::validate_resolutions ( ) [protected]

Checks that resolutions match the network size and adjusts them if they do not. This method assumes the nresolutions and resolutions members have already been initialized.


Member Data Documentation

template<typename T , class Tstate = fstate_idx<T>>
Tstate* ebl::detector< T, Tstate >::input [protected]

A forward buffer containing input image.

input buffer


The documentation for this class was generated from the following files: