libeblearn
|
Public Member Functions | |
detector (module_1_1< T, Tstate > &thenet, vector< string > &labels, answer_module< T, T, T, Tstate > *answer=NULL, resizepp_module< T, Tstate > *resize=NULL, const char *background=NULL, std::ostream &out=std::cout, std::ostream &err=std::cerr, bool adapt_scales=false) | |
virtual | ~detector () |
Destructor. | |
void | set_scaling_original () |
Set the multi-scale to 1 scale only: the image's original scale. | |
void | set_scaling_type (t_scaling type) |
Set the scaling type. | |
void | set_resolutions (const midxdim &scales) |
Set all scales manually. | |
void | set_resolutions (const vector< double > &factors) |
Set scales as factors of the input sizes. | |
void | set_resolution (double factor) |
Set 1 scale only as a factor of the input sizes. | |
void | set_resolutions (int resolutions) |
void | set_resolutions (idx< uint > &resolutions) |
void | set_zpads (float hzpad, float wzpad) |
void | set_resolutions (double scales_steps, double max_scale=1.0, double min_scale=1.0) |
int | get_class_id (const string &name) |
Return the id of the class 'name' or -1 if not found. | |
void | set_bgclass (const char *bg=NULL) |
set background class (which will be ignored). | |
bool | set_mask_class (const char *mask) |
void | set_silent () |
set detector to silent: do not print results on std output | |
void | set_min_resolution (uint min_size) |
void | set_max_resolution (uint max_size) |
void | set_raw_thresholds (vector< float > &t) |
Set different thresholds for each scale during raw extraction. | |
void | set_nms (t_nms type=nms_overlap, float pre_threshold=0.0, float post_threshold=0.0, float pre_hfact=1.0, float pre_wfact=1.0, float post_hfact=1.0, float post_wfact=1.0, float woverh=1.0, float max_overlap=1.0, float max_hcenter_dist=0.0, float max_wcenter_dist=0.0, float vote_max_overlap=1.0, float vote_max_hcenter_dist=0.0, float vote_max_wcenter_dist=0.0) |
void | set_scaler_mode (bool set) |
Enable or disable scaler mode, i.e. using scale prediction for boxes. | |
void | set_smoothing (uint type) |
Set output smoothing type. 0: none, 1: 3x3 kernel. | |
void | set_mem_optimization (Tstate &in, Tstate &out, bool keep_inputs=false) |
void | set_netdim (idxdim &d) |
Set by hand the minimum network input and fix it. | |
void | set_outputs_dumping (const char *name) |
void | set_bboxes_off () |
Turn off the extraction of bounding boxes. | |
vector< string > & | get_labels () |
Returns the vector of label strings. | |
void | set_ignore_outsiders () |
Ignore bboxes that overlap with the outside of the image. | |
void | set_corners_inference (uint type) |
void | set_bbox_decision (uint type) |
void | set_bbox_scalings (mfidxdim &scalings) |
template<class Tin > | |
bboxes & | fprop (idx< Tin > &img, const char *fname=NULL) |
void | fprop_nms (bboxes &in, bboxes &out) |
Run non-maximum suppression on 'in' and put result in 'out'. | |
vector< idx< T > > & | get_originals () |
midx< T > | get_preprocessed (const bbox &b) |
svector< midx< T > > & | get_preprocessed (bboxes &out, uint n=0, bool diverse=false, uint pre_diverse_max=100) |
svector< midx< T > > & | get_preprocessed (bboxes &in, bboxes &out, uint n=0, bool diverse=false, uint pre_diverse_max=100) |
idx< T > | get_mask (string &classname) |
uint | get_total_saved () |
Returns the number of bboxes saved so far. | |
string & | set_save (const string &directory, uint nmax=0, bool diversity=false) |
void | init (idxdim &dinput, const char *frame_name=NULL) |
initialize dimensions and multi-resolution buffers. | |
Protected Member Functions | |
void | compute_scales (midxdim &scales, idxdim &netdim, idxdim &mindim, idxdim &maxdim, idxdim &indim, t_scaling type, uint nscales, double scales_step, const char *frame_name=NULL) |
void | compute_resolutions (midxdim &scales, idxdim &mindim, idxdim &maxdim, uint nscales) |
void | compute_resolutions (midxdim &scales, idxdim &indim, vector< double > &scale_factors) |
void | compute_resolutions (midxdim &scales, idxdim &mindim, idxdim &maxdim, double scales_step) |
void | compute_resolutions_up (midxdim &scales, idxdim &indim, idxdim &mindim, idxdim &maxdim, double scales_step) |
void | validate_resolutions () |
void | smooth_outputs () |
Smooth outputs. | |
void | update_merge_alignment () |
void | get_corners (mstate< Tstate > &outputs) |
void | extract_bboxes (T threshold, bboxes &bboxes) |
void | save_bboxes (bboxes &bboxes, const string &dir, const char *frame_name=NULL) |
void | add_class (const char *name) |
template<class Tin > | |
void | prepare (idx< Tin > &img, const char *fname=NULL) |
void | prepare_scale (uint i) |
void | multi_res_fprop () |
do a fprop on thenet with multiple rescaled inputs | |
Protected Attributes | |
module_1_1< T, Tstate > & | thenet |
The network. | |
resizepp_module< T, Tstate > * | resizepp |
Resize module for multi-scaling. | |
bool | resizepp_delete |
We are responsible for deleting. | |
idx< T > | image |
double | contrast |
double | brightness |
idx< float > | sizes |
fstate_idx< T > | finput |
Tstate * | input |
A forward buffer containing input image. | |
mstate< Tstate > | output |
output buffer | |
Tstate * | tmp |
tmp. | |
Tstate * | minput |
input buffer, used with mem optim. | |
svector< mstate< Tstate > > | ppinputs |
Preprocessed inputs of all scales. | |
svector< mstate< Tstate > > | outputs |
Output buffers of all scales. | |
vector< string > | labels |
String label of each class. | |
idxdim | indim |
Input dimensions. | |
idxdim | netdim |
network's input dimensions | |
bool | netdim_fixed |
Do not update netdim if true. | |
vector< rect< int > > | original_bboxes |
Bboxes in image after resizing. | |
int | bgclass |
int | mask_class |
idx< T > | mask |
nms * | pnms |
Non-maximum suppression object. | |
midxdim | scales |
Multi-scale (ideal) scales. | |
midxdim | actual_scales |
Actually used scales. | |
midxdim | manual_scales |
Scales set manually. | |
vector< double > | scale_factors |
A list of scale factors. | |
uint | nscales |
Number of scales if set by hand. | |
double | scales_step |
double | min_scale |
Minimum scale as factor of original res. | |
double | max_scale |
Maximum scale as factor of original res. | |
t_scaling | restype |
resolution type | |
bool | silent |
print results on std output if not silent | |
bool | save_mode |
save detected windows or not | |
string | save_dir |
directory where to save detections | |
vector< uint > | save_counts |
file counter for each class | |
bboxes | raw_bboxes |
raw bboxes extracted from outputs | |
bboxes | pruned_bboxes |
scale-pruned bboxes | |
uint | min_size |
minimum input size to network | |
uint | max_size |
maximum input size to network | |
vector< idx< T > > | odetections |
original windows yielding detection | |
svector< midx< T > > | ppdetections |
preprocessed wins yielding detection | |
bool | bodetections |
odetections is up-to-date or not | |
bool | bppdetections |
ppdetections is up-to-date or not | |
uint | save_max_per_frame |
max number of region saved | |
bool | diverse_ordering |
Saved samples diverse ordering. | |
bool | mem_optimization |
optimize memory or not. | |
bool | optimization_swap |
swap buffers or not. | |
bool | keep_inputs |
optimize input buffers or not. | |
uint | hzpad |
Zero-pad on height (each side). | |
uint | wzpad |
Zero-pad on width (each side). | |
std::ostream & | mout |
output stream. | |
std::ostream & | merr |
error output stream. | |
uint | smoothing_type |
idx< T > | smoothing_kernel |
bool | initialized |
string | outputs_dump |
Outputs dumping name. | |
bool | bboxes_off |
Do not extract bboxes if true. | |
bool | adapt_scales |
Adapt scales to network structure. | |
bool | scaler_mode |
answer_module< T, T, T, Tstate > * | answer |
mstate< Tstate > | answers |
Buffers holding last answers. | |
bool | ignore_outsiders |
Ignore bbs overlapping outside. | |
uint | corners_inference |
0: from net 1: from net + save 2: load | |
bool | corners_infered |
Allows to infer only once. | |
mfidxdim | itl |
mfidxdim | itr |
mfidxdim | ibl |
mfidxdim | ibr |
4 corners in input space. | |
mfidxdim | pptl |
mfidxdim | pptr |
mfidxdim | ppbl |
mfidxdim | ppbr |
4 corners in pp input space. | |
float | pre_threshold |
Threshold for initial bbox extraction. | |
vector< float > | raw_thresholds |
Thresholds for each scale. | |
vector< uint > | scale_indices |
Input scales indices for each output. | |
uint | bbox_decision |
Decision type, 0: regular, 1: corners only. | |
mfidxdim | bbox_scalings |
Friends | |
class | detector_gui |
class | detection_thread |
class | bootstrapping |
ebl::detector< T, Tstate >::detector | ( | module_1_1< T, Tstate > & | thenet, |
vector< string > & | labels, | ||
answer_module< T, T, T, Tstate > * | answer = NULL , |
||
resizepp_module< T, Tstate > * | resize = NULL , |
||
const char * | background = NULL , |
||
std::ostream & | out = std::cout , |
||
std::ostream & | err = std::cerr , |
||
bool | adapt_scales = false |
||
) |
Constructor. Default resolutions are 1, 2 and 4 times the network's size. Resolutions can be set using set_resolutions(). The default background class name "bg" will be searched for in the list of class names. To specify another background class, pass a non-NULL background parameter.
labels | A vector of label strings. |
resize | An optional resizing (and preprocessing) module, e.g. resizepp_module. If null, use resize_module by default. |
background | The name of the background class. Default is "bg". If given, positive answers for this class are ignored. |
adapt_scales | If true, adapt each scale so that they are valid input sizes for 'thenet' network. Otherwise, the network must crop inputs itself (see 'crop' attribute of modules). |
void ebl::detector< T, Tstate >::add_class | ( | const char * | name | ) | [protected] |
Add a name to the vector of class names. This can be useful when generating intermediate classes from existing classes.
void ebl::detector< T, Tstate >::compute_resolutions | ( | midxdim & | scales, |
idxdim & | indim, | ||
vector< double > & | scale_factors | ||
) | [protected] |
Compute each scale as a factor of 'indim' for each element of 'scale_factors' and put them into 'scales' vector.
void ebl::detector< T, Tstate >::compute_resolutions | ( | midxdim & | scales, |
idxdim & | mindim, | ||
idxdim & | maxdim, | ||
double | scales_step | ||
) | [protected] |
Compute each scale with a step of 'scales_step' starting from 'maxdim' down to 'mindim'.
mindim | The minimum scale size. |
maxdim | The maximum scale size. |
void ebl::detector< T, Tstate >::compute_resolutions | ( | midxdim & | scales, |
idxdim & | mindim, | ||
idxdim & | maxdim, | ||
uint | nscales | ||
) | [protected] |
Compute 'nscales' scales between 'mindim' and 'maxdim' resolutions and push them into 'scales' vector.
void ebl::detector< T, Tstate >::compute_resolutions_up | ( | midxdim & | scales, |
idxdim & | indim, | ||
idxdim & | mindim, | ||
idxdim & | maxdim, | ||
double | scales_step | ||
) | [protected] |
Compute each scale with a step of 'scales_step' starting from 'mindim' up to 'maxdim'.
mindim | The minimum scale size. |
maxdim | The maximum scale size. |
void ebl::detector< T, Tstate >::compute_scales | ( | midxdim & | scales, |
idxdim & | netdim, | ||
idxdim & | mindim, | ||
idxdim & | maxdim, | ||
idxdim & | indim, | ||
t_scaling | type, | ||
uint | nscales, | ||
double | scales_step, | ||
const char * | frame_name = NULL |
||
) | [protected] |
Compute all scales based on minimum, maximum and input dimensions, and scaling type.
netdim | The network's minimal input size. |
mindim | The minimum scale size. |
maxdim | The maximum scale size. |
indim | The original input dimensions. |
void ebl::detector< T, Tstate >::extract_bboxes | ( | T | threshold, |
bboxes & | bboxes | ||
) | [protected] |
Extract bounding boxes with higher confidence than 'threshold' from internal 'outputs' into 'bboxes'.
bboxes & ebl::detector< T, Tstate >::fprop | ( | idx< Tin > & | img, |
const char * | fname = NULL |
||
) |
Fprop the input image through the network. If the image's and the network's types differ, the image is cast into the network's type through an idx_copy (avoid for better performance).
fname | Optional name for the frame being processed, used in the output files to be saved. |
void ebl::detector< T, Tstate >::get_corners | ( | mstate< Tstate > & | outputs | ) | [protected] |
Fills internal buffers with 4 image corners coordinates in input and preprocessing space given the output sizes of 'outputs'.
idx< T > ebl::detector< T, Tstate >::get_mask | ( | string & | classname | ) |
Return a mask of output maps with the same size as the input. The mask is a max of all output resolutions.
vector< idx< T > > & ebl::detector< T, Tstate >::get_originals | ( | ) |
Return a reference to a vector of windows in the original image that yielded a detection.
svector< midx< T > > & ebl::detector< T, Tstate >::get_preprocessed | ( | bboxes & | out, |
uint | n = 0 , |
||
bool | diverse = false , |
||
uint | pre_diverse_max = 100 |
||
) |
Return a reference to a vector of windows in the preprocessed/scaled image that yielded a detection.
out | A vector of boxes filled with returned samples boxes. |
n | Limit number of samples to 'n'. If n equals 0, return all. |
diverse | If true, order samples by diversity. |
pre_diverse_max | Limit the number of samples if diverse is enabled because it is an expensive process with complexity O(n^2). |
svector< midx< T > > & ebl::detector< T, Tstate >::get_preprocessed | ( | bboxes & | in, |
bboxes & | out, | ||
uint | n = 0 , |
||
bool | diverse = false , |
||
uint | pre_diverse_max = 100 |
||
) |
Return a reference to a vector of windows in the preprocessed/scaled image that yielded a detection.
in | Input boxes of samples to return. |
out | A vector of boxes filled with returned samples boxes. |
n | Limit number of samples to 'n'. If n equals 0, return all. |
diverse | If true, order samples by diversity. |
pre_diverse_max | Limit the number of samples if diverse is enabled because it is an expensive process with complexity O(n^2). |
midx< T > ebl::detector< T, Tstate >::get_preprocessed | ( | const bbox & | b | ) |
Return the preprocessed input corresponding to bounding box 'b', or throw an exception if it is out of bounds.
void ebl::detector< T, Tstate >::prepare | ( | idx< Tin > & | img, |
const char * | fname = NULL |
||
) | [protected] |
Prepare image and resolutions. This should be called before prepare_scale(). This mostly involves casting the image into the network's type and computing each scale's dimensions (no resizing) based on the image's size.
void ebl::detector< T, Tstate >::prepare_scale | ( | uint | i | ) | [protected] |
Do preprocessing (resizing and channel/edge processing) for a particular resolution. This will set 'input' and 'output' buffers, that can then be used to fprop the network. This uses the 'image' member prepared by prepare() and should therefore be called after prepare().
i | The index of the resolution to be preprocessed. |
void ebl::detector< T, Tstate >::save_bboxes | ( | bboxes & | bboxes, |
const string & | dir, | ||
const char * | frame_name = NULL |
||
) | [protected] |
Save all bounding boxes of the original (in original resolution) and preprocessed (resized and filtered) input into directory 'dir'.
void ebl::detector< T, Tstate >::set_bbox_decision | ( | uint | type | ) |
Select the bbox extraction decision type. 0: decision based on confidence threshold. 1: only extract output corners.
void ebl::detector< T, Tstate >::set_corners_inference | ( | uint | type | ) |
The image corners transformation from outputs to input is inferred back through the network (type is 0 or 1), and saved into "corners.mat" (type 1), or loaded from "corners.mat" only (type 2).
bool ebl::detector< T, Tstate >::set_mask_class | ( | const char * | mask | ) |
Set the mask class, which is ignored by bounding box detection. Instead, call get_mask() to retrieve a mask map of values above a given threshold. This is useful for continuous classes rather than discrete classes.
void ebl::detector< T, Tstate >::set_max_resolution | ( | uint | max_size | ) |
Set the maximum size of each side of an input to use as input to the network. I.e. an input to the network will be at most max_size * max_size big.
max_size | The maximum width or height in pixels. |
void ebl::detector< T, Tstate >::set_mem_optimization | ( | Tstate & | in, |
Tstate & | out, | ||
bool | keep_inputs = false |
||
) |
Enable memory optimization by using only 2 buffers (in and out) for entire flow. Those same buffers must have been passed to the network's constructor.
keep_inputs | If false, re-use input buffers for optimization. If true, some operations may be unavailable, such as saving the pre-processed detected windows. |
void ebl::detector< T, Tstate >::set_min_resolution | ( | uint | min_size | ) |
Set the minimum size of each side of an input to use as input to the network. I.e. an input to the network will be at least min_size * min_size big.
min_size | The minimum width or height in pixels. |
void ebl::detector< T, Tstate >::set_nms | ( | t_nms | type = nms_overlap , |
float | pre_threshold = 0.0 , |
||
float | post_threshold = 0.0 , |
||
float | pre_hfact = 1.0 , |
||
float | pre_wfact = 1.0 , |
||
float | post_hfact = 1.0 , |
||
float | post_wfact = 1.0 , |
||
float | woverh = 1.0 , |
||
float | max_overlap = 1.0 , |
||
float | max_hcenter_dist = 0.0 , |
||
float | max_wcenter_dist = 0.0 , |
||
float | vote_max_overlap = 1.0 , |
||
float | vote_max_hcenter_dist = 0.0 , |
||
float | vote_max_wcenter_dist = 0.0 |
||
) |
Enable nms of type 'type'. Refer to the t_nms declaration for the different types. The default type is nms_overlap (regular pruning).
void ebl::detector< T, Tstate >::set_outputs_dumping | ( | const char * | name | ) |
Enables dumping of all outputs using the base name 'name', to which the idx's size and '.mat' are appended. Each resolution will be dumped as a separate matrix file. Dumping will be called at the end of each fprop call for each resolution.
void ebl::detector< T, Tstate >::set_resolutions | ( | double | scales_steps, |
double | max_scale = 1.0 , |
||
double | min_scale = 1.0 |
||
) |
Specify resolutions by the factor step, starting from factor 1 (network's size), adding 'scales_steps' until reaching the original resolution.
max_scale | The maximum scale factor of the original resolution, 1.0 by default, i.e. the original resolution. |
min_scale | The minimum scale factor of the smallest network size, 1.0 by default, meaning the minimum network input size. |
void ebl::detector< T, Tstate >::set_resolutions | ( | int | resolutions | ) |
Use 'resolutions' resolutions between the maximum resolution and the minimum resolution.
resolutions | The number of resolutions to use. |
void ebl::detector< T, Tstate >::set_resolutions | ( | idx< uint > & | resolutions | ) |
Specify resolutions by hand in an nx2 idx (heightxwidth), e.g. 240x320, 120x160.
resolutions | A uint idx containing resolutions (of size nx2) |
string & ebl::detector< T, Tstate >::set_save | ( | const string & | directory, |
uint | nmax = 0 , |
||
bool | diversity = false |
||
) |
Enable saving of each (preprocessed) window inducing a positive detection into directory. All detections except for the background class are dumped into a directory corresponding to the class' name. This returns the directory string used.
nmax | Limit the number of windows saved per frame. |
diversity | If true, order samples to be saved by diversity. |
void ebl::detector< T, Tstate >::set_zpads | ( | float | hzpad, |
float | wzpad | ||
) |
Add zero padding of (hzpad * the network's minimum input height) on each vertical side and (wzpad * min width) on each horizontal side.
void ebl::detector< T, Tstate >::update_merge_alignment | ( | ) | [protected] |
If a merge module was found in the network, update its parameters so that merging is aligned on top left corner of all inputs.
void ebl::detector< T, Tstate >::validate_resolutions | ( | ) | [protected] |
Checks that resolutions match the network size and, if not, adjusts them. This method assumes the nresolutions and resolutions members have already been initialized.
Tstate* ebl::detector< T, Tstate >::input [protected] |
A forward buffer containing input image.
input buffer