detect
detect finds objects in images at multiple scales and outputs the
corresponding bounding boxes, given a trained model and variable
definitions found in a configuration file.
Calls
./detect pedestrians.conf
- When variable 'input_dir' is not defined and the inputs are image
files (camera = directory), one can pass the images directory path
as the second argument, e.g.:
./detect pedestrians.conf /images/pedestrians/
- When the input is a video file (camera = video),
one must pass the file path as the second argument, e.g.:
./detect pedestrians.conf /videos/pedestrians.flv
Outputs
All outputs are saved in a directory named 'out_[timestamp]'
created in the calling directory.
- When variable 'save_detections' equals 1, each detected region of the
original image is saved as an individual image file in subdirectory
'detections/originals', and the exact input to the model (usually
preprocessed and scaled) is saved as a matrix file in subdirectory
'detections/preprocessed'.
- When variable 'save_video' equals 1, original images with bounding
boxes overlaid are saved in subdirectory 'video'.
- When variable 'bbox_saving' is non-zero, bounding box coordinates
are saved in file 'bbox.txt'. Different values of 'bbox_saving'
select different saving formats:
- 1: save in all formats (in different files)
- 2: save in eblearn format
- 3: save in caltech format
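For instance, a run with 'save_detections' and 'save_video' set to 1 and
'bbox_saving' set to 2 would leave a layout along these lines (the exact
timestamp format is only indicative):
  out_20120514.173042/
    bbox.txt                  (bounding boxes, eblearn format)
    detections/originals/     (detected regions cropped from input images)
    detections/preprocessed/  (exact model inputs, saved as matrices)
    video/                    (input images with bounding boxes overlaid)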
Required configuration variables
- weights:
weights file of the trained model.
- classes:
names of the output classes, stored in a matrix file.
- camera:
the type of input. Possible values are:
- directory: inputs are images from a directory, whose path can be
defined by variable 'input_dir' or passed as the second command-line
argument.
- video: take a video file as input, whose path must be passed as the
second command-line argument.
- v4l2: use the v4l2 camera interface (Linux only). The user must also
define variable 'device' to select the v4l2 device, e.g.
'device = /dev/video0'.
- kinect: use the Kinect camera.
- opencv: use OpenCV's camera interface as video input.
- shmem: use shared memory as the video feed.
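A minimal configuration for image-directory input could thus look like the
following sketch (configuration files use 'name = value' assignments, as in
the 'device' example above; the model paths here are hypothetical):
  weights = ${HOME}/models/pedestrians_net.mat
  classes = ${HOME}/models/pedestrians_classes.mat
  camera = directory
  input_dir = /images/pedestrians/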
Optional configuration variables
- input_dir:
the directory containing images, when using images as inputs
(i.e. when 'camera' equals 'directory').
The remaining optional variables are listed below with example values
taken from a pedestrians configuration, grouped by topic:
- meta_name: ${name}_${machine} # name of this meta job
- meta_max_jobs: 12 # maximum number of jobs to run at the same time
- meta_output_dir: ${root}/../out/ # directory where to write outputs
- meta_gnuplot_params: "set grid ytics;set ytics;set mytics;set grid mytics;set logscale y; set mxtics; set grid xtics; " # extra gnuplot parameters
# analyze processes output or not. if 0, the meta_trainer will have no notion
# of iteration and will only send 1 report at the very end.
# if 1, meta_trainer will try to find iteration and other variable values
# in each process' output.
- meta_analyze: 1
- meta_send_email: 1 # emailing results or not
- meta_email: ${myemail} # email to use (use environment variable "myemail")
# iterations at which to send an email
- meta_email_iters: 0,1,2,3,4,5,7,10,15,20,30,50,75,100,200
- meta_email_period: 1 # send email with this freq (if email_iters not defined)
- meta_watch_interval: 30 # interval sec to analyze outputs and check who's alive
# variables to minimize, process and iteration with lowest value will
# be used to report best weights, or start consequent training
- meta_minimize: test_errors,errors,test_energy,energy,1FPPI,.01FPPI
- meta_ignore_iter0: 1 # do not take results for i: 0 into account
- meta_sticky_vars: job,config,classes # vars to keep around at each iteration
- meta_watch_vars: #job,1FPPI,.01FPPI # restrict variable watching to those
- meta_nbest: 5 # number of best answers to show/send
- meta_send_best: 0 # if 1 send best answers files minimizing meta_minimize's value
- meta_send_logs: 0 # send logs of all jobs or not
- meta_no_conf_id: 0 # do not use conf ids for naming
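Note that the meta_* variables above are not read by detect itself: they
drive eblearn's meta tool (the meta_trainer mentioned in the comments),
which launches and monitors several jobs from a single configuration.
Assuming the metarun binary shipped with the eblearn tools (name to be
checked against your installation), such a sweep would be started with:
  ./metarun pedestrians.conf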
################################################################################
# local program configuration
- name: inria
- machine: ${HOSTNAME}a
- ebl: ${HOME}/eblpierre/ # eblearn root
- root: ${HOME}/${machine}data/ped/${name}/ds/ # datasets root
#root2: ${ebl}/demos/pedestrians/trained/ # trained weights root
# network high level switches ##################################################
- net_type: cscsc #cscscf
- net: sr1c #mr1c mr2c # net architecture
- manual_load: 1 # manually load weights for individual modules
- color: 1
# architecture #################################################################
- arch: ${arch_${net}_${net_type}} # global architecture
- mirror: 0 # mirror instead of zero-padding (default)
- nonlin: tanh # stdsig # non-linearity module
# machines
- arch_mr1_cscsc: ${c0},${s1},${c2},${s3},branch5,merge21,${f5}
- arch_mr2_cscsc: ${c0},${s1},branch2,${c2},${s3},branch5,merge22,${f5}
- arch_sr1c_cscsc: branch1,${c0},${s1},merge1,${c2},${s3},${c5}
- arch_mr1c_cscsc: branch1,${c0},${s1},merge1,${c2},${s3},branch5,merge21,${f5}
#arch_mr2c_cscsc: branch1,${c0},${s1},merge1,branch2,${c2},${s3},branch5,merge22,${f5}
#arch_mr2c_cscscf: branch1,${c0},${s1},merge1,branch2,${c2},${s3},branch5,merge22,${f6},${f7}
- arch_mr2c_cscsc: branch1,${c0},${s1},merge1,branch2,${c2},${s3},merge22,${f5}
- arch_mr2c_cscscf: branch1,${c0},${s1},merge1,branch2,${c2},${s3},merge22,${f6},${f7}
- branch1: resize00,${c00} # color branch
- branch2: ${s20} # 1st multi-scale branch (biggest)
#branch2: ${s20},branch3 # 1st multi-scale branch (biggest)
- branch3: ${s21},${branch4} # 1st multi-scale branch (medium)
- branch4: ${s22} # 1st multi-scale branch (smallest)
- branch5: ${s50},branch6 # 2nd multi-scale branch (medium)
- branch6: ${s51} # 2nd multi-scale branch (smallest)
# main branch layers
- c0: conv0,addc0,${nonlin},diag0,abs0,wstd0
- s1: subs1,addc1,${nonlin}
- c2: conv2,addc2,${nonlin},diag2,abs2,wstd2
- s3: subs3,addc3,${nonlin}
- c5: conv5,addc5,${nonlin}
- f5: linear5,addc5,${nonlin}
- f6: linear6,addc6,${nonlin}
- f7: linear7,addc7,${nonlin}
# color branch layers
- c00: conv00,addc00,${nonlin}00,diag00,abs00,wstd00
# multi scale branches layers
- s20: subs00,addc,${nonlin}
- s21: ${s20}
- s22: ${s20}
- s50: ${s20}
- s51: ${s20}
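As an illustration of how the architecture string is assembled, with
'net: sr1c', 'net_type: cscsc' and 'nonlin: tanh', 'arch' expands step by
step into:
  arch = ${arch_sr1c_cscsc}
       = branch1,${c0},${s1},merge1,${c2},${s3},${c5}
       = branch1,
         conv0,addc0,tanh,diag0,abs0,wstd0,   (c0)
         subs1,addc1,tanh,                    (s1)
         merge1,
         conv2,addc2,tanh,diag2,abs2,wstd2,   (c2)
         subs3,addc3,tanh,                    (s3)
         conv5,addc5,tanh                     (c5)
where branch1 itself expands to resize00,conv00,addc00,tanh00,diag00,
abs00,wstd00 (the color branch), whose output merge1 concatenates back
into the main trunk.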
# branches parameters
- branch1_type: narrow # feed UV color into branch
- branch1_narrow_dim: 0
- branch1_narrow_size: 2 # U and V
- branch1_narrow_offset: 1
- branch2_type: copy # feed all features to multi-scale branch
- branch3_type: copy # keep 1st layer scale 1 data
- branch4_type: copy # keep 1st layer scale 2 data
- branch5_type: copy # keep 2nd layer scale 2 data
- branch6_type: copy # keep 2nd layer scale 3 data
- merge1_type: concat
- merge1_branches: branch1
- merge1_concat_dim: 0
- merge21_type: flat
- merge21_branches: branch5,branch6
- merge22_type: flat
#merge22_branches: branch2,branch3,branch4,branch5,branch6
- merge22_branches: branch2
- merge22_inh: 16
- merge22_inw: 8
- merge22_branches_inh: 20
- merge22_branches_inw: 12
- merge22_strideh: 1
- merge22_stridew: ${merge22_strideh}
- merge22_branches_strideh: 1
- merge22_branches_stridew: ${merge22_branches_strideh}
# merge22_inh: 16
# merge22_inw: 8
# merge22_branches_inh: 20,10,5,8,4
# merge22_branches_inw: 12,6,3,4,2
# merge22_strideh: 4
# merge22_stridew: ${merge22_strideh}
# merge22_branches_strideh: 4,2,1,2,1
# merge22_branches_stridew: ${merge22_branches_strideh}
- conv00_kerh: 5 # convolution kernel's height
- conv00_kerw: 5 # convolution kernel's width
- conv00_strideh: 1 # convolution stride in height
- conv00_stridew: 1 # convolution stride in width
- conv00_table: ${table00} # convolution table (optional)
- conv00_table_in: 2 # conv input max, used if table file not defined
- conv00_table_out: 6 # features max, used if table file not defined
- conv00_weights: ${wroot0c}${sp0}_layer1_convolution_kernel.mat
- addc00_weights: ${wroot0c}${sp0}_layer2_bias_bias.mat
- diag00_weights: ${wroot0c}${sp0}_layer4_diag_coeff.mat
- wstd00_kerh: ${conv00_kerh} # normalization kernel's height
- wstd00_kerw: ${conv00_kerw} # normalization kernel's width
- subs00_kerh: 2 # subsampling kernel's height
- subs00_kerw: 2 # subsampling kernel's width
- subs00_strideh: ${subs00_kerh} # subsampling stride in height
- subs00_stridew: ${subs00_kerw} # subsampling stride in width
- resize00_hratio: 0.349206349 # 44 / 126
- resize00_wratio: 0.358974359 # 28 / 78
# main branch parameters
- inputh: 126 # input's height
- inputw: 78 # input's width
- conv0_kerh: 7 # convolution kernel's height
- conv0_kerw: 7 # convolution kernel's width
- conv0_strideh: 1 # convolution stride in height
- conv0_stridew: 1 # convolution stride in width
- conv0_table: ${table0_color0} # convolution table (optional)
- conv0_table_in: 1 # conv input max, used if table file not defined
- conv0_table_out: ${table0_max} # features max, used if table file not defined
- conv0_weights: ${wroot0}${sp0}_layer1_convolution_kernel.mat
- addc0_weights: ${wroot0}${sp0}_layer2_bias_bias.mat
- diag0_weights: ${wroot0}${sp0}_layer4_diag_coeff.mat
- wstd0_kerh: 7 # normalization kernel's height
- wstd0_kerw: 7 # normalization kernel's width
- subs1_kerh: 3 # subsampling kernel's height
- subs1_kerw: 3 # subsampling kernel's width
- subs1_strideh: ${subs1_kerh} # subsampling stride in height
- subs1_stridew: ${subs1_kerw} # subsampling stride in width
- addc1_weights: # weights to be loaded if manual_load: 1
- conv2_kerh: 9 # convolution kernel's height
- conv2_kerw: 9 # convolution kernel's width
- conv2_strideh: 1 # convolution stride in height
- conv2_stridew: 1 # convolution stride in width
- conv2_table: ${table1} # convolution table (optional)
- conv2_table_in: thickness # use current thickness as max table input
- conv2_table_out: ${table1_max} # features max, used if table file not defined
- conv2_weights: ${wroot1c}${sp0}${sp1}_layer1_convolution_kernel.mat
- addc2_weights: ${wroot1c}${sp0}${sp1}_layer2_bias_bias.mat
- diag2_weights: ${wroot1c}${sp0}${sp1}_layer4_diag_coeff.mat
- wstd2_kerh: ${conv2_kerh} # normalization kernel's height
- wstd2_kerw: ${conv2_kerw} # normalization kernel's width
- subs3_kerh: 2 # subsampling kernel's height
- subs3_kerw: 2 # subsampling kernel's width
- subs3_strideh: ${subs3_kerh} # subsampling stride in height
- subs3_stridew: ${subs3_kerw} # subsampling stride in width
- addc3_weights: # weights to be loaded if manual_load: 1
- linear5_in: ${linear5_in_${net}} #thickness # linear module input features size
- linear5_out: noutputs # use number of classes as max table output
- linear6_in: ${linear6_in_${net}} #thickness # linear module input features size
- linear6_out: 10 # linear module output features size
- linear7_in: ${linear6_out} #thickness # linear module input features size
- linear7_out: noutputs # use number of classes as max table output
- conv5_kerh: 16 # convolution kernel's height
- conv5_kerw: 8 # convolution kernel's width
- conv5_strideh: 1 # convolution stride in height
- conv5_stridew: 1 # convolution stride in width
- conv5_table_in: thickness # use current thickness as max table input
- conv5_table_out: noutputs # features max, used if table file not defined
- linear5_in_mr1: 10752
- linear5_in_mr2: 20832
- linear5_in_mr1c: 11424
- linear5_in_mr2c: 17824 #23394
- linear6_in_mr2c: 17824 #8704
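As a sanity check, the feature-map geometry implied by these values can be
traced by hand for the sr1c architecture (a convolution shrinks a map by
kernel size minus 1, a subsampling divides it by its stride):
  main (Y) trunk:     126x78 -> conv0 7x7 -> 120x72 -> subs1 3x3/3 -> 40x24
  color (UV) branch1: 126x78 -> resize00 (44/126, 28/78) -> 44x28
                      -> conv00 5x5 -> 40x24
  merge1 (feature concatenation): 40x24
  trunk continued:    conv2 9x9 -> 32x16 -> subs3 2x2/2 -> 16x8
                      -> conv5 16x8 -> 1x1 output
The 16x8 map entering conv5 matches 'merge22_inh'/'merge22_inw' above, and
a branch forked after merge1 then passed through subs00 (2x2/2) yields
20x12, matching 'merge22_branches_inh'/'merge22_branches_inw'.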
# manual loading ##############################################################
- sp0: 12 #0703 2007 # 0301 1205 #03 07 12 # sparsity layer0
- sp1: 12 #03 07 12 # sparsity layer1
#wroot0: /home/pierre/eblpierre/tools/demos/pedestrians/inria/koray/pedmachines/machine5732
#wroot0: ${HOME}/koray/color/machine57
#wroot0: /data/koray/pedmachines/machine5732
#wroot1: /data/koray/pedmachines2/machine95732
- wroot0: /data/koray/pedmachines_Y/machine50732
- wroot0c: /data/koray/pedmachines_UV/machine50706
- wroot1c: /data/koray/pedmachines_YUV/machine
# tables #######################################################################
- tblroot: ${ebl}/tools/data/tables/ # location of table files
# conv00
- table00: ${tblroot}/table_2_6_connect_6_fanin_0_density_0.5_uv0_u3_v6.mat
# conv0
- table0_max: 32 #64 # full table output max (overridden if table file defined)
- table0_color0: # no color, use table0_max for full table
- table0_color1: ${tblroot}/table_3_32_connect_32_fanin_0_density_0.33_yuv0_y26_u29_v32.mat
# conv1
- tbl: 3
- table1_max: 64 # 64 96 128 256
- tbl1_mr1: ${tblroot}/table_32_64_connect_1664_fanin_26_density_0.81_random.mat
- tbl1_mr2: ${tblroot}/table_32_64_connect_1664_fanin_26_density_0.81_random.mat
- tbl1_2: ${tblroot}/table_32_96_connect_2496_fanin_26_density_0.81_random.mat
- tbl1_sr1c: ${tblroot}/table_38_68_connect_2040_fanin_30_density_0.79_random.mat
- tbl1_mr1c: ${tblroot}/table_38_68_connect_2040_fanin_30_density_0.79_random.mat
- tbl1_mr2c: ${tblroot}/table_38_68_connect_2040_fanin_30_density_0.79_random.mat
#tbl_mrc: ${tbl1_3}
#tbl_mr: ${tbl1_1}
- table1: ${tbl1_${net}}
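The table file names encode their geometry and can be checked by hand: in
table_32_64_connect_1664_fanin_26_density_0.81_random.mat, 32 is the number
of input features, 64 the number of output features, 1664 the total number
of connections (64 outputs x 26 fan-in), and 0.81 the connection density
(1664 / (32 x 64) = 0.8125).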
# preprocessing ################################################################
- preprocessing: 1 # 0: none 1: contrast normalization (optional)
- resize: mean # bilinear
- normalization_size: 7 # 9
# training #####################################################################
- ds: 1 # dataset id
- dsname: ${name}_${resize}${inputh}x${inputw}_ker${normalization_size}_bg
- train: ${dsname}_train_${ds} # training set
- val: ${dsname}_val_${ds} # validation set
#root: /data/pedestrians/small/
#train: small
#val: small
- reg: 0 .000001
#reg: 0 .0001
#reg: .000001
#eta: .000001 .00001 # learning rate
- eta: .000005 .00001 .00002 # learning rate (several values: see note after this training section)
#eta: .0000001 .0000005 .000001 .0000025 .000005 .0000075 #.00001 #.00002 # learning rate
#eta: .0000005 .000005 .00001 # learning rate
- reg_l1: ${reg} #.0001 # L1 regularization
- reg_l2: ${reg} #.0001 # L2 regularization
- iterations: 20 # number of training iterations
- ndiaghessian: 400 #800 1200 # number of samples for 2nd derivatives estimation
- epoch_mode: 0 # 0: fixed number 1: show all at least once
- epoch_size: 4000 # number of training samples per epoch. comment to ignore.
- epoch_show_modulo: 100 # print message every n training samples
- sample_probabilities: 0 # use probabilities to pick samples
- hardest_focus: 1 # 0: focus on easiest samples 1: focus on hardest ones
- ignore_correct: 1 # If 1, do not train on correctly classified samples
- min_sample_weight: 0 #.1 .5 1 # minimum probability of each sample
- per_class_norm: 1 # normalize probability by class (1) or globally (0)
- shuffle_passes: 1 # shuffle samples between passes
- balanced_training: 1 # show each class the same amount of samples or not
- no_training_test: 1 # do not test on training set if 1
- target_factor: 1 # factor by which to multiply the -1 and 1 targets
- save_pickings: 1 # save sample picking statistics
- binary_target: 0 # use only 1 output, -1 for negative examples, +1 for positive
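Note on multi-valued variables: entries such as 'reg: 0 .000001' and
'eta: .000005 .00001 .00002' list several candidate values. When the
configuration is passed to the meta tool, every combination is expanded
into its own job (here 2 x 3 = 6 training jobs, subject to meta_max_jobs);
when running a single process directly, keep a single value per variable.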
# training display #############################################################
- show_train: 1 # enable/disable all training display
- show_train_ninternals: 0 # number of internal examples to display
- show_train_errors: 0 # show worst errors on training set
- show_val_errors: 1 # show worst errors on validation set
- show_val_correct: 1 # show worst corrects on validation set
- show_hsample: 5 # number of samples to show on height axis
- show_wsample: 18 # number of samples to show on width axis
# retraining ###################################################################
- retrain: 0
- retrain_weights: # ${root1}/${job_name_retraining}_net040.mat
# detection ####################################################################
- net_min_height: ${inputh}
- net_min_width: ${inputw}
- nthreads: 1 # number of detection threads
- ipp_cores: 1 # number of cores used by IPP
- weights: ${root2}/${weights_file}
- classes: ${root2}/${job_name}_classes.mat
- threshold: .1 # confidence detection threshold
- gain: 1
- input_height: -1 # use -1 to use original size
- input_width: -1 # use -1 to use original size
- input_min: 0 # minimum height or width for minimum scale
- input_max: 1200 # maximum height or width for maximum scale
# multi-scaling type. 0: manually set each scale's size, 1: manually set each
# scale step, 2: number of scales between min and max, 3: step factor between
# min and max, 4: 1 scale, the original image size. A worked example follows
# this detection block.
- scaling_type: 3
- scaling: 1.1 # scaling ratio between scales
- min_scale: .75 # min scale as factor of minimal network size
- max_scale: 1.3 # max scale as factor of original resolution
- input_random: 1 # randomize input list (only works for 'directory' camera).
- input_npasses: 1 # passes on the input list (only works for 'directory' cam).
- hzpad: .3 # vertical zero padding on each side as ratio of network's min input
- wzpad: .3 # horizontal zero padding on each side as ratio of network's min input
#mem_optimization: 1
- bbox_saving: 2 # 0: none 1: all styles 2: eblearn style 3: caltech style
- max_object_hratio: 0 #13.5 # image's height / object's height, 0 to ignore
- smoothing: 0 # smooth network outputs
- background_name: bg # name of background class (optional)
# non-maximum suppression (nms) ################################################
- nms: 1 # 0: no pruning 1: ignore overlapping bb 2: pedestrian custom
- bbox_file: #bbox.txt # ignore processing, feed pre-computed bboxes to nms
- bbhfactor: .7 # height factor to apply to bounding boxes
- bbwfactor: .5 # width factor to apply to bounding boxes
- confidence_type: 2 # 0: sqrdist 1: single output 2: max other (recommended)
- max_bb_overlap: .5 # minimum ratio with smallest bbox to declare overlap
- min_hcenter_dist: .4 # centers closer than this ratio over height cancel out
- min_wcenter_dist: .2 # centers closer than this ratio over width cancel out
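As a worked example of the scaling variables, take a hypothetical 640x480
input with the values above (scaling_type 3, step factor 1.1): the largest
scale is bounded by max_scale times the original resolution (1.3 x 480 =
624 pixels high, with input_max = 1200 as a further cap), the smallest by
min_scale times the 126-pixel network minimum height (about 94 pixels),
and consecutive scales differ by a factor of 1.1, i.e. roughly
log(624/94) / log(1.1) ~ 20 scales (the exact count depends on the
detector's rounding).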
# detection display ############################################################
- skip_frames: 0 # skip this number of frames before each processed frame
- save_detections: 0 # output saving and display
- save_max: 25000 # Exit when this number of objects has been saved
- save_max_per_frame: 10 # Only save the first n objects per frame
- save_video: 0 # save each classified frame and make a video out of it
- save_video_fps: 5
- use_original_fps: 0
- display: 1
- display_zoom: 1 # zooming
- display_min: -1.7 # minimum data range to display (optional)
- display_max: 1.7 # maximum data range to display (optional)
- display_in_min: 0 # input image min display range (optional)
- display_in_max: 255 # input image max display range (optional)
- display_bb_transparency: .5 # bbox transp factor (modulated by confidence)
- display_threads: 0 # each thread displays on its own
- display_states: 0 # display internal states of 1 resolution
- show_parts: 0 # show parts composing an object or not
- silent: 0 # minimize outputs to be printed
- sync_outputs: 1 # synchronize output between threads
- minimal_display: 1 # only show classified input
- display_sleep: 0 # sleep in milliseconds after displaying
- ninternals: 1
# demo display variables
- queue1: 0
- qstep1: 1
- qheight1: 5
- qwidth1: 2
- queue2: 0
- qstep2: 50
- qheight2: 5
- qwidth2: 5
- precamera: 0 # pre-camera (used before regular camera)
- precamdir: ${root2}/
- camera: directory # camera options: opencv shmem video directory
# specify a custom image search pattern (optional)
- file_pattern: #".*[.](png|jpg|jpeg|PNG|JPG|JPEG|bmp|BMP|ppm|PPM|pnm|PNM|pgm|PGM|gif|GIF)"
# limit of input video duration in seconds, 0 means no limit
- input_video_max_duration: 0
# step between input frames in seconds, 0 means no step
- input_video_sstep: 0
# evaluation ###################################################################
- set: Test
- evaluate: 1
- evaluate_cmd: "${visiongrader} ${visiongrader_params}"
- visiongrader: ${HOME}/visiongrader/src/main.py
- visiongrader_params: "${input_params} ${groundtruth_params} ${compare_params} ${curve_params} ${ignore} "
- input_params: "--input bbox.txt --input_parser eblearn --sampling 50 "
- annotations: ${root}/../INRIAPerson/${set}/annotations/
- groundtruth_params: "--groundtruth ${annotations} --groundtruth_parser inria --gt_whratio .43 "
- compare_params: "--comparator overlap50percent --comparator_param .5 "
- curve_params: "--det --saving-file curve.pickle --show-no-curve "
- input_dir: /home/sermanet/${machine}data/ped/inria/INRIAPerson/${set}/pos
- ignore: "--ignore ${HOME}/visiongrader/datasets/pedestrians/inria/ignore/${set}/ "