1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #pragma once
19
20 #ifdef _WIN32
21 #ifdef DBREG_EXPORTS
22 #define DBREG_API __declspec(dllexport)
23 #else
24 #define DBREG_API __declspec(dllimport)
25 #endif
26 #else
27 #define DBREG_API
28 #endif
29
30 // @jke - the next few lines are for extracting timing data. TODO: Remove after test
31 #define PROFILE 0
32
33 #include "dbstabsmooth.h"
34
35 #include <db_feature_detection.h>
36 #include <db_feature_matching.h>
37 #include <db_rob_image_homography.h>
38
39 #if PROFILE
40 #include <sys/time.h>
41 #endif
42
43 /*! \mainpage db_FrameToReferenceRegistration
44
45 \section intro Introduction
46
47 db_FrameToReferenceRegistration provides a simple interface to a set of sophisticated algorithms for stabilizing
48 video sequences. As its name suggests, the class is used to compute parameters that will allow us to warp incoming video
49 frames and register them with respect to a so-called <i>reference</i> frame. The reference frame is simply the first
50 frame of a sequence; the registration process is that of estimating the parameters of a warp that can be applied to
51 subsequent frames to make those frames align with the reference. A video made up of these warped frames will be more
52 stable than the input video.
53
54 For more technical information on the internal structure of the algorithms used within the db_FrameToRegistration class,
55 please follow this <a href="../Sarnoff image registration.docx">link</a>.
56
57 \section usage Usage
58 In addition to the class constructor, there are two main functions of db_FrameToReferenceRegistration that are of
59 interest to the programmer. db_FrameToReferenceRegistration::Init(...) is used to initialize the parameters of the
60 registration algorithm. db_FrameToReferenceRegistration::AddFrame(...) is the method by which each new video frame
61 is introduced to the registration algorithm, and produces the estimated registration warp parameters.
62
63 The following example illustrates how the major methods of the class db_FrameToReferenceRegistration can be used together
64 to calculate the registration parameters for an image sequence. In the example, the calls to the methods of
65 db_FrameToReferenceRegistration match those found in the API, but supporting code should be considered pseudo-code.
66 For a more complete example, please consult the source code for dbregtest.
67
68
69 \code
70 // feature-based image registration class:
71 db_FrameToReferenceRegistration reg;
72
73 // Image data
74 const unsigned char * const * image_storage;
75
76 // The 3x3 frame to reference registration parameters
77 double frame_to_ref_homography[9];
78
79 // a counter to count the number of frames processed.
80 unsigned long frame_counter;
81 // ...
82
83 // main loop - keep going while there are images to process.
84 while (ImagesAreAvailable)
85 {
86 // Call functions to place latest data into image_storage
87 // ...
88
89 // if the registration object is not yet initialized, then do so
90 // The arguments to this function are explained in the accompanying
91 // html API documentation
92 if (!reg.Initialized())
93 {
94 reg.Init(w,h,motion_model_type,25,linear_polish,quarter_resolution,
95 DB_POINT_STANDARDDEV,reference_update_period,
96 do_motion_smoothing,motion_smoothing_gain,
97 DB_DEFAULT_NR_SAMPLES,DB_DEFAULT_CHUNK_SIZE,
98 nr_corners,max_disparity);
99 }
100
101 // Present the new image data to the registration algorithm,
102 // with the result being stored in the frame_to_ref_homography
103 // variable.
104 reg.AddFrame(image_storage,frame_to_ref_homography);
105
106 // frame_to_ref_homography now contains the stabilizing transform
107 // use this to warp the latest image for display, etc.
108
109 // if this is the first frame, we need to tell the registration
110 // class to store the image as its reference. Otherwise, AddFrame
111 // takes care of that.
112 if (frame_counter == 0)
113 {
114 reg.UpdateReference(image_storage);
115 }
116
117 // increment the frame counter
118 frame_counter++;
119 }
120
121 \endcode
122
123 */
124
125 /*!
126 * Performs feature-based frame to reference image registration.
127 */
128 class DBREG_API db_FrameToReferenceRegistration
129 {
130 public:
131 db_FrameToReferenceRegistration(void);
132 ~db_FrameToReferenceRegistration();
133
134 /*!
135 * Set parameters and allocate memory. Note: The default values of these parameters have been set to the values used for the android implementation (i.e. the demo APK).
136 * \param width image width
137 * \param height image height
138 * \param homography_type see definitions in \ref LMRobImageHomography
139 * \param max_iterations max number of polishing steps
140 * \param linear_polish whether to perform a linear polishing step after RANSAC
141 * \param quarter_resolution whether to process input images at quarter resolution (for computational efficiency)
142 * \param scale Cauchy scale coefficient (see db_ExpCauchyReprojectionError() )
143 * \param reference_update_period how often to update the alignment reference (in units of number of frames)
144 * \param do_motion_smoothing whether to perform display reference smoothing
145 * \param motion_smoothing_gain weight factor to reflect how fast the display reference must follow the current frame if motion smoothing is enabled
146 * \param nr_samples number of times to compute a hypothesis
147 * \param chunk_size size of cost chunks
148 * \param cd_target_nr_corners target number of corners for corner detector
149 * \param cm_max_disparity maximum disparity search range for corner matcher (in units of ratio of image width)
150 * \param cm_use_smaller_matching_window if set to true, uses a correlation window of 5x5 instead of the default 11x11
151 * \param cd_nr_horz_blocks the number of horizontal blocks for the corner detector to partition the image
152 * \param cd_nr_vert_blocks the number of vertical blocks for the corner detector to partition the image
153 */
154 void Init(int width, int height,
155 int homography_type = DB_HOMOGRAPHY_TYPE_DEFAULT,
156 int max_iterations = DB_DEFAULT_MAX_ITERATIONS,
157 bool linear_polish = false,
158 bool quarter_resolution = true,
159 double scale = DB_POINT_STANDARDDEV,
160 unsigned int reference_update_period = 3,
161 bool do_motion_smoothing = false,
162 double motion_smoothing_gain = 0.75,
163 int nr_samples = DB_DEFAULT_NR_SAMPLES,
164 int chunk_size = DB_DEFAULT_CHUNK_SIZE,
165 int cd_target_nr_corners = 500,
166 double cm_max_disparity = 0.2,
167 bool cm_use_smaller_matching_window = false,
168 int cd_nr_horz_blocks = 5,
169 int cd_nr_vert_blocks = 5);
170
171 /*!
172 * Reset the transformation type that is being use to perform alignment. Use this to change the alignment type at run time.
173 * \param homography_type the type of transformation to use for performing alignment (see definitions in \ref LMRobImageHomography)
174 */
ResetHomographyType(int homography_type)175 void ResetHomographyType(int homography_type) { m_homography_type = homography_type; }
176
177 /*!
178 * Enable/Disable motion smoothing. Use this to turn motion smoothing on/off at run time.
179 * \param enable flag indicating whether to turn the motion smoothing on or off.
180 */
ResetSmoothing(bool enable)181 void ResetSmoothing(bool enable) { m_do_motion_smoothing = enable; }
182
183 /*!
184 * Align an inspection image to an existing reference image, update the reference image if due and perform motion smoothing if enabled.
185 * \param im new inspection image
186 * \param H computed transformation from reference to inspection coordinate frame. Identity is returned if no reference frame was set.
187 * \param force_reference make this the new reference image
188 */
189 int AddFrame(const unsigned char * const * im, double H[9], bool force_reference=false, bool prewarp=false);
190
191 /*!
192 * Returns true if Init() was run.
193 */
Initialized()194 bool Initialized() const { return m_initialized; }
195
196 /*!
197 * Returns true if the current frame is being used as the alignment reference.
198 */
IsCurrentReference()199 bool IsCurrentReference() const { return m_current_is_reference; }
200
201 /*!
202 * Returns true if we need to call UpdateReference now.
203 */
204 bool NeedReferenceUpdate();
205
206 /*!
207 * Returns the pointer reference to the alignment reference image data
208 */
GetReferenceImage()209 unsigned char ** GetReferenceImage() { return m_reference_image; }
210
211 /*!
212 * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched reference image corners.
213 */
GetRefCorners()214 double * GetRefCorners() { return m_corners_ref; }
215 /*!
216 * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched inspection image corners.
217 */
GetInsCorners()218 double * GetInsCorners() { return m_corners_ins; }
219 /*!
220 * Returns the number of correspondences between the reference and inspection images.
221 */
GetNrMatches()222 int GetNrMatches() { return m_nr_matches; }
223
224 /*!
225 * Returns the number of corners detected in the current reference image.
226 */
GetNrRefCorners()227 int GetNrRefCorners() { return m_nr_corners_ref; }
228
229 /*!
230 * Returns the pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists.
231 */
GetInliers()232 int* GetInliers() { return m_inlier_indices; }
233
234 /*!
235 * Returns the number of inliers from the RANSAC matching step.
236 */
GetNrInliers()237 int GetNrInliers() { return m_num_inlier_indices; }
238
239 //std::vector<int>& GetInliers();
240 //void Polish(std::vector<int> &inlier_indices);
241
242 /*!
243 * Perform a linear polishing step by re-estimating the alignment transformation using the RANSAC inliers.
244 * \param inlier_indices pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists.
245 * \param num_inlier_indices number of inliers i.e. the length of the array passed as the first argument.
246 */
247 void Polish(int *inlier_indices, int &num_inlier_indices);
248
249 /*!
250 * Reset the motion smoothing parameters to their initial values.
251 */
ResetMotionSmoothingParameters()252 void ResetMotionSmoothingParameters() { m_stab_smoother.Init(); }
253
254 /*!
255 * Update the alignment reference image to the specified image.
256 * \param im pointer to the image data to be used as the new alignment reference.
257 * \param subsample boolean flag to control whether the function should internally subsample the provided image to the size provided in the Init() function.
258 */
259 int UpdateReference(const unsigned char * const * im, bool subsample = true, bool detect_corners = true);
260
261 /*!
262 * Returns the transformation from the display reference to the alignment reference frame
263 */
264 void Get_H_dref_to_ref(double H[9]);
265 /*!
266 * Returns the transformation from the display reference to the inspection reference frame
267 */
268 void Get_H_dref_to_ins(double H[9]);
269 /*!
270 * Set the transformation from the display reference to the inspection reference frame
271 * \param H the transformation to set
272 */
273 void Set_H_dref_to_ins(double H[9]);
274
275 /*!
276 * Reset the display reference to the current frame.
277 */
278 void ResetDisplayReference();
279
280 /*!
281 * Estimate a secondary motion model starting from the specified transformation.
282 * \param H the primary motion model to start from
283 */
284 void EstimateSecondaryModel(double H[9]);
285
286 /*!
287 *
288 */
289 void SelectOutliers();
290
291 char *profile_string;
292
293 protected:
294 void Clean();
295 void GenerateQuarterResImage(const unsigned char* const * im);
296
297 int m_im_width;
298 int m_im_height;
299
300 // RANSAC and refinement parameters:
301 int m_homography_type;
302 int m_max_iterations;
303 double m_scale;
304 int m_nr_samples;
305 int m_chunk_size;
306 double m_outlier_t2;
307
308 // Whether to fit a linear model to just the inliers at the end
309 bool m_linear_polish;
310 double m_polish_C[36];
311 double m_polish_D[6];
312
313 // local state
314 bool m_current_is_reference;
315 bool m_initialized;
316
317 // inspection to reference homography:
318 double m_H_ref_to_ins[9];
319 double m_H_dref_to_ref[9];
320
321 // feature extraction and matching:
322 db_CornerDetector_u m_cd;
323 db_Matcher_u m_cm;
324
325 // length of corner arrays:
326 unsigned long m_max_nr_corners;
327
328 // corner locations of reference image features:
329 double * m_x_corners_ref;
330 double * m_y_corners_ref;
331 int m_nr_corners_ref;
332
333 // corner locations of inspection image features:
334 double * m_x_corners_ins;
335 double * m_y_corners_ins;
336 int m_nr_corners_ins;
337
338 // length of match index arrays:
339 unsigned long m_max_nr_matches;
340
341 // match indices:
342 int * m_match_index_ref;
343 int * m_match_index_ins;
344 int m_nr_matches;
345
346 // pointer to internal copy of the reference image:
347 unsigned char ** m_reference_image;
348
349 // pointer to internal copy of last aligned inspection image:
350 unsigned char ** m_aligned_ins_image;
351
352 // pointer to quarter resolution image, if used.
353 unsigned char** m_quarter_res_image;
354
355 // temporary storage for the quarter resolution image processing
356 unsigned char** m_horz_smooth_subsample_image;
357
358 // temporary space for homography computation:
359 double * m_temp_double;
360 int * m_temp_int;
361
362 // homogenous image point arrays:
363 double * m_corners_ref;
364 double * m_corners_ins;
365
366 // Indices of the points within the match lists
367 int * m_inlier_indices;
368 int m_num_inlier_indices;
369
370 //void ComputeInliers(double H[9], std::vector<int> &inlier_indices);
371 void ComputeInliers(double H[9]);
372
373 // cost arrays:
374 void ComputeCostArray();
375 bool m_sq_cost_computed;
376 double * m_sq_cost;
377
378 // cost histogram:
379 void ComputeCostHistogram();
380 int *m_cost_histogram;
381
382 void SetOutlierThreshold();
383
384 // utility function for smoothing the motion parameters.
385 void SmoothMotion(void);
386
387 private:
388 double m_K[9];
389 const int m_over_allocation;
390
391 bool m_reference_set;
392
393 // Maximum number of inliers seen until now w.r.t the current reference frame
394 int m_max_inlier_count;
395
396 // Number of cost histogram bins:
397 int m_nr_bins;
398 // All costs above this threshold get put into the last bin:
399 int m_max_cost_pix;
400
401 // whether to quarter the image resolution for processing, or not
402 bool m_quarter_resolution;
403
404 // the period (in number of frames) for reference update.
405 unsigned int m_reference_update_period;
406
407 // the number of frames processed so far.
408 unsigned int m_nr_frames_processed;
409
410 // smoother for motion transformations
411 db_StabilizationSmoother m_stab_smoother;
412
413 // boolean to control whether motion smoothing occurs (or not)
414 bool m_do_motion_smoothing;
415
416 // double to set the gain for motion smoothing
417 double m_motion_smoothing_gain;
418 };
419 /*!
420 Create look-up tables to undistort images. Only Bougeut (Matlab toolkit)
421 is currently supported. Can be used with db_WarpImageLut_u().
422 \code
423 xd = H*xs;
424 xd = xd/xd(3);
425 \endcode
426 \param lut_x pre-allocated float image
427 \param lut_y pre-allocated float image
428 \param w width
429 \param h height
430 \param H image homography from source to destination
431 */
db_GenerateHomographyLut(float ** lut_x,float ** lut_y,int w,int h,const double H[9])432 inline void db_GenerateHomographyLut(float ** lut_x,float ** lut_y,int w,int h,const double H[9])
433 {
434 assert(lut_x && lut_y);
435 double x[3] = {0.0,0.0,1.0};
436 double xb[3];
437
438 /*
439 double xl[3];
440
441 // Determine the output coordinate system ROI
442 double Hinv[9];
443 db_InvertAffineTransform(Hinv,H);
444 db_Multiply3x3_3x1(xl, Hinv, x);
445 xl[0] = db_SafeDivision(xl[0],xl[2]);
446 xl[1] = db_SafeDivision(xl[1],xl[2]);
447 */
448
449 for ( int i = 0; i < w; ++i )
450 for ( int j = 0; j < h; ++j )
451 {
452 x[0] = double(i);
453 x[1] = double(j);
454 db_Multiply3x3_3x1(xb, H, x);
455 xb[0] = db_SafeDivision(xb[0],xb[2]);
456 xb[1] = db_SafeDivision(xb[1],xb[2]);
457
458 lut_x[j][i] = float(xb[0]);
459 lut_y[j][i] = float(xb[1]);
460 }
461 }
462
463 /*!
464 * Perform a look-up table warp for packed RGB ([rgbrgbrgb...]) images.
465 * The LUTs must be float images of the same size as source image.
466 * The source value x_s is determined from destination (x_d,y_d) through lut_x
467 * and y_s is determined from lut_y:
468 \code
469 x_s = lut_x[y_d][x_d];
470 y_s = lut_y[y_d][x_d];
471 \endcode
472
473 * \param src source image (w*3 by h)
474 * \param dst destination image (w*3 by h)
475 * \param w width
476 * \param h height
477 * \param lut_x LUT for x
478 * \param lut_y LUT for y
479 */
db_WarpImageLutFast_rgb(const unsigned char * const * src,unsigned char ** dst,int w,int h,const float * const * lut_x,const float * const * lut_y)480 inline void db_WarpImageLutFast_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h,
481 const float * const * lut_x, const float * const * lut_y)
482 {
483 assert(src && dst);
484 int xd=0, yd=0;
485
486 for ( int i = 0; i < w; ++i )
487 for ( int j = 0; j < h; ++j )
488 {
489 xd = static_cast<unsigned int>(lut_x[j][i]);
490 yd = static_cast<unsigned int>(lut_y[j][i]);
491 if ( xd >= w || yd >= h ||
492 xd < 0 || yd < 0)
493 {
494 dst[j][3*i ] = 0;
495 dst[j][3*i+1] = 0;
496 dst[j][3*i+2] = 0;
497 }
498 else
499 {
500 dst[j][3*i ] = src[yd][3*xd ];
501 dst[j][3*i+1] = src[yd][3*xd+1];
502 dst[j][3*i+2] = src[yd][3*xd+2];
503 }
504 }
505 }
506
db_BilinearInterpolationRGB(double y,double x,const unsigned char * const * v,int offset)507 inline unsigned char db_BilinearInterpolationRGB(double y, double x, const unsigned char * const * v, int offset)
508 {
509 int floor_x=(int) x;
510 int floor_y=(int) y;
511
512 int ceil_x=floor_x+1;
513 int ceil_y=floor_y+1;
514
515 unsigned char f00 = v[floor_y][3*floor_x+offset];
516 unsigned char f01 = v[floor_y][3*ceil_x+offset];
517 unsigned char f10 = v[ceil_y][3*floor_x+offset];
518 unsigned char f11 = v[ceil_y][3*ceil_x+offset];
519
520 double xl = x-floor_x;
521 double yl = y-floor_y;
522
523 return (unsigned char)(f00*(1-yl)*(1-xl) + f10*yl*(1-xl) + f01*(1-yl)*xl + f11*yl*xl);
524 }
525
db_WarpImageLutBilinear_rgb(const unsigned char * const * src,unsigned char ** dst,int w,int h,const float * const * lut_x,const float * const * lut_y)526 inline void db_WarpImageLutBilinear_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h,
527 const float * const * lut_x, const float * const * lut_y)
528 {
529 assert(src && dst);
530 double xd=0.0, yd=0.0;
531
532 for ( int i = 0; i < w; ++i )
533 for ( int j = 0; j < h; ++j )
534 {
535 xd = static_cast<double>(lut_x[j][i]);
536 yd = static_cast<double>(lut_y[j][i]);
537 if ( xd > w-2 || yd > h-2 ||
538 xd < 0.0 || yd < 0.0)
539 {
540 dst[j][3*i ] = 0;
541 dst[j][3*i+1] = 0;
542 dst[j][3*i+2] = 0;
543 }
544 else
545 {
546 dst[j][3*i ] = db_BilinearInterpolationRGB(yd,xd,src,0);
547 dst[j][3*i+1] = db_BilinearInterpolationRGB(yd,xd,src,1);
548 dst[j][3*i+2] = db_BilinearInterpolationRGB(yd,xd,src,2);
549 }
550 }
551 }
552
SquaredInhomogenousHomographyError(double y[3],double H[9],double x[3])553 inline double SquaredInhomogenousHomographyError(double y[3],double H[9],double x[3]){
554 double x0,x1,x2,mult;
555 double sd;
556
557 x0=H[0]*x[0]+H[1]*x[1]+H[2];
558 x1=H[3]*x[0]+H[4]*x[1]+H[5];
559 x2=H[6]*x[0]+H[7]*x[1]+H[8];
560 mult=1.0/((x2!=0.0)?x2:1.0);
561 sd=(y[0]-x0*mult)*(y[0]-x0*mult)+(y[1]-x1*mult)*(y[1]-x1*mult);
562
563 return(sd);
564 }
565
566
567 // functions related to profiling
568 #if PROFILE
569
570 /* return current time in milliseconds */
571 static double
now_ms(void)572 now_ms(void)
573 {
574 //struct timespec res;
575 struct timeval res;
576 //clock_gettime(CLOCK_REALTIME, &res);
577 gettimeofday(&res, NULL);
578 return 1000.0*res.tv_sec + (double)res.tv_usec/1e3;
579 }
580
581 #endif
582