Example Multiview Scene Reconstruction

Example showing how multiple views from a calibrated camera can be combined to estimate the camera location and the 3D location of interest points.  '''As discussed in the source code, this is still a work in progress and the output remains very noisy.'''


Example Code:
* [https://github.com/lessthanoptimal/BoofCV/blob/v0.36/examples/src/main/java/boofcv/examples/sfm/ExampleMultiviewSceneReconstruction.java ExampleMultiviewSceneReconstruction.java]


Concepts:
* Multiple Views
* Single camera
* Calibrated camera
* [[Example_Sparse_Bundle_Adjustment|Sparse Bundle Adjustment]]


= Example Code =
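At a high level the example runs three stages: pairwise feature matching between images, calibrated scene estimation from the resulting connection graph, and sparse bundle adjustment with outlier pruning.  The fragment below is a condensed excerpt of the process() method in the full listing that follows; variables such as detDesc, associate, cameraName, and configSBA are set up exactly as shown there, and imports, pruning, and error handling are omitted.

// 1) Detect/describe SURF features and associate them between pairs of images
PairwiseImageMatching<GrayF32> imageMatching = new PairwiseImageMatching<>(detDesc,associate);
imageMatching.addCamera(cameraName, LensDistortionFactory.narrow(intrinsic).undistort_F64(true,false),intrinsic);
for( BufferedImage colorImage : colorImages )
	imageMatching.addImage(ConvertBufferedImage.convertFrom(colorImage, (GrayF32) null), cameraName);
imageMatching.process();

// 2) Estimate each camera's pose and an initial sparse 3D structure, up to scale
EstimateSceneCalibrated estimateScene = new EstimateSceneCalibrated();
estimateScene.process(imageMatching.getGraph());
SceneStructureMetric structure = estimateScene.getSceneStructure();
SceneObservations observations = estimateScene.getObservations();

// 3) Refine camera poses and 3D points jointly with sparse bundle adjustment
BundleAdjustment<SceneStructureMetric> sba = FactoryMultiView.bundleSparseMetric(configSBA);
sba.setParameters(structure,observations);
sba.optimize(structure);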
/**
 * Demonstration of how to do 3D reconstruction from a set of unordered photos with known intrinsic camera calibration.
 * The code below is still a work in progress and is very basic, but it still requires a solid understanding of
 * structure from motion to follow.  In other words, this is not for beginners, and it needs a good, clean set of
 * images to work.
 *
 * @author Peter Abeles
 */
public class ExampleMultiviewSceneReconstruction {
	/**
	 * Process the images and reconstruct the scene as a point cloud using matching interest points between
	 * images.
	 */
	public void process(CameraPinholeBrown intrinsic , List<BufferedImage> colorImages ) {

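		// Set up SURF feature detection/description, greedy descriptor association, and the pairwise image matcher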
		DetectDescribePoint detDesc = FactoryDetectDescribe.surfStable(null, null, null, GrayF32.class);
		ScoreAssociation scorer = FactoryAssociation.defaultScore(detDesc.getDescriptionType());
		AssociateDescription<TupleDesc> associate =
				FactoryAssociation.greedy(new ConfigAssociateGreedy(true),scorer);
		PairwiseImageMatching<GrayF32> imageMatching = new PairwiseImageMatching<>(detDesc,associate);
		imageMatching.setVerbose(System.out,0);

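		// Declare the single calibrated camera; the transform converts distorted pixel observations into normalized image coordinates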
		String cameraName = "camera";
		imageMatching.addCamera(cameraName, LensDistortionFactory.narrow(intrinsic).undistort_F64(true,false),intrinsic);

		for (int i = 0; i < colorImages.size(); i++) {
			BufferedImage colorImage = colorImages.get(i);
			if( colorImage.getWidth() != intrinsic.width || colorImage.getHeight() != intrinsic.height )
				throw new RuntimeException("Looks like you tried to hack this example and run it on random images. Please RTFM");
			GrayF32 image = ConvertBufferedImage.convertFrom(colorImage, (GrayF32) null);
			imageMatching.addImage(image,cameraName);
		}

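		// Associate features between image pairs to build the image connection graph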
		if( !imageMatching.process() ) {
			throw new RuntimeException("Failed to match images! total="+colorImages.size());
		}

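		// Estimate each camera's pose and an initial sparse 3D structure (up to scale) from the pairwise matches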
		EstimateSceneCalibrated estimateScene = new EstimateSceneCalibrated();
		estimateScene.setVerbose(System.out,0);

		if( !estimateScene.process(imageMatching.getGraph()))
			throw new RuntimeException("Scene estimation failed");

		// get the results
		SceneStructureMetric structure = estimateScene.getSceneStructure();
		SceneObservations observations = estimateScene.getObservations();

		// Configure bundle adjustment
		ConfigLevenbergMarquardt configLM = new ConfigLevenbergMarquardt();
		configLM.dampeningInitial = 1e-12;
		configLM.hessianScaling = true;
		ConfigBundleAdjustment configSBA = new ConfigBundleAdjustment();
		configSBA.configOptimizer = configLM;

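		// Create the sparse bundle adjustment solver; configure() specifies convergence tolerances and the maximum number of iterations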
		BundleAdjustment<SceneStructureMetric> sba = FactoryMultiView.bundleSparseMetric(configSBA);
		sba.configure(1e-10,1e-10,100);
		sba.setVerbose(System.out,0);

		// Scale to improve numerical accuracy
		ScaleSceneStructure bundleScale = new ScaleSceneStructure();

		PruneStructureFromSceneMetric pruner = new PruneStructureFromSceneMetric(structure,observations);

		// Requiring 3 views per point reduces the number of outliers by a lot but also removes
		// many valid points
		pruner.prunePoints(3);

		// Optimize the results
		int pruneCycles=5;
		for (int i = 0; i < pruneCycles; i++) {
			System.out.println("BA + Prune iteration = "+i+"  points="+structure.points.size+"  obs="+observations.getObservationCount());
			bundleScale.applyScale(structure,observations);
			sba.setParameters(structure,observations);
			if( !sba.optimize(structure) ) {
				throw new RuntimeException("Bundle adjustment failed!");
			}

			bundleScale.undoScale(structure,observations);

			if( i == pruneCycles-1 )
				break;

			System.out.println("Pruning....");
			pruner.pruneObservationsByErrorRank(0.97);  // Prune outlier observations
			pruner.prunePoints(3,0.4);            // Prune stray points in 3D space
			pruner.prunePoints(2);                           // Prune invalid points
			pruner.pruneViews(10);                           // Prune views with too few observations
		}

		visualizeResults(structure,colorImages);
		System.out.println("Done!");
	}

	/**
	 * Opens a window showing the found point cloud. Points are colorized using the pixel value inside
	 * one of the input images
	 */
	private void visualizeResults( SceneStructureMetric structure,
								   List<BufferedImage> colorImages ) {

		List<Point3D_F64> cloudXyz = new ArrayList<>();
		GrowQueue_I32 cloudRgb = new GrowQueue_I32();
		Point3D_F64 world = new Point3D_F64();
		Point3D_F64 camera = new Point3D_F64();
		Point2D_F64 pixel = new Point2D_F64();
		for( int i = 0; i < structure.points.size; i++ ) {
			// Get 3D location
			SceneStructureMetric.Point p = structure.points.get(i);
			p.get(world);

			// Project point into an arbitrary view
			for (int j = 0; j < p.views.size; j++) {
				int viewIdx  = p.views.get(j);
				SePointOps_F64.transform(structure.views.data[viewIdx].worldToView,world,camera);
				int cameraIdx = structure.views.data[viewIdx].camera;
				structure.cameras.get(cameraIdx).model.project(camera.x,camera.y,camera.z,pixel);

				// Get the point's color
				BufferedImage image = colorImages.get(viewIdx);
				int x = (int)pixel.x;
				int y = (int)pixel.y;

				// After optimization it might have been moved out of the camera's original FOV.
				// hopefully this isn't too common
				if( x < 0 || y < 0 || x >= image.getWidth() || y >= image.getHeight() )
					continue;
				cloudXyz.add( world.copy() );
				cloudRgb.add(image.getRGB((int)pixel.x,(int)pixel.y));
				break;
			}
		}

		PointCloudViewer viewer = VisualizeData.createPointCloudViewer();
		viewer.setTranslationStep(0.05);
		viewer.addCloud(cloudXyz,cloudRgb.data);
		viewer.setCameraHFov(UtilAngle.radian(60));

		SwingUtilities.invokeLater(()->{
			viewer.getComponent().setPreferredSize(new Dimension(500,500));
			ShowImages.showWindow(viewer.getComponent(), "Reconstruction Points", true);
		});

	}

	public static void main(String[] args) {

		String directory = UtilIO.pathExample("sfm/chair");

		CameraPinholeBrown intrinsic = CalibrationIO.load(
				new File(directory,"/intrinsic_DSC-HX5_3648x2736_to_640x480.yaml"));

		List<BufferedImage> images = UtilImageIO.loadImages(directory,".*jpg");

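		// Only reconstruct the first 8 images to keep this example's runtime manageable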
		int N = 8;
		while( images.size() > N ) {
			images.remove(N);
		}

		ExampleMultiviewSceneReconstruction example = new ExampleMultiviewSceneReconstruction();

		long before = System.currentTimeMillis();
		example.process(intrinsic,images);
		long after = System.currentTimeMillis();

		System.out.println("Elapsed time "+(after-before)/1000.0+" (s)");
	}
}
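
To run the reconstruction on your own photos, only main() needs to change: point it at a directory containing the JPEG images and the camera's intrinsic calibration.  Note that process() throws an exception if an image's resolution does not match the calibration.  A minimal sketch, where the directory and calibration file name are hypothetical placeholders rather than files shipped with BoofCV, and imports are the same as in the full example:

public static void main(String[] args) {
	String directory = "path/to/my_photos";  // hypothetical directory of JPEG images

	// hypothetical file name; intrinsic calibration stored in BoofCV's YAML format
	CameraPinholeBrown intrinsic = CalibrationIO.load(new File(directory,"intrinsic.yaml"));

	// every image must match the width/height stored in the calibration, or process() will throw
	List<BufferedImage> images = UtilImageIO.loadImages(directory,".*jpg");

	new ExampleMultiviewSceneReconstruction().process(intrinsic,images);
}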