# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helper utils for processing Meshroom data into the nerfstudio format."""

import json
import math
from copy import deepcopy
from pathlib import Path
from typing import Dict, List, Optional

import numpy as np

from nerfstudio.process_data.process_data_utils import CAMERA_MODELS
from nerfstudio.utils.rich_utils import CONSOLE

# Homogeneous rotation about the x-axis, applied to every camera pose to adjust
# the world coordinate system (+z maps to +y, +y maps to -z)
ROT_MAT = np.array(
    [
        [1, 0, 0, 0],
        [0, 0, 1, 0],
        [0, -1, 0, 0],
        [0, 0, 0, 1],
    ]
)

def reflect(axis, size=4):
    """Create a reflection matrix along the specified axis."""
    _diag = np.ones(size)
    _diag[axis] = -1
    return np.diag(_diag)

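# For example, reflect(2) evaluates to np.diag([1.0, 1.0, -1.0, 1.0]);
# right-multiplying a pose by it negates the matrix's z column, and reflect(1)
# likewise negates the y column.
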
def Mat2Nerf(mat):
    """Convert a camera matrix to the NeRF coordinate convention.

    Right-multiplying by the two reflections negates the z and y columns,
    flipping the camera's forward and down axes (y down, z forward in the
    computer-vision convention) to NeRF's y up, z backward.
    """
    M = np.array(mat)
    return (M @ reflect(2)) @ reflect(1)

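# For instance, Mat2Nerf(np.eye(4)) yields np.diag([1.0, -1.0, -1.0, 1.0]): the
# y and z axes are negated while x and the homogeneous row are untouched.
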
def closest_point_2_lines(oa, da, ob, db):
    """Find the point closest to both rays of form o + t * d, and a weight for it."""
    da = da / np.linalg.norm(da)
    db = db / np.linalg.norm(db)
    c = np.cross(da, db)
    denom = np.linalg.norm(c) ** 2
    t = ob - oa
    ta = np.linalg.det([t, db, c]) / (denom + 1e-10)
    tb = np.linalg.det([t, da, c]) / (denom + 1e-10)
    # Only keep intersection points behind the ray origins (positive t is
    # clamped away); see the note below on why look-at points sit at negative t.
    if ta > 0:
        ta = 0
    if tb > 0:
        tb = 0
    return (oa + ta * da + ob + tb * db) * 0.5, denom

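# Sanity check (an illustrative sketch, not part of the pipeline): rays leaving
# (1, 0, 0) along +x and (0, 1, 0) along +y "look back" at the origin at t = -1,
# so no clamping occurs and the midpoint of closest approach is the origin:
#
#   p, w = closest_point_2_lines(
#       np.array([1.0, 0.0, 0.0]), np.array([1.0, 0.0, 0.0]),
#       np.array([0.0, 1.0, 0.0]), np.array([0.0, 1.0, 0.0]),
#   )
#   # p ~= [0, 0, 0]; w == 1, the maximum weight, since the rays are perpendicular
#
# central_point below passes each camera's +z column as the direction; after
# Mat2Nerf that axis points backward, so the look-at point lies at negative t.
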
def central_point(out):
    """Find a central point all cameras are looking at."""
    CONSOLE.print("Computing center of attention...")
    totw = 0.0
    totp = np.array([0.0, 0.0, 0.0])
    for f in out["frames"]:
        mf = np.array(f["transform_matrix"])[0:3, :]
        for g in out["frames"]:
            mg = np.array(g["transform_matrix"])[0:3, :]
            p, w = closest_point_2_lines(mf[:, 3], mf[:, 2], mg[:, 3], mg[:, 2])
            if w > 0.01:
                totp += p * w
                totw += w

    if len(out["frames"]) == 0:
        CONSOLE.print("[bold red]No frames found when computing center of attention[/bold red]")
        return totp

    if (totw == 0) and (not totp.any()):
        CONSOLE.print("[bold red]Center of attention is zero[/bold red]")
        return totp

    totp /= totw
    CONSOLE.print(f"The center of attention is: {totp}")

    return totp

def build_sensor(intrinsic):
    """Build camera intrinsics from a Meshroom intrinsic entry."""
    out = {}
    out["w"] = float(intrinsic["width"])
    out["h"] = float(intrinsic["height"])

    # Focal length in mm
    focal = float(intrinsic["focalLength"])

    # Sensor size in mm
    sensor_width = float(intrinsic["sensorWidth"])
    sensor_height = float(intrinsic["sensorHeight"])

    # Focal length in pixels
    out["fl_x"] = (out["w"] * focal) / sensor_width

    # Check that the image aspect ratio matches the sensor aspect ratio
    if np.isclose((out["w"] / out["h"]), (sensor_width / sensor_height)):
        out["fl_y"] = (out["h"] * focal) / sensor_height
    else:
        CONSOLE.print(
            "[yellow]WARNING: image aspect ratio does not match sensor aspect ratio; "
            "this is likely a bug from Meshroom. Will use fl_x to set fl_y.[/yellow]"
        )
        out["fl_y"] = out["fl_x"]

    # Field of view from the pinhole model: fov = 2 * atan(size / (2 * focal_pixels))
    out["camera_angle_x"] = math.atan(out["w"] / (out["fl_x"] * 2)) * 2
    out["camera_angle_y"] = math.atan(out["h"] / (out["fl_y"] * 2)) * 2

    # The principal point is stored as an offset from the image center
    out["cx"] = float(intrinsic["principalPoint"][0]) + (out["w"] / 2.0)
    out["cy"] = float(intrinsic["principalPoint"][1]) + (out["h"] / 2.0)

    if intrinsic["type"] == "radial3":
        for i, coef in enumerate(intrinsic["distortionParams"]):
            out[f"k{i + 1}"] = float(coef)

    return out

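# Worked example (illustrative numbers, not from a real dataset): a 6000 x 4000
# image with focalLength = 24 on a 36 x 24 mm sensor gives
#   fl_x = 6000 * 24 / 36 = 4000 px and fl_y = 4000 * 24 / 24 = 4000 px
# (the 3:2 image ratio matches the 3:2 sensor ratio), and a horizontal field of
# view of camera_angle_x = 2 * atan(6000 / (2 * 4000)) ~= 1.287 rad (~73.7 deg).
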
def meshroom_to_json(
    image_filename_map: Dict[str, Path],
    json_filename: Path,
    output_dir: Path,
    ply_filename: Optional[Path] = None,
    verbose: bool = False,
) -> List[str]:
    """Convert Meshroom data into a nerfstudio dataset.

    Args:
        image_filename_map: Mapping of original image filenames to their saved locations.
        json_filename: Path to the Meshroom json file.
        output_dir: Path to the output directory.
        ply_filename: Path to the exported ply file.
        verbose: Whether to print verbose output.

    Returns:
        Summary of the conversion.
    """
    summary_log = []

    with open(json_filename, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Create the output structure
    out = {}
    out["aabb_scale"] = 16  # Default value

    # Extract transforms from the Meshroom data
    transforms = {}
    for pose in data.get("poses", []):
        transform = pose["pose"]["transform"]
        rot = np.asarray(transform["rotation"])
        rot = rot.reshape(3, 3).astype(float)

        ctr = np.asarray(transform["center"]).astype(float)

        M = np.eye(4)
        M[:3, :3] = rot
        M[:3, 3] = ctr

        M = Mat2Nerf(M.astype(float))
        transforms[pose["poseId"]] = np.dot(ROT_MAT, M)

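    # For reference, each pose entry consumed above is expected to look roughly
    # like this (an illustrative sketch inferred from the fields accessed in the
    # loop, not Meshroom's full schema):
    #
    #   {
    #       "poseId": "<id>",
    #       "pose": {
    #           "transform": {
    #               "rotation": [...],  # 9 floats, row-major 3x3
    #               "center": [...],    # 3 floats, camera position
    #           }
    #       }
    #   }
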
    # Extract intrinsics from the Meshroom data
    intrinsics = {}
    for intrinsic in data.get("intrinsics", []):
        intrinsics[intrinsic["intrinsicId"]] = build_sensor(intrinsic)

    # Set the camera model based on the intrinsic type
    if data.get("intrinsics") and "type" in data["intrinsics"][0]:
        intrinsic_type = data["intrinsics"][0]["type"]
        if intrinsic_type in ["radial1", "radial3"]:
            out["camera_model"] = CAMERA_MODELS["perspective"].value
        elif intrinsic_type in ["fisheye", "fisheye4"]:
            out["camera_model"] = CAMERA_MODELS["fisheye"].value
        else:
            # Default to perspective
            out["camera_model"] = CAMERA_MODELS["perspective"].value
    else:
        out["camera_model"] = CAMERA_MODELS["perspective"].value

    # Build the frames
    frames = []
    skipped_images = 0

    for view in data.get("views", []):
        # Get the image name from the path
        name = Path(view["path"]).stem

        # Check that the image exists in our mapping
        if name not in image_filename_map:
            if verbose:
                CONSOLE.print(f"[yellow]Missing image for {name}, skipping[/yellow]")
            skipped_images += 1
            continue

        # Get the poseId and intrinsicId
        poseId = view["poseId"]
        intrinsicId = view["intrinsicId"]

        # Check that we have the necessary data
        if poseId not in transforms:
            if verbose:
                CONSOLE.print(f"[yellow]PoseId {poseId} not found in transforms, skipping image: {name}[/yellow]")
            skipped_images += 1
            continue

        if intrinsicId not in intrinsics:
            if verbose:
                CONSOLE.print(f"[yellow]IntrinsicId {intrinsicId} not found, skipping image: {name}[/yellow]")
            skipped_images += 1
            continue

        # Create the camera data
        camera = {}
        camera.update(deepcopy(intrinsics[intrinsicId]))
        camera["transform_matrix"] = transforms[poseId]
        camera["file_path"] = image_filename_map[name].as_posix()

        frames.append(camera)

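    # Each view consumed above is expected to carry at least these keys (again an
    # illustrative sketch inferred from the accesses in the loop, not a full schema):
    #
    #   {"poseId": "<id>", "intrinsicId": "<id>", "path": "/path/to/IMG_0001.jpg"}
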
    out["frames"] = frames

    # Calculate the center point
    center = central_point(out)

    # Recenter the camera positions and make the matrices JSON-serializable
    for f in out["frames"]:
        f["transform_matrix"][0:3, 3] -= center
        f["transform_matrix"] = f["transform_matrix"].tolist()

    # Include the point cloud if provided
    if ply_filename is not None:
        import open3d as o3d  # Local import so open3d stays an optional dependency

        # Create the applied transform
        applied_transform = np.eye(4)[:3, :]
        applied_transform = applied_transform[np.array([2, 0, 1]), :]
        out["applied_transform"] = applied_transform.tolist()

        # Load the point cloud, bring it into the same frame as the cameras, and save it
        pc = o3d.io.read_point_cloud(str(ply_filename))
        points3D = np.asarray(pc.points)
        points3D = np.einsum("ij,bj->bi", applied_transform[:3, :3], points3D) + applied_transform[:3, 3]
        pc.points = o3d.utility.Vector3dVector(points3D)
        o3d.io.write_point_cloud(str(output_dir / "sparse_pc.ply"), pc)
        out["ply_file_path"] = "sparse_pc.ply"
        summary_log.append(f"Imported {ply_filename} as starting points")

    # Write the output
    with open(output_dir / "transforms.json", "w", encoding="utf-8") as f:
        json.dump(out, f, indent=4)

    # Add summary info
    if skipped_images == 1:
        summary_log.append(f"{skipped_images} image was skipped due to missing camera pose or intrinsic data.")
    elif skipped_images > 1:
        summary_log.append(f"{skipped_images} images were skipped due to missing camera poses or intrinsic data.")

    summary_log.append(f"Final dataset contains {len(out['frames'])} frames.")

    return summary_log
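

if __name__ == "__main__":
    # Minimal usage sketch. The paths and image names below are hypothetical;
    # real callers build image_filename_map from their own processed images and
    # point json_filename at the cameras file exported by Meshroom.
    example_map = {f"IMG_{i:04d}": Path(f"images/IMG_{i:04d}.jpg") for i in range(1, 4)}
    logs = meshroom_to_json(
        image_filename_map=example_map,
        json_filename=Path("cameras.sfm"),
        output_dir=Path("."),
        verbose=True,
    )
    for line in logs:
        CONSOLE.print(line)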