# Frankenstein: 1D disc brightness profile reconstruction from Fourier data
# using non-parametric Gaussian Processes
#
# Copyright (C) 2019-2020 R. Booth, J. Jennings, M. Tazzari
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>
#

import logging

import numpy as np
import scipy.linalg
import scipy.sparse
import scipy.optimize

from frank.constants import rad_to_arcsec, deg_to_rad
from frank.minimizer import LineSearch, MinimizeNewton


class VisibilityMapping:
    r"""Builds the mapping between the visibility and image planes.

    VisibilityMapping generates the transform matrices :math:`H(q)` such that
    :math:`V_\nu(q) = H(q) I_\nu`. It also uses these to construct the design
    matrices :math:`M` and :math:`j` used in the fitting.

    VisibilityMapping supports the following models:
      1. An optically thick and geometrically thin disc
      2. An optically thin and geometrically thin disc
      3. An optically thin disc with a known Gaussian vertical structure
    All models are axisymmetric.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    geometry : SourceGeometry object
        Geometry used to correct the visibilities for the source inclination.
    vis_model : string,
        One of ['opt_thick', 'opt_thin', 'debris'], corresponding to models
        1-3 described above, respectively.
    scale_height : function H(R), units = arcsec
        The vertical thickness of the disc in terms of its Gaussian
        scale-height. Only used if vis_model="debris".
    block_data : bool, default = True
        Large temporary matrices are needed to set up the data. If block_data
        is True, we avoid this, limiting the memory requirement to block_size
        elements.
    block_size : int, default = 10**5
        Size of the matrices if blocking is used
    check_qbounds : bool, default = True
        Whether to check if the first (last) collocation point is smaller
        (larger) than the shortest (longest) deprojected baseline in the
        dataset
    verbose : bool, default = True
        Whether to print notification messages
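
    Examples
    --------
    A minimal usage sketch; the exact ``DiscreteHankelTransform`` and
    ``FixedGeometry`` arguments below are illustrative assumptions, not
    prescriptions::

        >>> from frank.hankel import DiscreteHankelTransform
        >>> from frank.geometry import FixedGeometry
        >>> from frank.constants import rad_to_arcsec
        >>> DHT = DiscreteHankelTransform(2.0 / rad_to_arcsec, 250)  # Rmax [rad], N
        >>> geom = FixedGeometry(30.0, 60.0, 0.0, 0.0)  # inc, PA [deg]; dRA, dDec [arcsec]
        >>> mapping = VisibilityMapping(DHT, geom, vis_model='opt_thick')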

    """

    def __init__(self, DHT, geometry,
                 vis_model='opt_thick', scale_height=None, block_data=True,
                 block_size=10 ** 5, check_qbounds=True, verbose=True):

        _vis_models = ['opt_thick', 'opt_thin', 'debris']
        if vis_model not in _vis_models:
            raise ValueError(f"vis_model must be one of {_vis_models}")

        # Store flags
        self._vis_model = vis_model
        self.check_qbounds = check_qbounds
        self._verbose = verbose

        self._chunking = block_data
        self._chunk_size = block_size

        self._DHT = DHT
        self._geometry = geometry

        # Check for consistency and report the model choice.
        self._scale_height = None
        if self._vis_model == 'opt_thick':
            if self._verbose:
                logging.info(' Assuming an optically thick model (the default): '
                             'Scaling the total flux to account for the source '
                             'inclination')
        elif self._vis_model == 'opt_thin':
            if self._verbose:
                logging.info(' Assuming an optically thin model: *Not* scaling the '
                             'total flux to account for the source inclination')
        elif self._vis_model == 'debris':
            if scale_height is None:
                raise ValueError('You requested a model with a non-zero scale height'
                                 ' but did not specify H(R) (scale_height=None)')
            self._scale_height = scale_height(self.r)
            self._H2 = 0.5*(2*np.pi*self._scale_height / rad_to_arcsec)**2

            if self._verbose:
                logging.info(' Assuming an optically thin but geometrically '
                             'thick model: *Not* scaling the total flux to account for '
                             'the source inclination')

    def map_visibilities(self, u, v, V, weights, frequencies=None, geometry=None):
        r"""
        Compute the matrices :math:`M` and :math:`j` from the visibility data.

        Also computes the null likelihood,

        .. math::
            H_0 = \frac{1}{2} \log \left[ \det \left( \frac{w}{2 \pi} \right) \right]
                - \frac{1}{2} V^T w V.

        Parameters
        ----------
        u, v : 1D array, unit = :math:`\lambda`
            uv-points of the visibilities
        V : 1D array, unit = Jy
            Visibility amplitudes at q
        weights : 1D array, unit = Jy^-2
            Weights of the visibilities, weight = 1 / sigma^2, where sigma is
            the standard deviation
        frequencies : 1D array, optional, unit = Hz
            Channel frequencies of each data point. If not provided, a single
            channel is assumed.
        geometry : SourceGeometry object
            Geometry used to deproject the visibilities before fitting. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        mapped visibilities : dict
            The format depends on whether a single channel was assumed
            (frequencies=None). The following data may be present:
                'multi_freq' : bool,
                    Specifies whether multi-frequency analysis was done
                'channels' : array, 1D, multi-frequency analysis only
                    The frequency of each channel.
                'M' : array, 2D or 3D.
                    The matrix :math:`M` of the mapping. This will have shape
                    (num_channels, size, size) for multi-frequency analysis
                    and (size, size) for single-channel analysis.
                'j' : array, 1D or 2D.
                    The matrix :math:`j` of the mapping. This will have shape
                    (num_channels, size) for multi-frequency analysis and
                    (size,) for single-channel analysis.
                'null_likelihood' : float,
                    The likelihood of a model with I=0. See above.
                'hash' : list,
                    Identifying data, used to ensure compatibility between the
                    mapped visibilities and fitting objects.
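
        Examples
        --------
        A sketch with synthetic, single-channel data, where ``mapping`` is
        constructed as in the class-level example (all values are purely
        illustrative)::

            >>> rng = np.random.default_rng(0)
            >>> u = rng.uniform(1e4, 1e6, 1000)   # lambda
            >>> v = rng.uniform(1e4, 1e6, 1000)   # lambda
            >>> V = np.ones(1000, dtype='c16')    # Jy
            >>> w = np.ones(1000)                 # Jy^-2
            >>> mapped = mapping.map_visibilities(u, v, V, w)
            >>> M, j = mapped['M'], mapped['j']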

        """
        if geometry is None:
            geometry = self._geometry

        if self._verbose:
            logging.info(' Building visibility matrices M and j')

        # Deproject the visibilities
        u, v, k, V = geometry.apply_correction(u, v, V, use3D=True)
        q = np.hypot(u, v)

        # Check consistency of the uv points with the model
        self._check_uv_range(q)

        # Use only the real part of V.
        V = V.real
        w = np.ones_like(V) * weights

        multi_freq = True
        if frequencies is None:
            multi_freq = False
            frequencies = np.ones_like(V)

        channels = np.unique(frequencies)
        Ms = np.zeros([len(channels), self.size, self.size], dtype='f8')
        js = np.zeros([len(channels), self.size], dtype='f8')
        for i, f in enumerate(channels):
            idx = frequencies == f

            qi = q[idx]
            ki = k[idx]
            wi = w[idx]
            Vi = V[idx]

            # If chunking is used, we will build up M and j chunk-by-chunk
            if self._chunking:
                Nstep = int(self._chunk_size / self.size + 1)
            else:
                Nstep = len(Vi)

            start = 0
            end = Nstep
            Ndata = len(Vi)
            while start < Ndata:
                qs = qi[start:end]
                ks = ki[start:end]
                ws = wi[start:end]
                Vs = Vi[start:end]

                X = self._get_mapping_coefficients(qs, ks)

                wXT = np.array(X.T * ws, order='C')

                Ms[i] += np.dot(wXT, X)
                js[i] += np.dot(wXT, Vs)

                start = end
                end = min(Ndata, end + Nstep)

        # Compute the likelihood normalization H_0, i.e., the
        # log-likelihood of a source with I=0.
        H0 = 0.5 * np.sum(np.log(w / (2 * np.pi)) - V * w * V)

        if multi_freq:
            return {
                'multi_freq': True,
                'channels': channels,
                'M': Ms,
                'j': js,
                'null_likelihood': H0,
                'hash': [True, self._DHT, geometry, self._vis_model, self._scale_height],
            }
        else:
            return {
                'multi_freq': False,
                'channels': None,
                'M': Ms[0],
                'j': js[0],
                'null_likelihood': H0,
                'hash': [False, self._DHT, geometry, self._vis_model, self._scale_height],
            }

    def check_hash(self, hash, multi_freq=False, geometry=None):
        """Check whether the hash of some mapped visibilities is compatible
        with this VisibilityMapping.

        Parameters
        ----------
        hash : list
            Hash to compare
        multi_freq : bool
            Whether a multi-frequency fit is expected
        geometry : SourceGeometry object, optional
            Geometry to use in the comparison. If not provided, the geometry
            passed in during construction will be used.
        """
        if geometry is None:
            geometry = self._geometry

        passed = (
            multi_freq == hash[0] and
            self._DHT.Rmax == hash[1].Rmax and
            self._DHT.size == hash[1].size and
            self._DHT.order == hash[1].order and
            geometry.inc == hash[2].inc and
            geometry.PA == hash[2].PA and
            geometry.dRA == hash[2].dRA and
            geometry.dDec == hash[2].dDec and
            self._vis_model == hash[3]
        )

        if not passed:
            return False

        if self._scale_height is None:
            return hash[4] is None
        else:
            if hash[4] is None:
                return False
            else:
                return np.all(self._scale_height == hash[4])

    def predict_visibilities(self, I, q, k=None, geometry=None):
        r"""Compute the predicted visibilities given the brightness profile, I.

        Parameters
        ----------
        I : array, unit = Jy
            Brightness at the collocation points.
        q : array, unit = :math:`\lambda`
            Radial uv-distance to predict the visibility at
        k : array, optional, unit = :math:`\lambda`
            Vertical uv-distance to predict the visibility. Only needed for a
            geometrically thick model.
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        V(q, k) : array, unit = Jy
            Predicted visibilities of a source with a radial flux distribution
            given by :math:`I` and the position angle and inclination
            determined by the geometry.

        Notes
        -----
        For an optically thick model the visibility amplitudes are reduced due
        to the projection, but phase centre corrections are not added.
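
        Examples
        --------
        Predicting model visibilities on a regular baseline grid, where
        ``I_mean`` is assumed to be a brightness profile evaluated at the
        collocation points (e.g. the mean of a GaussianModel fit)::

            >>> q_grid = np.geomspace(1e4, 2e6, 200)   # lambda
            >>> V_pred = mapping.predict_visibilities(I_mean, q_grid)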

        """
        # Chunk the visibility calculation for speed
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(q)

        end = 0
        start = 0
        V = []
        while end < len(q):
            start = end
            end = start + Ni
            qi = q[start:end]

            ki = None
            if k is not None:
                ki = k[start:end]

            H = self._get_mapping_coefficients(qi, ki, geometry)

            V.append(np.dot(H, I))
        return np.concatenate(V)

    def invert_visibilities(self, V, R, geometry=None):
        r"""Compute the brightness, I, from the visibilities.

        Note this method does not work for an arbitrary distribution of
        baselines and therefore cannot be used to determine the brightness
        given a generic set of data. Instead it needs the visibilities at the
        collocation points of the DiscreteHankelTransform, q.

        For geometrically thick models the visibilities used must be those for
        which kz = 0.

        Given the above constraints, this method computes the inverse of
        predict_visibilities.

        Parameters
        ----------
        V : array, unit = Jy
            Visibility at the collocation points. This must be the deprojected
            and phase-centre-corrected visibilities.
        R : array, unit = arcsec
            Radial distance to compute the brightness at
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        I(R) : array, unit = Jy / sr
            Brightness at the radial locations, R.

        Notes
        -----
        For the optically thick model the brightness is corrected for the
        source inclination, i.e. the inverse of the scaling applied in
        predict_visibilities.
        """
        # Chunk the visibility calculation for speed
        R = np.atleast_1d(R)
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(R)

        end = 0
        start = 0
        I = []
        while end < len(R):
            start = end
            end = start + Ni
            Ri = R[start:end]

            H = self._get_mapping_coefficients(Ri, 0, geometry, inverse=True)

            I.append(np.dot(H, V))
        return np.concatenate(I)[R < self.Rmax]

    def transform(self, f, q=None, direction='forward'):
        """Apply a DHT directly to the data provided.

        Parameters
        ----------
        f : array, size = N
            Function to Hankel transform, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        q : array or None
            The frequency points at which to evaluate the Hankel
            transform. If not specified, the conjugate points of the
            DHT will be used. For the backward transform, q should be
            the radius points in arcsec.
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used.

        Returns
        -------
        H[f] : array, size = N or len(q) if supplied
            The Hankel transform of the array f
        """
        if direction == 'backward' and q is not None:
            q = q / rad_to_arcsec

        return self._DHT.transform(f, q, direction)

    def DHT_coefficients(self, direction='forward'):
        """Get the coefficients of the Discrete Hankel Transform.

        The coefficients are defined such that

        .. math::
            H[f] = np.dot(H, f)

        Parameters
        ----------
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used.

        Returns
        -------
        H : array, size = (N, N)
            The matrix of DHT coefficients
        """
        return self._DHT.coefficients(direction=direction)

    def interpolate(self, f, r, space='Real'):
        """
        Interpolate f (evaluated at the collocation points) to the new points,
        r, using interpolation that is consistent with the Fourier-Bessel
        series / Discrete Hankel Transform.

        Parameters
        ----------
        f : array, size = N
            Function to interpolate, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        r : array or None
            The points at which to evaluate the interpolation.
        space : { 'Real', 'Fourier' }, optional
            Space in which the interpolation is done. If not supplied,
            'Real' is assumed.

        Returns
        -------
        f_interp : array, size = len(r)
            The interpolated results
        """
        if space == 'Real':
            r = r / rad_to_arcsec
        return self._DHT.interpolate(f, r, space)

    def _get_mapping_coefficients(self, qs, ks, geometry=None, inverse=False):
        r"""Get :math:`H(q)`, such that :math:`V(q) = H(q) I_\nu`"""

        if self._vis_model == 'opt_thick':
            # Optically thick & geometrically thin
            if geometry is None:
                geometry = self._geometry
            scale = np.cos(geometry.inc * deg_to_rad)
        elif self._vis_model == 'opt_thin':
            # Optically thin & geometrically thin
            scale = 1
        elif self._vis_model == 'debris':
            # Optically thin & geometrically thick
            scale = np.exp(-np.outer(ks*ks, self._H2))
        else:
            raise ValueError("model not supported. Should never occur.")

        if inverse:
            scale = np.atleast_1d(1/scale).reshape(1, -1)
            qs = qs / rad_to_arcsec
            direction = 'backward'
        else:
            direction = 'forward'

        H = self._DHT.coefficients(qs, direction=direction) * scale

        return H

    def _check_uv_range(self, uv):
        """Check that the uv domain is properly covered"""

        # Check whether the first (last) collocation point is smaller (larger)
        # than the shortest (longest) deprojected baseline in the dataset
        if self.check_qbounds:
            if self.q[0] < uv.min():
                logging.warning(r"WARNING: First collocation point, q[0] = {:.3e} \lambda,"
                                " is at a baseline shorter than the"
                                " shortest deprojected baseline in the dataset,"
                                r" min(uv) = {:.3e} \lambda. For q[0] << min(uv),"
                                " the fit's total flux may be biased"
                                " low.".format(self.q[0], uv.min()))

            if self.q[-1] < uv.max():
                raise ValueError(r"ERROR: Last collocation point, {:.3e} \lambda, is at"
                                 " a shorter baseline than the longest deprojected"
                                 r" baseline in the dataset, {:.3e} \lambda. Please"
                                 " increase N in FrankMultFrequencyFitter (this is"
                                 " `hyperparameters: n` if you're using a parameter"
                                 " file). Or if you'd like to fit to a shorter maximum"
                                 " baseline, cut the (u, v) distribution before fitting"
                                 " (`modify_data: baseline_range` in the"
                                 " parameter file).".format(self.q[-1], uv.max()))

    @property
    def r(self):
        """Radius points, unit = arcsec"""
        return self._DHT.r * rad_to_arcsec

    @property
    def Rmax(self):
        """Maximum radius, unit = arcsec"""
        return self._DHT.Rmax * rad_to_arcsec

    @property
    def q(self):
        r"""Frequency points, unit = :math:`\lambda`"""
        return self._DHT.q

    @property
    def Qmax(self):
        r"""Maximum frequency, unit = :math:`\lambda`"""
        return self._DHT.Qmax

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size

    @property
    def scale_height(self):
        """Vertical thickness of the disc, unit = arcsec"""
        return self._scale_height


class GaussianModel:
    r"""
    Solves the linear regression problem to compute the posterior,

    .. math::
        P(I|q,V,p) \propto G(I-\mu, D),

    where :math:`I` is the intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`\mu` and :math:`D` are
    the mean and covariance of the posterior distribution.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(I|p) \propto G(I, S(p)),

    and the Bayesian Linear Regression problem is solved. :math:`S` is
    computed from the power spectrum, :math:`p`, if provided. Otherwise the
    traditional (frequentist) linear regression is used.

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity :math:`I` to
    visibility space. :math:`M` is defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.

    The mean and covariance of the posterior are then given by

    .. math::
        \mu = D j

    and

    .. math::
        D = [ M + S(p)^{-1}]^{-1},

    if the prior is provided, otherwise

    .. math::
        D = M^{-1}.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels
        (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : 1/2D array, size = (Nf,) or (Nf, Nfields), optional
        Scale factors applied per channel and field when accumulating
        :math:`M` and :math:`j`, e.g. for multi-frequency fitting.
        Defaults to 1.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
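
    Examples
    --------
    A sketch of the intended use, where ``mapped`` is the dictionary
    returned by ``VisibilityMapping.map_visibilities`` and the flat power
    spectrum is an illustrative assumption::

        >>> p = np.full(DHT.size, 1e10)
        >>> model = GaussianModel(DHT, mapped['M'], mapped['j'], p=p,
        ...                       noise_likelihood=mapped['null_likelihood'])
        >>> I_mean = model.mean             # posterior mean, Jy / sr
        >>> logP = model.log_likelihood()   # marginalized likelihood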

    """

    def __init__(self, DHT, M, j, p=None, scale=None, guess=None,
                 Nfields=None, noise_likelihood=0):

        self._DHT = DHT

        # Correct the shape of the design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to increase `rout` by 10-20% or `n` so"
                                 " that it is large, >~300.")

            Ykm = self._DHT.coefficients()
            Sj = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)

            self._Sinv = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._Sinv[sn:en, sn:en] += Sj[n]
        else:
            self._Sinv = None

        # Compute the design matrix
        self._M = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
        self._j = np.zeros(Nr*Nfields, dtype='f8')
        for si, Mi, ji in zip(self._scale, M, j):

            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._j[sn:en] += si[n] * ji
                for m in range(0, Nfields):
                    sm = m*Nr
                    em = (m+1)*Nr

                    self._M[sn:en, sm:em] += si[n]*si[m] * Mi

        self._like_noise = noise_likelihood

        self._fit()

    def _fit(self):
        """Compute the mean and variance"""
        # Compute the inverse prior covariance, S(p)^-1
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv
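        # Solve D^{-1} mu = j. Try a Cholesky factorization first; if D^{-1}
        # is not (numerically) positive definite, fall back on a
        # pseudo-inverse built from the SVD.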

        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None

            self._mu = scipy.linalg.cho_solve(self._Dchol, self._j)

        except np.linalg.LinAlgError:
            U, s, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s > 0, 1. / s, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

            self._mu = np.dot(V.T, np.multiply(np.dot(U.T, self._j), s1))

        # Reset the covariance matrix - we will compute it when needed
        if self._Nfields > 1:
            self._mu = self._mu.reshape(self._Nfields, self.size)
        self._cov = None

    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N, ...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b
        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the posterior"""
        draws = np.random.multivariate_normal(self.mean.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws

    def log_likelihood(self, I=None):
        r"""
        Compute one of two types of likelihood.

        If :math:`I` is provided, this computes

        .. math::
            \log[P(I,V|S)].

        Otherwise the marginalized likelihood is computed,

        .. math::
            \log[P(V|S)].

        Parameters
        ----------
        I : array, size = N, optional, unit = Jy / sr
            Intensity :math:`I(r)` to compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]` or :math:`\log[P(V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihoods take the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
                - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        and

        .. math::
            \log[P(V|p)] = \frac{1}{2} j^T D j
                + \frac{1}{2} \log[\det(D)/\det(S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w / 2 \pi)

        is the noise likelihood.
        """
        if I is None:
            like = 0.5 * np.sum(self._j * self._mu)

            if self._Sinv is not None:
                Q = self.Dsolve(self._Sinv)
                like += 0.5 * np.linalg.slogdet(Q)[1]
        else:
            Sinv = self._Sinv
            if Sinv is None:
                Sinv = 0

            Dinv = self._M + Sinv

            like = np.sum(self._j * I) - 0.5 * np.dot(I, np.dot(Dinv, I))

            if self._Sinv is not None:
                like += 0.5 * np.linalg.slogdet(2 * np.pi * Sinv)[1]

        return like + self._like_noise

    def solve_non_negative(self):
        """Compute the best-fit solution with non-negative intensities"""
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv
        return scipy.optimize.nnls(Dinv, self._j,
                                   maxiter=100*len(self._j))[0]

    @property
    def mean(self):
        """Posterior mean, unit = Jy / sr"""
        return self._mu

    @property
    def MAP(self):
        """Posterior maximum, unit = Jy / sr"""
        return self.mean

    @property
    def covariance(self):
        """Posterior covariance, unit = (Jy / sr)**2"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def s_0(self):
        """Zero point of the brightness; always 0 for the Gaussian model"""
        return 0

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        if self.num_fields == 1 and self._p is not None:
            return self._p.reshape(self.size)
        return self._p

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size


class LogNormalMAPModel:
    r"""
    Finds the maximum a posteriori field for log-normal regression problems,

    .. math::
        P(s|q,V,p,s_0) \propto G(H \exp(scale*(s+s_0)) - V, M) P(s|p),

    where :math:`s` is the log-intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`H` is the design
    matrix of the transform, e.g. the coefficient matrix of the forward
    Hankel transform.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(s|p) \propto G(s, S(p)).

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity
    :math:`\exp(scale*(s+s_0))` to visibility space. :math:`M` is defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.

    The maximum a posteriori field, s_MAP, is found by maximizing
    :math:`\log P(s|q,V,p,s_0)`, and the posterior covariance at s_MAP is

    .. math::
        D = [ M + S(p)^{-1}]^{-1}.

    If the prior is not provided then

    .. math::
        D = M^{-1},

    and the posterior for exp(s) is the same as the standard Gaussian model.

    Note: This class also supports :math:`M` and :math:`j` being split into
    multiple terms (i.e. :math:`M_i, j_i`) such that different scale
    factors, :math:`s_{0,i}`, can be applied to each system. This allows
    fitting multi-frequency data.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels
        (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : float, 1D array (size=N), or list of
        Scale factors (see above). These factors can be a constant, one
        per brightness point or per band (optionally per collocation point)
        to enable multi-frequency fitting.
    s0 : float or 1D array, size = Nfields, optional
        Zero point for the brightness function, see above.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    full_hessian : float, range [0, 1]
        If 1, use the full Hessian in evaluating the matrix :math:`D`. When
        0, a term is omitted that can cause the Hessian not to be positive
        definite when the solution is a poor fit to the data. A value
        between 0 and 1 scales this term by that factor.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
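
    Examples
    --------
    A sketch of the intended use. The starting guess (the log of a
    positive brightness profile, e.g. from a previous GaussianModel fit)
    is an assumption; the Newton solver needs a sensible, non-zero
    starting point::

        >>> I0 = np.maximum(I_mean, 1e-3 * I_mean.max())
        >>> model = LogNormalMAPModel(DHT, mapped['M'], mapped['j'],
        ...                           p=np.full(DHT.size, 1e10), s0=0.,
        ...                           guess=np.log(I0))
        >>> I_MAP = np.exp(model.MAP + model.s_0)   # Jy / sr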

    """

    def __init__(self, DHT, M, j, p=None, scale=None, s0=None, guess=None,
                 Nfields=None, full_hessian=1, noise_likelihood=0):

        self._DHT = DHT
        self._full_hess = full_hessian

        # Correct the shape of the design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        self._M = M
        self._j = j

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of signal fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if s0 is None:
            self._s0 = np.ones([Nfields, 1], dtype='f8')
        else:
            s0 = np.atleast_1d(s0)
            if len(s0) == 1:
                s0 = np.repeat(s0, Nfields)
            elif len(s0) != Nfields:
                raise ValueError("Signal zero-point (s0) must have the same"
                                 " length as the number of fields or length 1")
            self._s0 = s0.reshape(Nfields, 1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to increase `rout` by 10-20% or `n` so"
                                 " that it is large, >~300.")

            Ykm = self._DHT.coefficients()
            self._Sinv = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)
        else:
            self._Sinv = np.zeros([Nfields, Nr, Nr], dtype='f8')

        self._like_noise = noise_likelihood

        self._fit(guess)

    def _fit(self, guess):
        """Find the maximum a posteriori solution and its variance"""
        Sinv = self._Sinv
        Ns, Nr = Sinv.shape[:2]

        Sflat = np.zeros([Ns*Nr, Ns*Nr])
        for i in range(Ns):
            s = i*Nr
            e = (i+1)*Nr
            Sflat[s:e, s:e] = Sinv[i]

        scale = self._scale
        s0 = self._s0

        def H(s):
            """Negative log-posterior, up to a constant"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            f = 0.5*np.einsum('ij,ijk,ik', s, Sinv, s)

            f += 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
            f -= np.sum(I*self._j)

            return f

        def jac(s):
            """1st derivative of the negative log-posterior"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)  # shape Nf, Ns, Nr

            S1_s = np.einsum('sjk,sk->sj', Sinv, s)  # shape Ns, Nr
            MI = np.einsum('isj,ijk,ik->sj', sI, self._M, I)
            jI = np.einsum('isj,ij->sj', sI, self._j)

            return (S1_s + (MI - jI)).reshape(Ns*Nr)

        def hess(s):
            """2nd derivative of the negative log-posterior"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)  # shape Nf, Ns, Nr

            Mjk = np.einsum('isj,ijk,itk->sjtk', sI, self._M, sI)

            resid = 0
            if self._full_hess > 0:
                MI = Mjk.sum(3)
                jI = np.einsum('is,itj,ij->sjt', scale, sI, self._j)

                resid = np.einsum('sjt,jk->sjtk', MI-jI, np.eye(Nr)).reshape(Ns*Nr, Ns*Nr)
                if self._full_hess < 1:
                    resid *= self._full_hess

            return Mjk.reshape(Ns*Nr, Ns*Nr) + resid + Sflat

        x = guess.reshape(Ns*Nr)

        def limit_step(dx, x):
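            # Restrict the Newton step: no element of s may change by more
            # than ~110% of its current magnitude, and never take more than
            # the full Newton step. This guards against overshooting.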

            alpha = 1.1*np.min(np.abs(x/dx))

            alpha = min(alpha, 1)
            return alpha*dx

        # Ignore the convergence flag: it often reports failure due to
        # round-off when we are already very close to the minimum
        search = LineSearch(reduce_step=limit_step)
        s, _ = MinimizeNewton(H, jac, hess, x, search, tol=1e-7)
        self._s_MAP = s.reshape(Ns, Nr)

        Dinv = hess(s)
        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None
        except np.linalg.LinAlgError:
            U, s_svd, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s_svd > 0, 1./s_svd, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

        self._cov = None

    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N, ...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b
        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the (approximate) posterior"""
        draws = np.random.multivariate_normal(self.MAP.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws

    def log_likelihood(self, s=None):
        r"""
        Compute the likelihood,

        .. math::
            \log[P(I,V|S)].

        Parameters
        ----------
        s : array, size = N, optional
            Log-brightness field (:math:`I(r) = \exp(scale*(s+s_0))`) to
            compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihood takes the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
                - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w / 2 \pi)

        is the noise likelihood.
        """
        if s is None:
            s = self._s_MAP
        s = s.reshape(self._Nfields, -1)

        Sinv = self._Sinv

        # Brightness implied by the log-brightness field
        I = np.exp(np.dot(self._scale, s + self._s0))  # shape Nf, Nr

        # Prior and data terms; these match the objective minimized in _fit
        like = -0.5*np.einsum('ij,ijk,ik', s, Sinv, s)

        like -= 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
        like += np.sum(I*self._j)

        if self._p is not None:
            like += 0.5 * np.sum(np.linalg.slogdet(2 * np.pi * Sinv)[1])

        return like + self._like_noise

    def solve_non_negative(self):
        """Compute the best-fit solution with non-negative intensities"""
        # The solution is always non-negative. Provided for convenience.
        return self.MAP

    @property
    def MAP(self):
        """Posterior maximum of the log-brightness field, s_MAP"""
        MAP = self._s_MAP
        if MAP.shape[0] == 1:
            return MAP.reshape(self.size)
        return MAP

    @property
    def covariance(self):
        """Posterior covariance of the log-brightness field at s_MAP"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        p = self._p
        if p.shape[0] == 1:
            return p.reshape(self.size)
        return p

    @property
    def scale(self):
        """Scale factors applied to each channel and field"""
        scale = self._scale
        if scale.shape[1] == 1:
            return scale[:, 0]
        return self._scale

    @property
    def s_0(self):
        """Zero point of the log-brightness, see the class documentation"""
        s0 = self._s0
        if s0.shape[0] == 1:
            return s0[0]
        return s0

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size