
# Frankenstein: 1D disc brightness profile reconstruction from Fourier data
# using non-parametric Gaussian Processes
#
# Copyright (C) 2019-2020 R. Booth, J. Jennings, M. Tazzari
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>
#

import numpy as np
import scipy.linalg
import scipy.sparse
import scipy.optimize
import logging

from frank.constants import rad_to_arcsec, deg_to_rad
from frank.minimizer import LineSearch, MinimizeNewton

class VisibilityMapping:
    r"""Builds the mapping between the visibility and image planes.

    VisibilityMapping generates the transform matrices :math:`H(q)` such that
    :math:`V_\nu(q) = H(q) I_\nu`. It also uses these to construct the design
    matrices :math:`M` and :math:`j` used in the fitting.

    VisibilityMapping supports the following models:
      1. An optically thick and geometrically thin disc
      2. An optically thin and geometrically thin disc
      3. An optically thin disc with a known Gaussian vertical structure.
    All models are axisymmetric.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    geometry : SourceGeometry object, optional
        Geometry used to correct the visibilities for the source inclination.
    vis_model : string,
        One of ['opt_thick', 'opt_thin', 'debris'], corresponding to models
        1-3 described above, respectively.
    scale_height : function H(R), units = arcsec
        The vertical thickness of the disc in terms of its Gaussian
        scale-height. Only used if vis_model="debris".
    block_data : bool, default = True
        Large temporary matrices are needed to set up the data. If block_data
        is True, we avoid this, limiting the memory requirement to block_size
        elements.
    block_size : int, default = 10**5
        Size of the matrices if blocking is used
    check_qbounds : bool, default = True
        Whether to check if the first (last) collocation point is smaller
        (larger) than the shortest (longest) deprojected baseline in the
        dataset
    verbose : bool, default = True
        Whether to print notification messages
    """

    def __init__(self, DHT, geometry,
                 vis_model='opt_thick', scale_height=None, block_data=True,
                 block_size=10 ** 5, check_qbounds=True, verbose=True):

        _vis_models = ['opt_thick', 'opt_thin', 'debris']
        if vis_model not in _vis_models:
            raise ValueError(f"vis_model must be one of {_vis_models}")

        # Store flags
        self._vis_model = vis_model
        self.check_qbounds = check_qbounds
        self._verbose = verbose

        self._chunking = block_data
        self._chunk_size = block_size

        self._DHT = DHT
        self._geometry = geometry

        # Check for consistency and report the model choice.
        self._scale_height = None
        if self._vis_model == 'opt_thick':
            if self._verbose:
                logging.info('  Assuming an optically thick model (the default): '
                             'Scaling the total flux to account for the source '
                             'inclination')
        elif self._vis_model == 'opt_thin':
            if self._verbose:
                logging.info('  Assuming an optically thin model: *Not* scaling the '
                             'total flux to account for the source inclination')
        elif self._vis_model == 'debris':
            if scale_height is None:
                raise ValueError('You requested a model with a non-zero scale height'
                                 ' but did not specify H(R) (scale_height=None)')
            self._scale_height = scale_height(self.r)
            self._H2 = 0.5*(2*np.pi*self._scale_height / rad_to_arcsec)**2

            if self._verbose:
                logging.info('  Assuming an optically thin but geometrically '
                             'thick model: *Not* scaling the total flux to '
                             'account for the source inclination')

    def map_visibilities(self, u, v, V, weights, frequencies=None, geometry=None):
        r"""
        Compute the matrices :math:`M` and :math:`j` from the visibility data.

        Also compute the null likelihood,

        .. math::
            H_0 = \frac{1}{2} \sum \log\left[\frac{w}{2 \pi}\right]
                  - \frac{1}{2} V^T w V

        Parameters
        ----------
        u,v : 1D array, unit = :math:`\lambda`
            uv-points of the visibilities
        V : 1D array, unit = Jy
            Visibility amplitudes at q
        weights : 1D array, optional, unit = Jy^-2
            Weights of the visibilities, weight = 1 / sigma^2, where sigma is
            the standard deviation
        frequencies : 1D array, optional, unit = Hz
            Channel frequencies of each data point. If not provided, a single
            channel is assumed.
        geometry : SourceGeometry object
            Geometry used to deproject the visibilities before fitting. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        mapped visibilities : dict
            The format depends on whether a single channel was assumed
            (frequencies=None). The following data may be present:
                'multi_freq' : bool,
                    Specifies if a multi-frequency analysis was done
                'channels' : array, 1D, multi-frequency analysis only
                    The frequency of each channel.
                'M' : array, 2D or 3D.
                    The matrix :math:`M` of the mapping. This will have shape
                    (num_channels, size, size) for multi-frequency analysis
                    and (size, size) for single-channel analysis.
                'j' : array, 1D or 2D.
                    The vector :math:`j` of the mapping. This will have shape
                    (num_channels, size) for multi-frequency analysis and
                    (size,) for single-channel analysis.
                'null_likelihood' : float,
                    The likelihood of a model with I=0. See above.
                'hash' : list,
                    Identifying data, used to ensure compatibility between the
                    mapped visibilities and fitting objects.
        """

        if geometry is None:
            geometry = self._geometry

        if self._verbose:
            logging.info('  Building visibility matrices M and j')

        # Deproject the visibilities
        u, v, k, V = geometry.apply_correction(u, v, V, use3D=True)
        q = np.hypot(u, v)

        # Check consistency of the uv points with the model
        self._check_uv_range(q)

        # Use only the real part of V.
        V = V.real
        w = np.ones_like(V) * weights

        multi_freq = True
        if frequencies is None:
            multi_freq = False
            frequencies = np.ones_like(V)

        channels = np.unique(frequencies)
        Ms = np.zeros([len(channels), self.size, self.size], dtype='f8')
        js = np.zeros([len(channels), self.size], dtype='f8')
        for i, f in enumerate(channels):
            idx = frequencies == f

            qi = q[idx]
            ki = k[idx]
            wi = w[idx]
            Vi = V[idx]

            # If chunking is used, we will build up M and j chunk-by-chunk
            if self._chunking:
                Nstep = int(self._chunk_size / self.size + 1)
            else:
                Nstep = len(Vi)

            start = 0
            end = Nstep
            Ndata = len(Vi)
            while start < Ndata:
                qs = qi[start:end]
                ks = ki[start:end]
                ws = wi[start:end]
                Vs = Vi[start:end]

                X = self._get_mapping_coefficients(qs, ks)

                wXT = np.array(X.T * ws, order='C')

                Ms[i] += np.dot(wXT, X)
                js[i] += np.dot(wXT, Vs)

                start = end
                end = min(Ndata, end + Nstep)

        # Compute likelihood normalization H_0, i.e., the
        # log-likelihood of a source with I=0.
        H0 = 0.5 * np.sum(np.log(w / (2 * np.pi)) - V * w * V)

        if multi_freq:
            return {
                'multi_freq' : True,
                'channels' : channels,
                'M' : Ms,
                'j' : js,
                'null_likelihood' : H0,
                'hash' : [True, self._DHT, geometry, self._vis_model, self._scale_height],
            }
        else:
            return {
                'multi_freq' : False,
                'channels' : None,
                'M' : Ms[0],
                'j' : js[0],
                'null_likelihood' : H0,
                'hash' : [False, self._DHT, geometry, self._vis_model, self._scale_height],
            }
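
    # A toy numerical sketch of the accumulation in map_visibilities (not
    # frank API): for mapping coefficients X (Nvis x N), weights w and data
    # V, the chunked loop above computes M = X^T diag(w) X and
    # j = X^T diag(w) V, equivalent to:
    #
    #   X = np.random.randn(100, 20)
    #   w = np.ones(100)
    #   V_data = np.random.randn(100)
    #   M = (X.T * w) @ X
    #   j = (X.T * w) @ V_data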

    def check_hash(self, hash, multi_freq=False, geometry=None):
        """Checks whether the hash of some mapped visibilities is compatible
        with this VisibilityMapping.

        Parameters
        ----------
        hash : list
            Hash to compare
        multi_freq : bool
            Whether we expect a multi-frequency fit
        geometry : SourceGeometry object, optional
            Geometry to use in the comparison.
        """

        if geometry is None:
            geometry = self._geometry

        passed = (
            multi_freq == hash[0] and
            self._DHT.Rmax == hash[1].Rmax and
            self._DHT.size == hash[1].size and
            self._DHT.order == hash[1].order and
            geometry.inc == hash[2].inc and
            geometry.PA == hash[2].PA and
            geometry.dRA == hash[2].dRA and
            geometry.dDec == hash[2].dDec and
            self._vis_model == hash[3]
        )

        if not passed:
            return False

        if self._scale_height is None:
            return hash[4] is None
        else:
            if hash[4] is None:
                return False
            else:
                return np.all(self._scale_height == hash[4])

    def predict_visibilities(self, I, q, k=None, geometry=None):
        r"""Compute the predicted visibilities given the brightness profile, I

        Parameters
        ----------
        I : array, unit = Jy
            Brightness at the collocation points.
        q : array, unit = :math:`\lambda`
            Radial uv-distance to predict the visibility at
        k : array, optional, unit = :math:`\lambda`
            Vertical uv-distance to predict the visibility. Only needed for a
            geometrically thick model.
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        V(q, k) : array, unit = Jy
            Predicted visibilities of a source with a radial flux distribution
            given by :math:`I` and the position angle, inclination determined
            by the geometry.

        Notes
        -----
        For an optically thick model the visibility amplitudes are reduced due
        to the projection but phase centre corrections are not added.
        """

        # Chunk the visibility calculation for speed
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(q)

        end = 0
        start = 0
        V = []
        while end < len(q):
            start = end
            end = start + Ni
            qi = q[start:end]

            ki = None
            if k is not None:
                ki = k[start:end]

            H = self._get_mapping_coefficients(qi, ki, geometry)

            V.append(np.dot(H, I))
        return np.concatenate(V)
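
    # Hedged usage sketch (assumes `mapping` is a constructed
    # VisibilityMapping and `I` a brightness profile at mapping.r):
    #
    #   q_test = np.logspace(4, 7, 500)   # baselines in units of lambda
    #   V_test = mapping.predict_visibilities(I, q_test)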

    def invert_visibilities(self, V, R, geometry=None):
        r"""Compute the brightness, I, from the visibilities.

        Note this method does not work for an arbitrary distribution of
        baselines and therefore cannot be used to determine the brightness
        given a generic set of data. Instead it needs the visibilities at the
        collocation points of the DiscreteHankelTransform, q.

        For geometrically thick models the visibilities used must be those for
        which kz = 0.

        Given the above constraints, this method computes the inverse of
        predict_visibilities.

        Parameters
        ----------
        V : array, unit = Jy
            Visibility at the collocation points. This must be the deprojected
            and phase-center corrected visibilities.
        R : array, unit = arcsec
            Radial distance to compute the brightness at
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        I(R) : array, unit = Jy / Sr
            Brightness at the radial locations, R.

        Notes
        -----
        For the optically thick model, the inclination scaling applied in the
        forward transform is undone when computing the brightness.
        """

        # Chunk the brightness calculation for speed
        R = np.atleast_1d(R)
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(R)

        end = 0
        start = 0
        I = []
        while end < len(R):
            start = end
            end = start + Ni
            Ri = R[start:end]

            H = self._get_mapping_coefficients(Ri, 0, geometry, inverse=True)

            I.append(np.dot(H, V))
        return np.concatenate(I)[R < self.Rmax]

    def transform(self, f, q=None, direction='forward'):
        """Apply a DHT directly to the data provided

        Parameters
        ----------
        f : array, size = N
            Function to Hankel transform, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        q : array or None
            The frequency points at which to evaluate the Hankel
            transform. If not specified, the conjugate points of the
            DHT will be used. For the backward transform, q should be
            the radius points in arcsec.
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used

        Returns
        -------
        H[f] : array, size = N or len(q) if supplied
            The Hankel transform of the array f
        """
        if direction == 'backward' and q is not None:
            q = q / rad_to_arcsec

        return self._DHT.transform(f, q, direction)

    def DHT_coefficients(self, direction='forward'):
        """Get the coefficients of the Discrete Hankel Transform.

        The coefficients are defined by

        .. math::
            H[f] = np.dot(H, f)

        Parameters
        ----------
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used

        Returns
        -------
        H : array, size = (N, N)
            The coefficient matrix of the transform
        """
        return self._DHT.coefficients(direction=direction)

    def interpolate(self, f, r, space='Real'):
        """
        Interpolate f (evaluated at the collocation points) to the new points,
        r, using interpolation that is consistent with the Fourier-Bessel
        Series / Discrete Hankel Transform.

        Parameters
        ----------
        f : array, size = N
            Function to interpolate, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        r : array or None
            The points at which to evaluate the interpolation.
        space : { 'Real', 'Fourier' }, optional
            Space in which the interpolation is done. If not supplied,
            'Real' is assumed.

        Returns
        -------
        f_interp : array, size = len(r)
            The interpolated results
        """
        if space == 'Real':
            r = r / rad_to_arcsec

        r = np.array(r)
        shape = r.shape
        r = r.reshape(-1)

        if self._chunking:
            Ni = int(self._chunk_size / len(r) + 1)
        else:
            Ni = len(r)

        end = 0
        start = 0
        I = []
        while end < len(r):
            start = end
            end = start + Ni
            ri = r[start:end]

            I.append(self._DHT.interpolate(f, ri, space))

        return np.concatenate(I).reshape(*shape)

    def _get_mapping_coefficients(self, qs, ks, geometry=None, inverse=False):
        r"""Get :math:`H(q)`, such that :math:`V(q) = H(q) I_\nu`"""

        if self._vis_model == 'opt_thick':
            # Optically thick & geometrically thin
            if geometry is None:
                geometry = self._geometry
            scale = np.cos(geometry.inc * deg_to_rad)
        elif self._vis_model == 'opt_thin':
            # Optically thin & geometrically thin
            scale = 1
        elif self._vis_model == 'debris':
            # Optically thin & geometrically thick
            scale = np.exp(-np.outer(ks*ks, self._H2))
        else:
            raise ValueError("model not supported. Should never occur.")

        if inverse:
            scale = np.atleast_1d(1/scale).reshape(1, -1)
            qs = qs / rad_to_arcsec
            direction = 'backward'
        else:
            direction = 'forward'

        H = self._DHT.coefficients(qs, direction=direction) * scale

        return H

    def _check_uv_range(self, uv):
        """Check that the uv domain is properly covered"""

        # Check whether the first (last) collocation point is smaller (larger)
        # than the shortest (longest) deprojected baseline in the dataset
        if self.check_qbounds:
            if self.q[0] < uv.min():
                logging.warning(r"WARNING: First collocation point, q[0] = {:.3e} \lambda,"
                                " is at a baseline shorter than the"
                                " shortest deprojected baseline in the dataset,"
                                r" min(uv) = {:.3e} \lambda. For q[0] << min(uv),"
                                " the fit's total flux may be biased"
                                " low.".format(self.q[0], uv.min()))

            if self.q[-1] < uv.max():
                raise ValueError(r"ERROR: Last collocation point, {:.3e} \lambda, is at"
                                 " a shorter baseline than the longest deprojected"
                                 r" baseline in the dataset, {:.3e} \lambda. Please"
                                 " increase N in FrankMultFrequencyFitter (this is"
                                 " `hyperparameters: n` if you're using a parameter"
                                 " file). Or if you'd like to fit to a shorter maximum"
                                 " baseline, cut the (u, v) distribution before fitting"
                                 " (`modify_data: baseline_range` in the"
                                 " parameter file).".format(self.q[-1], uv.max()))

    @property
    def r(self):
        """Radius points, unit = arcsec"""
        return self._DHT.r * rad_to_arcsec

    @property
    def Rmax(self):
        """Maximum radius, unit = arcsec"""
        return self._DHT.Rmax * rad_to_arcsec

    @property
    def q(self):
        r"""Frequency points, unit = :math:`\lambda`"""
        return self._DHT.q

    @property
    def Qmax(self):
        r"""Maximum frequency, unit = :math:`\lambda`"""
        return self._DHT.Qmax

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size

    @property
    def scale_height(self):
        """Vertical thickness of the disc, unit = arcsec"""
        if self._scale_height is not None:
            return self._scale_height
        else:
            return None

class GaussianModel:
    r"""
    Solves the linear regression problem to compute the posterior,

    .. math::
       P(I|q,V,p) \propto G(I-\mu, D),

    where :math:`I` is the intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`\mu` and :math:`D` are
    the mean and covariance of the posterior distribution.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(I|p) \propto G(I, S(p)),

    and the Bayesian Linear Regression problem is solved. :math:`S` is
    computed from the power spectrum, :math:`p`, if provided. Otherwise the
    traditional (frequentist) linear regression is used.

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity :math:`I` to
    visibility space. :math:`M` is defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.

    The mean and covariance of the posterior are then given by

    .. math::
        \mu = D j

    and

    .. math::
        D = [ M + S(p)^{-1}]^{-1},

    if the prior is provided, otherwise

    .. math::
        D = M^{-1}.


    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels
        (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : float, 1D array, or list of these, optional
        Scale factors applied per band when constructing :math:`M` and
        :math:`j`, used for multi-frequency fitting.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
    """
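
    # A toy sketch of the linear algebra above (illustrative sizes only):
    # the posterior mean solves [M + S(p)^{-1}] mu = j, i.e. mu = D j.
    #
    #   N = 4
    #   M_toy = 2.0 * np.eye(N)          # stand-in for H^T w H
    #   j_toy = np.ones(N)               # stand-in for H^T w V
    #   Sinv_toy = 0.1 * np.eye(N)       # stand-in for S(p)^{-1}
    #   mu = np.linalg.solve(M_toy + Sinv_toy, j_toy)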

    def __init__(self, DHT, M, j, p=None, scale=None, guess=None,
                 Nfields=None, noise_likelihood=0):

        self._DHT = DHT

        # Correct shape of design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                print(p)
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to adjust `rout` (ensure it is larger"
                                 " than the source) or `n` (up to ~300).")

            Ykm = self._DHT.coefficients()
            Sj = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)

            self._Sinv = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._Sinv[sn:en, sn:en] += Sj[n]
        else:
            self._Sinv = None

        # Compute the design matrix
        self._M = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
        self._j = np.zeros(Nr*Nfields, dtype='f8')
        for si, Mi, ji in zip(self._scale, M, j):

            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._j[sn:en] += si[n] * ji
                for m in range(0, Nfields):
                    sm = m*Nr
                    em = (m+1)*Nr

                    self._M[sn:en, sm:em] += si[n]*si[m] * Mi

        self._like_noise = noise_likelihood

        self._fit()

    def _fit(self):
        """Compute the mean and variance"""
        # Compute the inverse prior covariance, S(p)^-1
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv

        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None

            self._mu = scipy.linalg.cho_solve(self._Dchol, self._j)

        except np.linalg.LinAlgError:
            U, s, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s > 0, 1. / s, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

            self._mu = np.dot(V.T, np.multiply(np.dot(U.T, self._j), s1))

        # Reset the covariance matrix - we will compute it when needed
        if self._Nfields > 1:
            self._mu = self._mu.reshape(self._Nfields, self.size)
        self._cov = None
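
    # Sketch of the SVD fallback used in _fit (toy example, not frank API):
    # when the Cholesky factorization fails, D^{-1} x = b is solved with a
    # pseudo-inverse built from the SVD of D^{-1}:
    #
    #   A = np.array([[2., 0.], [0., 0.]])   # singular stand-in for D^{-1}
    #   b = np.array([1., 1.])
    #   U, sv, Vt = scipy.linalg.svd(A)
    #   sv1 = np.where(sv > 0, 1/sv, 0)      # invert non-zero singular values
    #   x = Vt.T @ ((U.T @ b) * sv1)         # least-squares solution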

    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N,...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b

        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the posterior"""
        draws = np.random.multivariate_normal(self.mean.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws

    def log_likelihood(self, I=None):
        r"""
        Compute one of two types of likelihood.

        If :math:`I` is provided, this computes

        .. math::
            \log[P(I,V|S)].

        Otherwise the marginalized likelihood is computed,

        .. math::
            \log[P(V|S)].


        Parameters
        ----------
        I : array, size = N, optional, unit = Jy / sr
            Intensity :math:`I(r)` to compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]` or :math:`\log[P(V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihoods take the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
                - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        and

        .. math::
            \log[P(V|p)] = \frac{1}{2} j^T D j
                + \frac{1}{2} \log[\det(D)/\det(S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w /2 \pi)

        is the noise likelihood.
        """

        if I is None:
            like = 0.5 * np.sum(self._j * self._mu)

            if self._Sinv is not None:
                Q = self.Dsolve(self._Sinv)
                like += 0.5 * np.linalg.slogdet(Q)[1]
        else:
            Sinv = self._Sinv
            if Sinv is None:
                Sinv = 0

            Dinv = self._M + Sinv

            like = np.sum(self._j * I) - 0.5 * np.dot(I, np.dot(Dinv, I))

            if self._Sinv is not None:
                like += 0.5 * np.linalg.slogdet(2 * np.pi * Sinv)[1]

        return like + self._like_noise
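
    # Hedged usage sketch: with `mapped` returned by
    # VisibilityMapping.map_visibilities, a DHT `dht` and a trial power
    # spectrum `p` (hypothetical objects), the marginal likelihood of p
    # could be evaluated as
    #
    #   gm = GaussianModel(dht, mapped['M'], mapped['j'], p=p,
    #                      noise_likelihood=mapped['null_likelihood'])
    #   logL = gm.log_likelihood()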

    def solve_non_negative(self):
        """Compute the best fit solution with non-negative intensities"""
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv
        return scipy.optimize.nnls(Dinv, self._j,
                                   maxiter=100*len(self._j))[0]

    @property
    def mean(self):
        """Posterior mean, unit = Jy / sr"""
        return self._mu

    @property
    def MAP(self):
        """Posterior maximum, unit = Jy / sr"""
        return self.mean

    @property
    def covariance(self):
        """Posterior covariance, unit = (Jy / sr)**2"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def s_0(self):
        """Zero point of the brightness; always 0 for the Gaussian model"""
        return 0

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        if self.num_fields == 1 and self._p is not None:
            return self._p.reshape(self.size)
        return self._p

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size

class LogNormalMAPModel:
    r"""
    Finds the maximum a posteriori field for log-normal regression problems,

    .. math::
        P(s|q,V,p,s_0) \propto G(H \exp[scale \cdot (s+s_0)] - V, M)\, P(s|p),

    where :math:`s` is the log-intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`H` is the design
    matrix of the transform, e.g. the coefficient matrix of the forward
    Hankel transform.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(s|p) \propto G(s, S(p)).

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity
    :math:`\exp[scale \cdot (s+s_0)]` to visibility space. :math:`M` is
    defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.


    The maximum a posteriori field, s_MAP, is found by maximizing
    :math:`\log P(s|q,V,p,s_0)` and the posterior covariance at s_MAP is

    .. math::
        D = [ M + S(p)^{-1}]^{-1}.

    If the prior is not provided then

    .. math::
        D = M^{-1},

    and the posterior for exp(s) is the same as the standard Gaussian model.

    Note: This class also supports :math:`M` and :math:`j` being split into
    multiple terms (i.e. :math:`M_i, j_i`) such that different scale
    factors, :math:`s_{0,i}`, can be applied to each system. This allows
    fitting multi-frequency data.


    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels
        (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : float, 1D array (size = N), or list of these
        Scale factors s0 (see above). These factors can be a constant, one
        per brightness point or per band (optionally per collocation point)
        to enable multi-frequency fitting.
    s0 : float or 1D array, size = Nfields, optional
        Zero point for the brightness function, see above.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    full_hessian : float, range [0, 1]
        If 1, use the full Hessian in evaluating the matrix :math:`D`. When
        0, a term is omitted that can cause the Hessian not to be a
        positive-definite matrix when the solution is a poor fit to the
        data. A value between 0 and 1 will scale this term by that factor.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
    """
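
    # A toy sketch of the MAP objective minimized in _fit (single field,
    # illustrative only): with I = exp(scale*(s + s0)), the negative log
    # posterior is H(s) = 0.5 s^T Sinv s + 0.5 I^T M I - j^T I.
    #
    #   def neg_log_post(s, Sinv, M, j, scale=1.0, s0=0.0):
    #       I = np.exp(scale * (s + s0))
    #       return 0.5 * s @ Sinv @ s + 0.5 * I @ M @ I - j @ I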

    def __init__(self, DHT, M, j, p=None, scale=None, s0=None, guess=None,
                 Nfields=None, full_hessian=1, noise_likelihood=0):

        self._DHT = DHT
        self._full_hess = full_hessian

        # Correct shape of design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        self._M = M
        self._j = j

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of signal fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if s0 is None:
            self._s0 = np.ones(Nfields, dtype='f8')
        else:
            s0 = np.atleast_1d(s0)
            if len(s0) == 1:
                s0 = np.repeat(s0, Nfields)
            elif len(s0) != Nfields:
                raise ValueError("Signal zero-point (s0) must have the same "
                                 "length as the number of fields or length 1")
            self._s0 = s0.reshape(Nfields, 1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to increase `rout` by 10-20% or `n` so"
                                 " that it is large, >~300.")

            Ykm = self._DHT.coefficients()
            self._Sinv = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)
        else:
            self._Sinv = np.zeros([Nfields, Nr, Nr], dtype='f8')

        self._like_noise = noise_likelihood

        self._fit(guess)

    def _fit(self, guess):
        """Find the maximum likelihood solution and variance"""
        Sinv = self._Sinv
        Ns, Nr = Sinv.shape[:2]

        Sflat = np.zeros([Ns*Nr, Ns*Nr])
        for i in range(Ns):
            s = i*Nr
            e = (i+1)*Nr
            Sflat[s:e, s:e] = Sinv[i]

        scale = self._scale
        s0 = self._s0

        def H(s):
            """Negative log-likelihood function"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            f = 0.5*np.einsum('ij,ijk,ik', s, Sinv, s)

            f += 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
            f -= np.sum(I*self._j)

            return f

        def jac(s):
            """1st derivative of the negative log-likelihood"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)  # shape Nf, Ns, Nr

            S1_s = np.einsum('sjk,sk->sj', Sinv, s)  # shape Ns, Nr
            MI = np.einsum('isj,ijk,ik->sj', sI, self._M, I)
            jI = np.einsum('isj,ij->sj', sI, self._j)

            return (S1_s + (MI - jI)).reshape(Ns*Nr)

        def hess(s):
            """2nd derivative of the negative log-likelihood"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)      # shape Nf, Ns, Nr
            s2I = np.einsum('is,ij->isj', scale**2, I)  # shape Nf, Ns, Nr

            Mjk = np.einsum('isj,ijk,itk->sjtk', sI, self._M, sI)

            resid = 0
            if self._full_hess > 0:
                MI = Mjk.sum(3)
                jI = np.einsum('is,itj,ij->sjt', scale, sI, self._j)

                resid = np.einsum('sjt,jk->sjtk', MI-jI, np.eye(Nr)).reshape(Ns*Nr, Ns*Nr)
                if self._full_hess < 1:
                    resid *= self._full_hess

            return Mjk.reshape(Ns*Nr, Ns*Nr) + resid + Sflat

        x = guess.reshape(Ns*Nr)

        def limit_step(dx, x):
            alpha = 1.1*np.min(np.abs(x/dx))

            alpha = min(alpha, 1)
            return alpha*dx

        # Ignore convergence because it will often fail due to round-off when
        # we're super close to the minimum
        search = LineSearch(reduce_step=limit_step)
        s, _ = MinimizeNewton(H, jac, hess, x, search, tol=1e-7)
        self._s_MAP = s.reshape(Ns, Nr)

        Dinv = hess(s)
        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None
        except np.linalg.LinAlgError:
            U, s_svd, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s_svd > 0, 1./s_svd, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

        self._cov = None
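
    # Sketch of the step limiter used above (toy numbers): limit_step scales
    # the Newton update so that no component of s overshoots a sign change by
    # more than about 10%:
    #
    #   x = np.array([1.0, 2.0])
    #   dx = np.array([-4.0, 1.0])
    #   alpha = min(1.1 * np.min(np.abs(x / dx)), 1)   # = 0.275 here
    #   step = alpha * dx                              # damped Newton step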

    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N,...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b

        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the (approximate) posterior"""
        draws = np.random.multivariate_normal(self.MAP.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws

    def log_likelihood(self, s=None):
        r"""
        Compute the likelihood,

        .. math::
            \log[P(I,V|S)].

        Parameters
        ----------
        s : array, size = N, optional
            Log-intensity field, where :math:`I(r)=\exp[scale \cdot s]`, to
            compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihood takes the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
                - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w /2 \pi)

        is the noise likelihood.
        """

        if s is None:
            s = self._s_MAP

        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        I = np.exp(np.einsum('i,j->ij', self._scale, s))

        like = -0.5*np.dot(s-self._s0, np.dot(Sinv, s-self._s0))

        like -= 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
        like += np.sum(I*self._j)

        if self._Sinv is not None:
            like += 0.5 * np.linalg.slogdet(2 * np.pi * Sinv)[1]

        return like + self._like_noise

    def solve_non_negative(self):
        """Compute the best fit solution with non-negative intensities"""
        # The solution is always non-negative. Provided for convenience.
        return self.MAP

    @property
    def MAP(self):
        """Posterior maximum of the log-brightness field, s_MAP"""
        MAP = self._s_MAP
        if MAP.shape[0] == 1:
            return MAP.reshape(self.size)
        return MAP

    @property
    def covariance(self):
        """Posterior covariance of the log-brightness at the MAP"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        p = self._p
        if p.shape[0] == 1:
            return p.reshape(self.size)
        return p

    @property
    def scale(self):
        """Scale factors applied to each band"""
        scale = self._scale
        if scale.shape[1] == 1:
            return scale[:, 0]
        return self._scale

    @property
    def s_0(self):
        """Zero point(s) of the log-brightness, s0"""
        s0 = self._s0
        if s0.shape[0] == 1:
            return s0[0]
        return s0

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size