# --------------------------------------------- #
# Imports #
# --------------------------------------------- #
from collections import OrderedDict
import cPickle as pickle
import numpy as np
import itertools, warnings, os
import pathos.pools as pp
import scipy.optimize
import cctbx_utils, utils
from cctbx.array_family import flex
from cctbx import miller, crystal
from iotbx import mtz
# --------------------------------------------- #
# Classes #
# --------------------------------------------- #
class PeakFitting:
"""
Class for fitting Bragg peak profiles, with different options to do so based on local
intensity and phase values.
"""
def __init__(self, tomogram, hkl_info, length):
"""
Initialize class by extracting intensity and phase subvolumes.
Inputs:
-------
tomogram: real space volume
hkl_info: dict containing Miller and peak center information
length: cubic length of subvolume to consider around each potential Bragg peak
"""
self.ivols, self.pvols = self.extract_subvolumes(tomogram, hkl_info, length)
self.length = length
def extract_subvolumes(self, tomogram, hkl_info, length):
"""
Extract cubic subvolumes around each predicted peak center given by xyz coordinates
in hkl_info. Return separate dictionaries containing subvolumes for intensities and
phases (in degrees) as values and corresponding Miller indices as keys.
Inputs:
-------
tomogram: real space volume
hkl_info: dict containing Miller and peak center information
length: cubic length of subvolume to consider around each potential Bragg peak
Outputs:
--------
ivols: OrderedDict, keys as Millers and values as subvolumes of the FT intensities
pvols: OrderedDict, keys as Millers and values as subvolumes of the FT phases
"""
# FFT volume to compute intensities and phases
fftI, fftp = utils.ft_to_I_phase(utils.compute_ft(tomogram))
ivols, pvols = OrderedDict(), OrderedDict()
# if hkl_info is a regular rather than DIALS-style pickle file
if type(hkl_info) == dict:
self.xyz_calc = hkl_info['xyz']
hkl = list(map(tuple, hkl_info['hkl']))
# otherwise, use DIALS functions to retrieve hkl and xyz information
else:
hkl = hkl_info.select(hkl_info['miller_index']!=(0,0,0))['miller_index']
self.xyz_calc = np.array(hkl_info.select(hkl_info['miller_index']!=(0,0,0))['xyzobs.px.value'])
xyz = np.around(self.xyz_calc).astype(int)
x0, x1, y0, y1, z0, z1 = xyz[:,0]-int(length/2), xyz[:,0]+int(length/2), xyz[:,1]-int(length/2), \
xyz[:,1]+int(length/2), xyz[:,2]-int(length/2), xyz[:,2]+int(length/2)
# extract cubic subvolumes for phases and intensities
for i in range(len(hkl)):
ivols[hkl[i]] = fftI[z0[i]:z1[i]+1, y0[i]:y1[i]+1, x0[i]:x1[i]+1]
pvols[hkl[i]] = fftp[z0[i]:z1[i]+1, y0[i]:y1[i]+1, x0[i]:x1[i]+1] * 180/np.pi
self.retained_hkl = ivols.keys()
return ivols, pvols
def _find_contiguous(self, mapping, start, contig_list):
"""
Identify pixels contiguous with pixel mapped to start index. This function works
recursively; on the first iteration, contig_list should be an empty numpy array.
"""
if len(contig_list)==0:
contig_list = np.array([start])
to_check = mapping[start]
for idx in to_check:
if idx not in contig_list:
contig_list = np.concatenate((np.array([idx]), contig_list))
contig_list = self._find_contiguous(mapping, idx, contig_list)
return contig_list
def _adjacency_filter(self, mask, coincident_mask = None):
"""
Remove marked (value of 1) pixels that do not share a face with at least one other
marked pixel. If there are multiple isolated sets of contiguous pixels, retain the
set closest to the center of the subvolume, or to the coincident mask if given.
"""
from scipy.spatial.distance import squareform, pdist
import scipy.ndimage
# compute pairwise distances between all identified pixels
mask_idx = np.array(np.where(mask!=0), dtype='float').T
euc_dist = squareform(pdist(mask_idx, 'euclidean'))
# map each marked pixel to its adjacent marked pixels
mapping = dict()
adj_pix = [np.where(euc_dist[i]==1)[0] for i in range(euc_dist.shape[0])]
for i in range(len(adj_pix)):
mapping[i] = adj_pix[i]
# find set of contiguous pixels that is nearest to center of subvolume or coincident mask, if given
if coincident_mask is None:
cen_array = np.expand_dims(np.array(mask.shape)/2, 0)
else:
cen_array = np.expand_dims(np.array(scipy.ndimage.measurements.center_of_mass(coincident_mask)), 0)
d_cen = scipy.spatial.distance.cdist(mask_idx, cen_array, 'euclidean')
cen = np.where(d_cen==d_cen.min())[0][0]
contig_list = np.empty(0)
contiguous_set = self._find_contiguous(mapping, cen, contig_list)
# remove any marked pixels that do not belong to the identified contiguous set
for i in range(mask_idx.shape[0]):
if i not in contiguous_set:
xm,ym,zm = mask_idx[i].astype(int)
mask[xm,ym,zm] = 0
return mask
def compute_imasks(self, sigma, k_size=None, coincident_masks=None):
"""
Generate peak masks, where 1 corresponds to pixels predicted to be spanned by a
reflection. Pixels are predicted to belong to a peak if their intensity exceeds:
I(px) > median_filter(ivol)_px + mean(ivol) + sigma * std_dev(ivol)
if a kernel size (k_size) for a median filter is given, or, if k_size is None:
I(px) > mean(ivol) + sigma * std_dev(ivol)
and they are adjacent to minimally one other pixel spanned by the peak.
Inputs:
-------
sigma: factor determining threshold above which pixels are included
k_size: kernel size if using a median filter, optional
coincident_masks: dict with keys as Millers, values as subvolume masks
Outputs:
--------
imasks: dict with keys as Millers, values as subvolume masks (0=reject, 1=retain)
"""
import scipy.signal
# generate a spherical mask that excludes pixels beyond a radius of one less than half the cubic length
radius = int(self.length / 2)
r_threshold = radius - 1
mg = np.mgrid[-radius:radius:(2*radius+1)*1j,
-radius:radius:(2*radius+1)*1j,
-radius:radius:(2*radius+1)*1j]
r_dist = np.square(mg[0].flatten()) + np.square(mg[1].flatten()) + np.square(mg[2].flatten())
r_dist = np.sqrt(r_dist).reshape(radius*2+1,radius*2+1,radius*2+1)
r_mask = np.zeros_like(r_dist)
r_mask[r_dist < r_threshold] = 1
# generate a mask for each Miller index in self.ivols
imasks = OrderedDict()
counter = 0
if coincident_masks is None:
self.xyz_obs = OrderedDict()
for key in self.ivols.keys():
ivol = self.ivols[key].copy()
mask = np.zeros(ivol.shape)
# pixels that exceed I_mean + sigma * I_std are included in peak
if k_size is None:
i_threshold = np.mean(ivol) + sigma*np.std(ivol)
mask[ivol>i_threshold] = 1
# if k_size is given, threshold on the excess intensity over the median-filtered volume
else:
medfilt = scipy.signal.medfilt(ivol, kernel_size=k_size)
mask[np.where(ivol - medfilt > np.mean(ivol) + sigma*np.std(ivol))] = 1
# eliminate pixels outside spherical mask and check for adjacency
mask[r_mask==0] = 0
if np.sum(mask) > 0:
if coincident_masks is None:
mask = self._adjacency_filter(mask, coincident_mask = None)
# compute intensity-weighted peak centroid; subvolume axes are ordered (z,y,x)
zc = np.sum(ivol[np.where(mask==1)] * np.where(mask==1)[0]) / np.sum(ivol[np.where(mask==1)])
yc = np.sum(ivol[np.where(mask==1)] * np.where(mask==1)[1]) / np.sum(ivol[np.where(mask==1)])
xc = np.sum(ivol[np.where(mask==1)] * np.where(mask==1)[2]) / np.sum(ivol[np.where(mask==1)])
self.xyz_obs[key] = self.xyz_calc[counter] - radius + np.array([xc, yc, zc])
else:
if np.sum(coincident_masks[key]) > 0:
mask = self._adjacency_filter(mask, coincident_mask = coincident_masks[key])
imasks[key] = mask
counter += 1
return imasks
def compute_pmasks(self, imasks, p_threshold):
"""
Generate a submask for each hkl, where 1 corresponds to the set of pixels whose
retention yields an intensity-weighted standard deviation less than p_threshold.
Inputs:
-------
imasks: dict with keys as Millers and values as masks indicating peak pixels
p_threshold: threshold for standard deviation of phases belonging to peak
Outputs:
--------
pmasks: dict with keys as Millers and values as masks indicating valid pixels
based on the p_threshold criterion
"""
pmasks = OrderedDict()
# loop over all Miller indices
for key in imasks.keys():
mask = np.zeros((self.length, self.length, self.length))
if np.sum(imasks[key]!=0):
p_vals = self.pvols[key][np.where(imasks[key]!=0)]
i_vals = self.ivols[key][np.where(imasks[key]!=0)]
# iteratively identify pixels to remove until p_threshold is reached
valid_idx = np.ones_like(p_vals).astype(int)
p_temp, i_temp = p_vals.copy(), i_vals.copy()
while utils.std_phases(p_temp, i_temp) > p_threshold:
delta = np.abs(utils.wraptopi(p_temp - utils.average_phases(p_temp,weights=i_temp)))
max_idx = np.where(delta==delta.max())[0][0]
valid_idx[p_vals==p_temp[max_idx]] = 0
p_temp = np.delete(p_temp, max_idx)
i_temp = np.delete(i_temp, max_idx)
# compute mask in shape of subvolume, with pixels to retain marked by a value of 1
peak_idx = np.where(imasks[key]!=0)
retained_idx = np.array(peak_idx).T[valid_idx==1]
for i in range(retained_idx.shape[0]):
xi,yi,zi = retained_idx[i]
mask[xi,yi,zi] = 1
pmasks[key] = mask
return pmasks
def estimate_phases(self, masks, mode='weighted_mean'):
"""
Estimate the phase of each Bragg peak as 1) the weighted mean of peak pixels
(mode="weighted_mean"), 2) the mean of peak pixels (mode="mean"), or 3) the
value of the pixel with the maximum intensity (mode="maxI"). Peak pixels are
indicated by a value of 1 in masks.
Inputs:
-------
masks: dict with keys as Millers and values as masks indicating valid pixels
Outputs:
--------
p_est: dict with keys as Millers and values as estimated phase
p_std: dict with keys as Millers and values as phase standard deviation
"""
p_est, p_std = OrderedDict(), OrderedDict()
# loop over all Miller indices
for key in self.retained_hkl:
# phase cannot be determined if no valid peak pixels
if np.sum(masks[key]) == 0:
print "Phase could not be estimated for Miller %s" %(key,)
else:
p_vals = self.pvols[key][np.where(masks[key]!=0)]
i_vals = self.ivols[key][np.where(masks[key]!=0)]
if mode == "weighted_mean":
p_est[key] = utils.average_phases(p_vals, weights=i_vals)
p_std[key] = utils.std_phases(p_vals, weights=i_vals)
elif mode == "mean":
p_est[key] = utils.average_phases(p_vals)
p_std[key] = utils.std_phases(p_vals)
elif mode == "maxI":
p_est[key] = p_vals[i_vals==i_vals.max()]
p_std[key] = utils.std_phases(p_vals, weights=i_vals)
else:
print "Error: mode must be weighted_mean, mean or maxI"
return p_est, p_std
def estimate_background(self, masks):
"""
Estimate the background intensity by linear interpolation, predicting the intensity
values of pixels marked by 1 in the input masks. Estimating the underlying background
at each pixel, rather than simply summing nearby pixels, accounts for the missing
wedge, which yields nonuniform background patterns.
Inputs:
-------
masks: dict with keys as Millers and values as mask subvolumes
Outputs:
--------
bgd_ests: dict with keys as Millers and values as background subvolumes
"""
import scipy.interpolate
bgd_ests = OrderedDict()
for key in masks.keys():
bgd = self.ivols[key].copy().flatten()
if np.sum(masks[key]) != 0:
# using np.ravel methods to locate masked (Bragg) pixels
mg_1d = np.arange(masks[key].shape[0])
ravel_shape = (len(mg_1d), len(mg_1d), len(mg_1d))
xyz = np.array(list(itertools.product(mg_1d, mg_1d, mg_1d)), dtype='int')
xyz_rav = np.ravel_multi_index(xyz.T, ravel_shape)
mask_idx = np.ravel_multi_index(np.where(masks[key]==1), ravel_shape)
for idx in mask_idx:
xyz_rav[idx] = -1
xi = np.array(np.where(masks[key]==1)).T
points = np.array(np.unravel_index(xyz_rav[xyz_rav!=-1], ravel_shape)).T
values = self.ivols[key][masks[key]==0].copy()
# estimate masked (Bragg) pixels by interpolation
est_xi = scipy.interpolate.griddata(points, values, xi, method='linear')
est = masks[key].flatten()
bgd[est==1] = est_xi # fill gaps in original volume with estimated values
bgd_ests[key] = bgd.reshape(masks[key].shape)
return bgd_ests
def estimate_intensities(self, masks, bgd_ests = None):
"""
Estimate the intensity for each Bragg peak as the sum of retained pixels, indicated by a
value of 1 in masks. If bgd_ests is given, subtract the estimated background before
computing the sum.
Inputs:
-------
masks: dict with keys as Millers and values as mask subvolumes
bgd_ests: dict with keys as Millers and values as background subvolumes, optional
Outputs:
--------
i_est: dict with keys as Millers and values as integrated peak intensity
i_std: dict with keys as Millers and values as standard deviation of peak intensity
"""
i_est, i_std = OrderedDict(), OrderedDict()
for key in self.retained_hkl:
if np.sum(masks[key])==0:
print "Intensity could not be estimated for Miller %s" %(key,)
else:
if bgd_ests is None:
i_vals = self.ivols[key][np.where(masks[key]!=0)]
else:
ivol_mbgd = self.ivols[key].copy() - bgd_ests[key]
i_vals = ivol_mbgd[np.where(masks[key]!=0)]
i_est[key], i_std[key] = np.sum(i_vals), np.std(i_vals)
return i_est, i_std
def npixels_threshold(self, masks, n_threshold):
"""
Compute the number of pixels belonging to each reflection, and update the list of retained
Millers (self.retained_hkl) and peak centers (self.xyz_obs) to only keep reflections with
at least n_threshold pixels.
Inputs:
-------
masks: dict with keys as Millers and values as mask subvolumes
n_threshold: threshold number of pixels for retaining peak
Outputs:
--------
n_pixels: array of number of retained pixels per peak
"""
n_pixels = np.array([np.sum(masks[miller]) for miller in masks.keys()])
counter = 0
for i,miller in enumerate(masks):
if (n_pixels[i] < n_threshold) and (miller in self.retained_hkl):
self.retained_hkl.remove(miller)
counter += 1
if miller in self.xyz_obs.keys():
self.xyz_obs.pop(miller)
print "%i Millers removed based on npixel threshold" %counter
return n_pixels
def stdratio_threshold(self, masks, sr_threshold):
"""
Retain any hkl associated with an std_ratio above input sr_threshold, where std_ratio is
the ratio of the standard deviations of peak to non-peak pixel intensities for the slices
along the x direction that contain at least 1 marked pixel. Miller list self.retained_hkl
and self.xyz_obs are updated accordingly, and an np.array of std_ratio values is returned.
Inputs:
-------
masks: dict with keys as Millers and values as mask subvolumes
sr_threshold: minimum std_ratio required to retain a reflection
Outputs:
--------
std_ratio: array of std_ratio value per peak, as explained above
"""
std_ratios = np.zeros(len(masks))
counter = 0
for i,miller in enumerate(masks):
if np.sum(masks[miller]) > 0:
xslices = np.unique(np.where(masks[miller]!=0)[0])
num = np.concatenate([self.ivols[miller][xs][masks[miller][xs]==1] for xs in xslices]).ravel()
den = np.concatenate([self.ivols[miller][xs][masks[miller][xs]==0] for xs in xslices]).ravel()
std_ratios[i] = np.std(num) / np.std(den)
if (std_ratios[i] < sr_threshold) and (miller in self.retained_hkl):
self.retained_hkl.remove(miller)
counter += 1
if miller in self.xyz_obs.keys():
self.xyz_obs.pop(miller)
print "%i Millers removed based on std dev ratio threshold" %counter
return std_ratios
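# --------------------------------------------- #
# Hypothetical usage sketch for PeakFitting. Not part of the original pipeline;
# `tomogram` is assumed to be a 3D numpy array, `hkl_info` a dict with 'hkl' and
# 'xyz' keys as expected by __init__, and all parameter values are illustrative:
#
#   pf = PeakFitting(tomogram, hkl_info, length=9)
#   imasks = pf.compute_imasks(sigma=3.0, k_size=5)
#   pmasks = pf.compute_pmasks(imasks, p_threshold=30.0)
#   pf.npixels_threshold(pmasks, n_threshold=5)
#   bgd_ests = pf.estimate_background(pmasks)
#   p_est, p_std = pf.estimate_phases(pmasks, mode='weighted_mean')
#   i_est, i_std = pf.estimate_intensities(pmasks, bgd_ests=bgd_ests)
# --------------------------------------------- #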
class FindOrigin:
"""
Class to assist with locating the crystallographic phase origin.
"""
def __init__(self, sg_symbol, cell, cs, hklp, hklI):
"""
Initialize class, minimally with a dictionary of phases and unit cell dimensions.
Inputs:
-------
sg_symbol: space group symbol in Hermann-Mauguin notation
cell: tuple of unit cell constants, (a,b,c)
cs: CCTBX symmetry object
hklp: dictionary whose keys are Millers and values are phases in degrees
hklI: dictionary whose keys are Millers and values are intensities
"""
ref_path = os.path.join(os.path.abspath("."), "reference")
sym_ops = pickle.load(open(os.path.join(ref_path,"sym_ops.pickle")))[sg_symbol]
self.sym_ops = utils.sym_ops_friedels(sym_ops)
self.caxis_val, self.cslice_val = pickle.load(open(os.path.join(ref_path,
"phase_restrictions.pickle")))[sg_symbol]
self.cell = cell
self.cs = cs
self._set_up(hklp, hklI)
def _centralhkl_indices(self):
"""
Locate indices of central axis and central slice reflections.
Outputs:
--------
caxis_idx: np.array of indices of central axis reflections
cslice_idx: np.array of indices of central slice reflections
"""
caxis_idx, cslice_idx = list(), list()
for i,miller in enumerate(self.hkl):
if miller.count(0)==2:
caxis_idx.append(i) # central axis
if miller.count(0)==1:
cslice_idx.append(i) # central slice
return np.array(caxis_idx), np.array(cslice_idx)
def _symhkl_indices(self):
"""
Return indices of each primary reflection (base_idx); any of its symmetry-equivalents,
including identity (sym_idx); and the key of the translational operation that relates
them (op_idx), with ordering according to the sym_ops dictionary. Friedel mates are
treated as symmetry-equivalents.
Outputs:
--------
base_idx: np.array of indices of "parent" reflections
sym_idx: np.array of indices of "children" reflections (symmetry-related to parent)
op_idx: np.array of the sym_ops operation that relates base_idx and sym_idx
"""
self.R_ops = np.array([self.sym_ops[i][:,:-1] for i in range(len(self.sym_ops.keys()))])
self.T_ops = np.array([self.sym_ops[i][:,-1] for i in range(len(self.sym_ops.keys()))])
miller_seen = list()
base_idx, sym_idx, op_idx = list(), list(), list()
# loop over all reflections and check for symmetry-related reflections
for i,miller in enumerate(self.hkl):
miller_sym = np.inner(np.array(miller), self.R_ops).astype(int)
for op,ms in enumerate(miller_sym):
if (tuple(ms) in self.hkl) and (tuple(ms) not in miller_seen):
op_idx.append(op)
base_idx.append(i)
sym_idx.append(self.hkl.index(tuple(ms)))
miller_seen.append(tuple(ms))
return np.array(base_idx), np.array(sym_idx), np.array(op_idx)
def _set_up(self, hklp, hklI):
"""
Set up symmetry information and indexing information useful for pulling out relevant
reflections. If intensities are supplied, use as weights.
Inputs:
-------
hklp: dictionary whose keys are Millers and values are phases in degrees
hklI: dictionary whose keys are Millers and values are intensities, or None
"""
# extract reflection information, store as Millers, phases, intensities
assert hklI.keys() == hklp.keys()
self.hkl = hklp.keys()
self.phases = np.squeeze(np.array(hklp.values()))
self.I = np.squeeze(np.array(hklI.values()))
# indices for restricted phase reflections (central axis/slice)
self.caxis_idx, self.cslice_idx = self._centralhkl_indices()
# indices for symmetry-related reflections
self.base_idx, self.sym_idx, self.op_idx = self._symhkl_indices()
return
def sym_residual(self, phases, weighted=True):
"""
Compute the average residual of symmetry-equivalent reflections, assuming that given
phases are in degrees and their ordering matches that of self.hkl. Expected phase is:
p_sym = p_base + 360 * dot(hkl_sym, T_sym)
where p_base corresponds to the phase of the principal reflection, and hkl_sym and T_sym
respectively are the symmetry-related reflection and its translational operator.
Inputs:
-------
phases: array of phases, with ordering matching self.hkl, in degrees
weighted: boolean, if True weight phase residual by corresponding intensity
Outputs:
--------
r_avg: mean residual of symmetry-equivalent phases to their expected values
"""
# if no symmetry related reflections are present, return 0
if len(self.base_idx) == 0 and len(self.sym_idx) == 0:
return 0.0
p_base, p_obs = phases[self.base_idx], phases[self.sym_idx]
hkl_sym = np.array(self.hkl)[self.sym_idx]
T = self.T_ops[self.op_idx]
# map all reflections to ASU, including -1 adjustment for Friedels
shifts = np.sum(hkl_sym[:,:,np.newaxis] * T[:,:,np.newaxis], axis=1).flatten()
p_calc = utils.wraptopi(p_obs - 360 * shifts)
p_calc[np.where(np.array(self.op_idx) > max(self.sym_ops.keys())/2)] *= -1
# compute expected mean phase value for the parent reflection from all observations
unq_idx = np.unique(np.array(self.base_idx))
p_avg = np.zeros_like(p_base)
for unq in unq_idx:
p_avg[np.array(self.base_idx)==unq] = utils.average_phases(p_calc[np.array(self.base_idx)==unq])
r_vals = np.abs(utils.wraptopi(p_calc - p_avg))
if weighted is True:
return np.average(r_vals, weights=self.I[self.sym_idx])
else:
return np.average(r_vals)
def central_residual(self, phases, weighted=True):
"""
Compute the average residual of central slice / axis reflections to the nearest integer
multiple of their expected values.
Inputs:
-------
phases: array of phases, with ordering matching self.hkl, in degrees
weighted: boolean, if True weight phase residual by corresponding intensity
Outputs:
--------
r_avg: mean residual of central axis / slice phases to their expected values
or 0, if no centric reflections are in dataset
"""
# if no centric reflections are present, return 0
if len(self.caxis_idx) == 0 and len(self.cslice_idx) == 0:
return 0.0
# otherwise, compute mean residual of centric reflections
e_vals = [self.caxis_val, self.cslice_val]
idx_lists = [self.caxis_idx, self.cslice_idx]
r_vals, w_vals = list(), list()
for e_val, idx_list in zip(e_vals, idx_lists):
if len(idx_list) > 0:
p_sel = phases[idx_list]
p_residuals = np.abs(p_sel % float(e_val))
n_residuals = np.abs(p_sel % float(-1 * e_val))
stacked = np.vstack((p_residuals, n_residuals)).T
r_vals.extend(list(np.around(np.abs(np.amin(stacked, axis=1)), 2)))
r_vals = np.array(r_vals)
if weighted is True:
return np.average(r_vals, weights=self.I[np.concatenate(idx_lists).astype(int)])
else:
return np.average(r_vals)
def map_skewness(self, phases):
"""
Compute the skewness of the real space map computed from self.hkl and input phases.
Inputs:
-------
phases: array of phases, with order matching self.hkl, in degrees
Outputs:
--------
s_factor: skewness of electron density map as a float
"""
import scipy.stats
# compute Miller array, convert to ccp4_map, convert to numpy array
ma = cctbx_utils.convert_to_sf(self.hkl, self.I, np.deg2rad(phases), self.cs)
fmap = cctbx_utils.compute_map(ma, save_name = None, grid_step = 0.5)
fmap_as_npy = fmap.real_map_unpadded().as_numpy_array()
return scipy.stats.skew(fmap_as_npy.flatten())
def shift_phases(self, fshifts):
"""
Compute new phases after shifting the unit cell origin by fractional shifts.
Inputs:
-------
fshifts: fractional shifts along a,b,c by which to translate unit cell origin
Outputs:
--------
p_shifted: array of origin-shifted phases in degrees, ordered like self.hkl
"""
return utils.wraptopi(self.phases - 360.0 * np.dot(np.array(self.hkl), fshifts))
def _eval_shifts(self, fshifts):
"""
Compute residuals and map skewness for phases translated by input shifts.
Inputs:
-------
fshifts: fractional shifts along the crystallographic a,b,c axes
Outputs:
--------
eval: tuple of (central hkl residual, sym-equiv residual, map skewness)
"""
p_shifted = self.shift_phases(fshifts)
r_cen = self.central_residual(p_shifted)
r_sym = self.sym_residual(p_shifted)
skew = self.map_skewness(p_shifted)
return (r_cen, r_sym, skew)
def scan_candidates(self, grid_spacing, n_processes, w_rsym=1.0, w_rcen=1.0, w_skew=1.0):
"""
Perform a grid scan to assess each fractional position as a candidate phase origin.
Inputs:
-------
grid_spacing: grid spacing in Angstrom for phase origin search
n_processes: number of processors for multiprocessing
w_rsym: relative weight for symmetry residual, default is equal weights
w_rcen: relative weight for central reflection residual, default is equal weights
w_skew: relative weight for map skewness metric, default is equal weights
Outputs:
--------
metrics: dictionary of metrics for sym and central residuals and map skewness
fshifts: fractional shifts by which to translate unit cell origin
"""
# set up grid of fractional shifts to scan as candidate origins
xshifts, yshifts, zshifts = [np.arange(0, self.cell[i], grid_spacing) for i in range(3)]
fshifts_list = list(itertools.product(xshifts/self.cell[0],
yshifts/self.cell[1],
zshifts/self.cell[2]))
num = len(fshifts_list)
print "Finding origin: %i grid point to evaluate" %num
# evaluate shifts using multiprocessing
pool = pp.ProcessPool(n_processes)
metrics = pool.map(self._eval_shifts, fshifts_list)
metrics = np.array(metrics)
# rearrange results into separate dictionary keys
dmetrics = dict()
for i,m in zip(range(3), ['rcen', 'rsym', 'skew']):
dmetrics[m] = metrics[:,i].reshape(len(xshifts), len(yshifts), len(zshifts))
# compute the combined metric: negate the skew factor, then normalize and weight each term
for key in dmetrics.keys():
if key == 'skew':
dmetrics[key] = -1.0 * dmetrics[key]
if np.all(dmetrics[key]==0):
print "Could not evaluate %s due to lack of relevant reflections" %key
else:
dmetrics[key] = (dmetrics[key]-dmetrics[key].min())/(dmetrics[key].max()-dmetrics[key].min())
dmetrics['combined'] = w_rsym*dmetrics['rsym'] + w_rcen*dmetrics['rcen'] + w_skew*dmetrics['skew']
# identify top candidate phase origin
sorted_origins = np.array(np.unravel_index(np.argsort(dmetrics['combined'].flatten()),
dmetrics['combined'].shape)).T
xs, ys, zs = sorted_origins[0]
fshifts = (xshifts[xs]/self.cell[0], yshifts[ys]/self.cell[1], zshifts[zs]/self.cell[2])
return dmetrics, fshifts
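# --------------------------------------------- #
# Hypothetical usage sketch for FindOrigin. The space group symbol, cell, and
# parameter values below are illustrative; `cs` is a CCTBX symmetry object and
# hklp/hklI are dicts keyed by Miller indices:
#
#   fo = FindOrigin(sg_symbol, (a, b, c), cs, hklp, hklI)
#   dmetrics, fshifts = fo.scan_candidates(grid_spacing=1.0, n_processes=8)
#   p_shifted = fo.shift_phases(fshifts)
# --------------------------------------------- #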
class MergeCrystals:
"""
Class for merging data from different crystals by shifting each added crystal to
the phase origin of the first dataset (not necessarily coincident with a
crystallographic origin) and scaling its intensities to match.
"""
def __init__(self, space_group, grid_spacing):
"""
Initialize class with empty OrderedDicts to store intensities and phases, and
a dictionary to store unit cell constants. Input grid_spacing is in Angstroms
and dictates the sampling rate for the phase shift search.
"""
self.hklI, self.hklp = OrderedDict(), OrderedDict()
self.cell_constants = dict()
self.space_group = space_group
self.grid_spacing = grid_spacing
self.fshifts = dict()
def merge_values(self, weighted=True):
"""
Compute centroid intensity and phase values from self.hklI and self.hklp. For
intensities, this is the average; for phases, the intensity-weighted average.
Inputs:
-------
weighted: whether to intensity-weight phases, boolean
Outputs:
--------
hklI_merge: dict whose keys are Millers and values are averaged intensities
hklp_merge: dict whose keys are Millers and values are I-weighted avg phases
"""
assert self.hklI.keys() == self.hklp.keys()
# average intensities, maintaining original Miller ordering
hklI_merge = OrderedDict((key,np.average(val)) for key,val in self.hklI.items())
# compute (optionally intensity-weighted) phase averages
hklp_merge = OrderedDict()
for miller in self.hklp.keys():
x,y = 0,0
for (angle,weight) in zip(self.hklp[miller], self.hklI[miller]):
if weighted is True:
x += np.cos(np.deg2rad(angle)) * weight / np.sum(self.hklI[miller])
y += np.sin(np.deg2rad(angle)) * weight / np.sum(self.hklI[miller])
else:
x += np.cos(np.deg2rad(angle))
y += np.sin(np.deg2rad(angle))
hklp_merge[miller] = np.rad2deg(np.arctan2(y,x))
return hklI_merge, hklp_merge
def _compute_presiduals(self, hkl, p_ref, p_tar, weights, fshifts):
"""
Compute the average residual between target phases (p_tar) shifted by fshifts and
reference phases (p_ref).
Inputs:
-------
hkl: array of Miller indices
p_ref: array of reference phases in degrees
p_tar: array of target phases in degrees
weights: array of weights, either uniform or mean intensity for that Miller
fshifts: fractional shifts along (a,b,c) by which to translate phases
Outputs:
--------
p_residual: weighted average residual between shifted and reference phases
"""
p_shifted = utils.wraptopi(p_tar - 360 * np.dot(hkl, fshifts).ravel())
diff = p_shifted - p_ref
diff[diff>180] -= 360
diff[diff<-180] += 360
return np.average(np.abs(diff), weights=weights)
def _wrap_presidual(self, args):
return self._compute_presiduals(*args)
def _shift_origin(self, hklp_ref, hklp, hklI_ref, hklI, n_processes, weighted):
"""
Shift phases of added crystal to an origin consistent with the reference phases.
The best origin is considered the one that minimizes the mean phase residual
between the added/target and reference phases, and is identified by a grid
search with spacing grid_spacing (in Angstroms) along the a,b,c crystallographic axes.
Inputs:
-------
hklp_ref: dict whose keys are Millers and values are reference phases
hklp: dict whose keys are Millers and values are phases of crystal to add
hklI_ref: dict whose keys are Millers and values are reference intensities
hklI: dict whose keys are Millers and values are intensities of crystal to add
n_processes: number of processors over which to parallelize task
weighted: whether to intensity-weight the phase residuals, boolean
Outputs:
--------
hklp_shifted: dict whose keys are Millers and values are shifted phases
"""
# find shared Millers and extract shared information
hklp_shared = utils.shared_dict(hklp_ref, hklp)
hkl = np.array(hklp_shared.keys())
p_ref, p_tar = np.array(hklp_shared.values())[:,0], np.array(hklp_shared.values())[:,1]
n_crystal = max(self.cell_constants.keys())
cell = self.cell_constants[n_crystal]
# process intensities for weights, or supply uniform weights if weighted is False
if weighted is True:
hklI_shared = utils.shared_dict(hklI_ref, hklI)
weights = np.mean(np.array(hklI_shared.values()), axis=1)
else:
weights = np.ones_like(p_ref)
# locate best origin based on a grid search
xshifts, yshifts, zshifts = [np.arange(0, cell[i], self.grid_spacing) for i in range(3)]
fshifts_list = list(itertools.product(xshifts/cell[0],
yshifts/cell[1],
zshifts/cell[2]))
num = len(fshifts_list)
print "Merging crystals: %i shared reflections, %i grid points" %(len(hkl),num)
# use multiprocessing to compute residuals for each shift in fshifts_list
pool = pp.ProcessPool(n_processes)
args_eval = zip([hkl]*num, [p_ref]*num, [p_tar]*num, [weights]*num, fshifts_list)
m_grid = pool.map(self._wrap_presidual, args_eval)
m_grid = np.array(m_grid).reshape((len(xshifts), len(yshifts), len(zshifts)))
# shift all phases in added crystal to best origin
xs,ys,zs = np.where(m_grid==m_grid.min())
if len(xs)>1:
print "Warning: multiple origins (n=%i) scored equally well; one will be chosen at random" %(len(xs))
rand_idx = np.random.randint(0, high=len(xs))
xs, ys, zs = np.array([xs[rand_idx]]), np.array([ys[rand_idx]]), np.array([zs[rand_idx]])
fshifts = np.array([xshifts[xs]/cell[0],
yshifts[ys]/cell[1],
zshifts[zs]/cell[2]])
self.fshifts[n_crystal] = np.squeeze(fshifts)
hkl, p_vals = np.array(hklp.keys()), np.squeeze(np.array(hklp.values()))
p_shifted = utils.wraptopi(p_vals - 360 * np.dot(hkl, fshifts).ravel())
hklp_shifted = OrderedDict((key,val) for key,val in zip(hklp.keys(), p_shifted))
print "Minimum mean phase resiudal is %.2f" %(m_grid.min())
print "Best fractional shifts are: (%.2f, %.2f, %.2f)" %(fshifts[0], fshifts[1], fshifts[2])
return hklp_shifted
def _sigma_scale(self, I, qmags, m, b, sigma):
"""
Perform Wilson factor scaling of input intensities with additional base offset
and multiplicative factor.
Inputs:
-------
I: np.array of intensities to scale
qmags: np.array of |q_vectors| (inverse resolution), ordered like I
m: multiplicative factor, float
b: additive factor, float
sigma: Wilson scaling parameter, related to B factor
Outputs:
--------
I_scaled: scaled intensities
"""
return m * np.exp(-1*np.square(qmags * sigma)) * I + b
def _optimize_scalefactors(self, I_ref, I_tar, qmags):
"""
Least-squares optimization of parameters to scale I_tar to I_ref using the model:
I_ref = m * exp(-(q*sigma)^2) * I_tar + b
Inputs:
-------
I_ref: np.array of reference intensities
I_tar: np.array of intensities to scale to reference
qmags: np.array of |q_vectors| (inverse resolution), ordered like I_ref and I_tar
Outputs:
--------
m_f, b_f, sigma_f: best-fit scaling parameters
"""
def error(args):
m,b,sigma = args
I_scaled = self._sigma_scale(I_tar, qmags, m, b, sigma)
residuals = np.log10(I_scaled[I_scaled>0]) - np.log10(I_ref[I_scaled>0])
return residuals
# initial guess: ratio of mean intensities approximates the multiplicative factor
x0 = (np.mean(I_ref)/np.mean(I_tar), 0.3, 1.0)
res = scipy.optimize.leastsq(error, x0)
m_f, b_f, sigma_f = res[0]
I_scaled = self._sigma_scale(I_tar, qmags, m_f, b_f, sigma_f)
residual_s = np.sum(np.abs(np.log10(I_ref[I_scaled>0]) - np.log10(I_tar[I_scaled>0])))
residual_e = np.sum(np.abs(np.log10(I_ref[I_scaled>0]) - np.log10(I_scaled[I_scaled>0])))
print "Fitted parameters m, b, sigma are: (%.2f,%.2f,%.2f)" %(m_f, b_f, sigma_f)
print "Scaling reduced sum of log residuals by %.2f" %(residual_s - residual_e)
return m_f, b_f, sigma_f
def _scale_intensities(self, hklI_ref, hklI):
"""
Scale intensities of added crystal to be consistent with reference intensities.
Inputs:
-------
hklI_ref: dict whose keys are Millers and values are reference intensities
hklI: dict whose keys are Millers and values are intensities of the added crystal
Outputs:
--------
hklI_scaled: dict whose keys are Millers and values are scaled intensities
"""
# find shared Millers and extract shared information
hklI_shared = utils.shared_dict(hklI_ref, hklI)
hkl = np.array(hklI_shared.keys())
I_ref, I_tar = np.array(hklI_shared.values())[:,0], np.array(hklI_shared.values())[:,1]
# least-squares optimization to scale I_tar to I_ref
n_crystal = max(self.cell_constants.keys())
qmags_sel = 1.0 / utils.compute_resolution(self.space_group,
self.cell_constants[n_crystal],
hkl)
m_f, b_f, sigma_f = self._optimize_scalefactors(I_ref.copy(), I_tar.copy(), qmags_sel)
# scale all intensities and reconstruct into a dictionary
qmags = 1.0 / utils.compute_resolution(self.space_group,
self.cell_constants[n_crystal],
np.array(hklI.keys()))
I_scaled = self._sigma_scale(np.array(hklI.values()), qmags, m_f, b_f, sigma_f)