3 def checkModules() -> None:
4 import importlib
.metadata
5 from packaging
import version
8 got_ver
= importlib
.metadata
.version(pkgname
)
9 if version
.parse(got_ver
) < version
.parse(min_ver
):
10 raise Exception(f
"{pkgname}>={min_ver} is needed, but found {pkgname}=={got_ver}")
12 if __name__
== "__main__":
18 def main(thisID
) -> None:
21 nfoDict
= SamplesDict
[thisID
]
22 print("[i]Start.", file=sys
.stderr
)
23 for platform
in PlatformTuple
:
24 nfoDict
['platformK'] = platform
25 nfoDict
['platformV'] = nfoDict
['platforms'][platform
]
26 nfoDict
['suffixOutV'] = nfoDict
['suffixOut'][platform
]
27 if nfoDict
['type'] == 'mobivision':
28 h5Path
= f
"{nfoDict['sid']}_{platform}.raw.h5ad"
29 if os
.path
.exists(h5Path
) and os
.access(h5Path
, os
.R_OK
) and os
.path
.getsize(h5Path
) > 0:
30 print(f
"[i]Reading {h5Path}", file=sys
.stderr
)
31 adata
= ad
.read_h5ad(h5Path
)
33 mtxPath
= os
.path
.join( *[nfoDict
[v
] for v
in nfoDict
['pattern']] )
34 print(f
"[i]Reading {mtxPath}", file=sys
.stderr
)
35 adata
=sc
.read_10x_mtx(mtxPath
, var_names
='gene_symbols', make_unique
=True, gex_only
=True)
36 print(f
"[i]Saving {h5Path}", file=sys
.stderr
)
37 adata
.write_h5ad(h5Path
,compression
='lzf')
38 elif nfoDict
['type'] == 'visium':
39 h5Path
= f
"{nfoDict['sid']}_{platform}.rsp.h5ad"
40 if os
.path
.exists(h5Path
) and os
.access(h5Path
, os
.R_OK
) and os
.path
.getsize(h5Path
) > 0:
41 print(f
"[i]Reading {h5Path}", file=sys
.stderr
)
42 adata
= ad
.read_h5ad(h5Path
)
44 visiumPath
= os
.path
.join( *[nfoDict
[v
] for v
in nfoDict
['pattern'][:-1] ] )
45 print(f
"[i]Reading {visiumPath}", file=sys
.stderr
)
46 adata
=sq
.read
.visium(visiumPath
, library_id
=platform
)
47 adata
.var_names_make_unique()
48 print(f
"[i]Saving {h5Path}", file=sys
.stderr
)
49 adata
.write_h5ad(h5Path
,compression
='lzf')
51 print(f
"[x]Unknow Type {nfoDict['type']}", file=sys
.stderr
)
53 scDat
[platform
] = adata
54 print(f
"[i]Read {thisID}.{platform}.raw: {adata.shape}", file=sys
.stderr
)
55 metapdf
={'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'}
56 rawList
=[scDat
[v
] for v
in PlatformTuple
]
57 adata
=ad
.concat(rawList
, label
='Platform', keys
=PlatformTuple
, index_unique
='-')
58 adata
.var
['mt'] = adata
.var_names
.str.startswith('MT-') | adata
.var_names
.str.startswith('mt-')
59 sc
.pp
.calculate_qc_metrics(adata
, qc_vars
=['mt'], percent_top
=None, log1p
=True, inplace
=True)
60 adata
.obs
['sqrt_inv_total_counts'] = 1 / np
.sqrt(adata
.obs
['total_counts'])
61 p995
= np
.percentile(adata
.obs
['sqrt_inv_total_counts'].values
, nfoDict
['fltPct'])
63 print(f
"[i] {nfoDict['sub']} sqrt_inv_total_counts: {p995} ({round(c995,3)})", file=sys
.stderr
)
65 ax
=sc
.pl
.violin(adata
,['sqrt_inv_total_counts'],jitter
=0.4, stripplot
=True,show
=False)
66 ax
.set_title(f
"sqrt_inv_total_counts Violin - {nfoDict['sub']} @ {round(p995,9)}({round(c995,3)})")
67 ax
.axhline(y
=p995
, color
='red', linestyle
='dotted', label
=f
'p995={p995}')
68 plt
.savefig(f
"2C_umiEstd_{nfoDict['sid']}_{round(c995)}.pdf", metadata
={**metapdf
, 'Title': 'sqrt_inv_total_counts Violin', 'Subject': f
"{nfoDict['sub']} Data @ {round(c995)}"})
70 ax
=sc
.pl
.violin(adata
,['total_counts'],jitter
=0.4, stripplot
=True,show
=False)
71 ax
.set_title(f
"total_counts Violin - {nfoDict['sub']} @ {round(c995)}")
72 ax
.axhline(y
=round(c995
), color
='red', linestyle
='dotted', label
=f
'c995={round(c995)}')
73 plt
.savefig(f
"2C_umiEcnt_{nfoDict['sid']}_{round(c995)}.pdf", metadata
={**metapdf
, 'Title': 'total_counts Violin', 'Subject': f
"{nfoDict['sub']} Data @ {round(c995)}"})
74 adata
.raw
= adata
.copy()
75 #sc.pp.filter_cells(adata, min_counts=round(c995)) # sqrt_inv_total_counts < p995 按照样品均值的标准差考虑。写作round(c995)。
76 sc
.pp
.filter_cells(adata
, min_counts
=1)
77 sc
.pp
.filter_genes(adata
, min_cells
=1) # adata.var[adata.var['n_cells']<2].sort_values(by='sqrt_inv_total_counts') 有800个,就不过滤了。
78 print(f
"[i]Filtered: {adata.raw.shape} -> {adata.shape}", file=sys
.stderr
)
80 sc
.pp
.highly_variable_genes(adata
, flavor
="seurat_v3", n_top_genes
=3000)
81 sc
.pp
.normalize_total(adata
,target_sum
=1e6
, key_added
='CPMnormFactor')
82 adata
.layers
["norm"] = adata
.X
.copy()
84 if nfoDict
['type'] == 'mobivision':
86 sc
.pp
.neighbors(adata
)
87 sc
.tl
.tsne(adata
,random_state
=369)
88 elif nfoDict
['type'] == 'visium':
91 STAGATE_pyG
.Cal_Spatial_Net(adata
, rad_cutoff
=150)
92 STAGATE_pyG
.Stats_Spatial_Net(adata
)
93 adata
= STAGATE_pyG
.train_STAGATE(adata
, device
= torch
.device('cuda:0' if torch
.cuda
.is_available() else 'cpu'))
94 sc
.pp
.neighbors(adata
, use_rep
='STAGATE')
95 sc
.tl
.tsne(adata
, use_rep
='STAGATE', random_state
=369)
97 data
=adata
.obsm
['STAGATE'][0:,0:2]
98 df
=pd
.DataFrame(data
=data
[0:,0:], index
=[adata
.obs_names
[i
] for i
in range(data
.shape
[0])], columns
=['STA'+str(1+i
) for i
in range(data
.shape
[1])])
99 df
['Platform'] = adata
.obs
['Platform']
100 figD
=sns
.JointGrid(data
=df
, x
="STA1", y
="STA2", hue
='Platform', dropna
=True)
101 figD
.plot_joint(sns
.scatterplot
, s
=12.7, alpha
=.6)
102 figD
.plot_marginals(sns
.histplot
, kde
=True, alpha
=.618)
103 figD
.figure
.suptitle(f
"STAGATE - {nfoDict['sub']}")
104 figD
.savefig(f
"2C_STAGATE_{nfoDict['sid']}.pdf", metadata
={**metapdf
, 'Title': 'STAGATE'})
106 print(f
"[x]Unknow Type {nfoDict['type']}", file=sys
.stderr
)
108 sc
.tl
.umap(adata
,random_state
=369)
109 sc
.tl
.draw_graph(adata
,random_state
=369)
110 sc
.tl
.leiden(adata
,random_state
=0,resolution
=0.25)
112 fig1, ax1 = plt.subplots()
114 ax1.set_title("Axis 1 title")
115 ax1.set_xlabel("X-label for axis 1")
117 print("[i]Begin fig E. 2Ca", file=sys
.stderr
)
119 ax
=sc
.pl
.pca(adata
, color
='Platform', show
=False, title
=f
"PCA - {nfoDict['sub']}", annotate_var_explained
=True)
120 plt
.savefig(f
"2C_PCA_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={**metapdf
, 'Title': 'PCA'})
122 ax
=sc
.pl
.umap(adata
,color
='Platform', show
=False, title
=f
"UMAP - {nfoDict['sub']}")
123 plt
.savefig(f
"2C_UMAP_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={**metapdf
, 'Title': 'UMAP'})
125 ax
=sc
.pl
.tsne(adata
, color
='Platform', show
=False, title
=f
"t-SNE - {nfoDict['sub']}")
126 plt
.savefig(f
"2C_tSNE_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={**metapdf
, 'Title': 't-SNE'})
128 ax
=sc
.pl
.draw_graph(adata
, color
='Platform', show
=False, title
=f
"ForceAtlas2 - {nfoDict['sub']}")
129 plt
.savefig(f
"2C_ForceAtlas2_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={**metapdf
, 'Title': 'ForceAtlas2'})
132 fig
, ax
= plt
.subplots()
133 fig
.patch
.set(alpha
=0)
134 ax
.patch
.set(alpha
=0)
135 palette
= ['#CC0000','#00CC00','#CCCC00']
136 sc
.pl
.pca(adata
, color
='Platform', show
=False, title
=f
"PCA - {nfoDict['sub']}", ax
=ax
, palette
=palette
, annotate_var_explained
=True)
137 arts
=ax
.findobj(matplotlib
.collections
.PathCollection
)
139 mplcairo
.operator_t
.ADD
.patch_artist(art
)
140 newlabels
= adata
.obs
['Platform'].unique().tolist() + ['Both']
141 legend_elements
= [plt
.scatter([],[],linewidths
=0, marker
='o', label
=label
, color
=color
) for label
, color
in zip(newlabels
, palette
)]
142 ax
.legend(handles
=legend_elements
,bbox_to_anchor
=(1.05, 0.5), loc
='center left', borderaxespad
=0.2)
143 plt
.savefig(f
"2C_mPCA_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={**metapdf
, 'Title': 'PCA'})
146 fig
, ax
= plt
.subplots()
147 fig
.patch
.set(alpha
=0)
148 ax
.patch
.set(alpha
=0)
149 palette
= ['#CC0000','#00CC00','#CCCC00']
150 sc
.pl
.tsne(adata
, color
='Platform', show
=False, title
=f
"PCA - {nfoDict['sub']}", ax
=ax
, palette
=palette
)
151 arts
=ax
.findobj(matplotlib
.collections
.PathCollection
)
153 mplcairo
.operator_t
.ADD
.patch_artist(art
)
154 newlabels
= adata
.obs
['Platform'].unique().tolist() + ['Both']
155 legend_elements
= [plt
.scatter([],[],linewidths
=0, marker
='o', label
=label
, color
=color
) for label
, color
in zip(newlabels
, palette
)]
156 ax
.legend(handles
=legend_elements
,bbox_to_anchor
=(1.05, 0.5), loc
='center left', borderaxespad
=0.2)
157 plt
.savefig(f
"2C_mtSNE_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={**metapdf
, 'Title': 't-SNE'})
160 print("[i]Begin fig E. 2Cb", file=sys
.stderr
)
161 fig
, axes
= plt
.subplots(1, 2, figsize
=(12, 6))
162 plt
.subplots_adjust(wspace
=0.1)
163 sc
.pl
.umap(adata
[adata
.obs
['Platform']=='Illumina'], color
='leiden', ax
=axes
[0], title
=f
'UMAP - Illumina')
164 sc
.pl
.umap(adata
[adata
.obs
['Platform']=='Salus'], color
='leiden', ax
=axes
[1], title
=f
'UMAP - Salus')
165 axes
[0].legend().set_visible(False)
166 fig
.suptitle(f
"Clusters by Leiden - {nfoDict['sub']}")
167 fig
.savefig(f
"2C_leidenUMAP_{nfoDict['sid']}.pdf", metadata
={**metapdf
, 'Title': 'Cluster UMAP'})
168 plt
.figure(figsize
=(6,4))
169 plt
.title(f
"Cluster Size Histogram - {nfoDict['sub']}")
170 axB
= sns
.histplot(adata
.obs
,x
='leiden',hue
='Platform',multiple
="dodge",shrink
=.66)
171 axB
.set_xlabel('leiden Cluster NO.')
172 axB
.set_ylabel('Cluster Size')
173 plt
.savefig(f
"2C_leidenHist_{nfoDict['sid']}.pdf", metadata
={**metapdf
, 'Title': 'Cluster Size Histogram'})
176 print("[i]Begin fig E. 2D", file=sys
.stderr
)
177 plt
.switch_backend('pdf')
179 adata
.obs
['cell.cluster'] = adata
.obs
['leiden'].astype(str)
180 adata
.obs
['study_id'] = adata
.obs
['Platform'].astype(str)
181 pymn
.variableGenes(adata
, study_col
='Platform')
182 pymn
.MetaNeighborUS(adata
, study_col
='study_id', ct_col
='cell.cluster', fast_version
=True)
183 plt
.figure(figsize
=(16,16))
184 plt
.title(f
"MetaNeighborUS - {nfoDict['sub']}")
185 cm
=pymn
.plotMetaNeighborUS(adata
, cmap
='coolwarm',show
=False)
186 cm
.savefig(f
"2D_MetaNeighborUS_{nfoDict['sid']}.pdf", metadata
={**metapdf
, 'Title': 'MetaNeighborUS'})
187 pymn
.topHits(adata
, threshold
=0)
188 mndf
= adata
.uns
['MetaNeighborUS_topHits']
189 mndf
['ClusterID'] = mndf
['Study_ID|Celltype_1'].str.split('|').str[1].astype(int)
190 pndf
=mndf
[mndf
['Match_type']=='Reciprocal_top_hit']
191 plt
.figure(figsize
=(6,4))
192 plt
.title(f
"Mean_AUROC Between Platforms - {nfoDict['sub']}")
193 axC
= sns
.barplot(pndf
,x
='ClusterID',y
='Mean_AUROC')
194 axC
.set_xlabel('leiden Cluster NO.')
195 plt
.savefig(f
"2D_AUROC_{nfoDict['sid']}.pdf", metadata
={**metapdf
, 'Title': 'AUROC'})
197 if nfoDict
['type'] == 'visium':
198 for platform
in PlatformTuple
:
200 ax
=sc
.pl
.embedding(adata
[adata
.obs
['Platform'] == platform
], s
=135, basis
="spatial", color
=["leiden"], show
=False, title
=f
'{platform}')
202 plt
.savefig(f
"2C_spLeiden_{platform}_{nfoDict['sid']}.pdf", bbox_extra_artists
=(ax
.get_legend(),), metadata
={**metapdf
, 'Title': f
'spLeiden - {platform}'})
205 if __name__
== "__main__":
206 if len(sys
.argv
) > 1:
208 if thisID
not in SamplesDict
:
209 print(f
"[x]sid can only be {SamplesDict.keys()}", file=sys
.stderr
)
211 print(sys
.argv
, file=sys
.stderr
)
212 print(f
"[i]{thisID}")
217 ./nfig1.py human; ./nfig2.py human ; ./nfig1.py mbrain; ./nfig2.py mbrain ; ./nfig1.py mkidney; ./nfig2.py mkidney
218 time (./nfig1.py human; ./nfig1.py mbrain ; ./nfig1.py mkidney ) | tee nplot.log
219 time (./nfig2.py human; ./nfig2.py mbrain ; ./nfig2.py mkidney )
221 pip install git+https://github.com/gillislab/pyMN#egg=pymetaneighbor
222 pip install git+https://github.com/scverse/squidpy@main # or 'squidpy>=1.2.3'
223 pip install git+https://github.com/QIFEIDKN/STAGATE_pyG
224 pip install torch_scatter torch_sparse -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
226 pip install git+https://github.com/makepath/xarray-spatial
228 micromamba install -c pyg pyg::pyg[version='>2=*_cpu']
229 #pip3 install --upgrade --upgrade-strategy only-if-needed torch_geometric
230 pip3 install torch_scatter torch_sparse -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
231 pip3 install --upgrade --upgrade-strategy only-if-needed ~/conda/wheels/STAGATE_pyG-1.0.0-py3-none-any.whl ~/conda/wheels/pyMetaNeighbor-0.1.0-py3-none-any.whl
232 pip3 install --upgrade --upgrade-strategy only-if-needed ~/conda/wheels/xarray_spatial-0.3.8.dev1+g27ab0c8-py3-none-any.whl ~/conda/wheels/squidpy-1.3.2.dev9+g4fdb801-py3-none-any.whl