modified: Makefile
[GalaxyCodeBases.git] / python / salus / cmplatform / nfig2.py
blobc97812a26168aece3ceafdbd812b9d4e4cae5f9e
1 #!/usr/bin/env python3
3 def checkModules() -> None:
4 import importlib.metadata
5 from packaging import version
6 pkgname = "squidpy"
7 min_ver = "1.2.3"
8 got_ver = importlib.metadata.version(pkgname)
9 if version.parse(got_ver) < version.parse(min_ver):
10 raise Exception(f"{pkgname}>={min_ver} is needed, but found {pkgname}=={got_ver}")
12 if __name__ == "__main__":
13 checkModules()
15 from nfig1 import *
16 #print(SamplesDict)
18 def main(thisID) -> None:
19 import squidpy as sq
20 scDat = {}
21 nfoDict = SamplesDict[thisID]
22 print("[i]Start.", file=sys.stderr)
23 for platform in PlatformTuple:
24 nfoDict['platformK'] = platform
25 nfoDict['platformV'] = nfoDict['platforms'][platform]
26 nfoDict['suffixOutV'] = nfoDict['suffixOut'][platform]
27 if nfoDict['type'] == 'mobivision':
28 h5Path = f"{nfoDict['sid']}_{platform}.raw.h5ad"
29 if os.path.exists(h5Path) and os.access(h5Path, os.R_OK) and os.path.getsize(h5Path) > 0:
30 print(f"[i]Reading {h5Path}", file=sys.stderr)
31 adata = ad.read_h5ad(h5Path)
32 else:
33 mtxPath = os.path.join( *[nfoDict[v] for v in nfoDict['pattern']] )
34 print(f"[i]Reading {mtxPath}", file=sys.stderr)
35 adata=sc.read_10x_mtx(mtxPath, var_names='gene_symbols', make_unique=True, gex_only=True)
36 print(f"[i]Saving {h5Path}", file=sys.stderr)
37 adata.write_h5ad(h5Path,compression='lzf')
38 elif nfoDict['type'] == 'visium':
39 h5Path = f"{nfoDict['sid']}_{platform}.rsp.h5ad"
40 if os.path.exists(h5Path) and os.access(h5Path, os.R_OK) and os.path.getsize(h5Path) > 0:
41 print(f"[i]Reading {h5Path}", file=sys.stderr)
42 adata = ad.read_h5ad(h5Path)
43 else:
44 visiumPath = os.path.join( *[nfoDict[v] for v in nfoDict['pattern'][:-1] ] )
45 print(f"[i]Reading {visiumPath}", file=sys.stderr)
46 adata=sq.read.visium(visiumPath, library_id=platform)
47 adata.var_names_make_unique()
48 print(f"[i]Saving {h5Path}", file=sys.stderr)
49 adata.write_h5ad(h5Path,compression='lzf')
50 else:
51 print(f"[x]Unknow Type {nfoDict['type']}", file=sys.stderr)
52 exit(1)
53 scDat[platform] = adata
54 print(f"[i]Read {thisID}.{platform}.raw: {adata.shape}", file=sys.stderr)
55 metapdf={'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'}
56 rawList=[scDat[v] for v in PlatformTuple]
57 adata=ad.concat(rawList, label='Platform', keys=PlatformTuple, index_unique='-')
58 adata.var['mt'] = adata.var_names.str.startswith('MT-') | adata.var_names.str.startswith('mt-')
59 sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=True, inplace=True)
60 adata.obs['sqrt_inv_total_counts'] = 1 / np.sqrt(adata.obs['total_counts'])
61 p995 = np.percentile(adata.obs['sqrt_inv_total_counts'].values, nfoDict['fltPct'])
62 c995 = 1/(p995*p995)
63 print(f"[i] {nfoDict['sub']} sqrt_inv_total_counts: {p995} ({round(c995,3)})", file=sys.stderr)
64 plt.figure(1)
65 ax=sc.pl.violin(adata,['sqrt_inv_total_counts'],jitter=0.4, stripplot=True,show=False)
66 ax.set_title(f"sqrt_inv_total_counts Violin - {nfoDict['sub']} @ {round(p995,9)}({round(c995,3)})")
67 ax.axhline(y=p995, color='red', linestyle='dotted', label=f'p995={p995}')
68 plt.savefig(f"2C_umiEstd_{nfoDict['sid']}_{round(c995)}.pdf", metadata={**metapdf, 'Title': 'sqrt_inv_total_counts Violin', 'Subject': f"{nfoDict['sub']} Data @ {round(c995)}"})
69 plt.figure(2)
70 ax=sc.pl.violin(adata,['total_counts'],jitter=0.4, stripplot=True,show=False)
71 ax.set_title(f"total_counts Violin - {nfoDict['sub']} @ {round(c995)}")
72 ax.axhline(y=round(c995), color='red', linestyle='dotted', label=f'c995={round(c995)}')
73 plt.savefig(f"2C_umiEcnt_{nfoDict['sid']}_{round(c995)}.pdf", metadata={**metapdf, 'Title': 'total_counts Violin', 'Subject': f"{nfoDict['sub']} Data @ {round(c995)}"})
74 adata.raw = adata.copy()
75 #sc.pp.filter_cells(adata, min_counts=round(c995)) # sqrt_inv_total_counts < p995 按照样品均值的标准差考虑。写作round(c995)。
76 sc.pp.filter_cells(adata, min_counts=1)
77 sc.pp.filter_genes(adata, min_cells=1) # adata.var[adata.var['n_cells']<2].sort_values(by='sqrt_inv_total_counts') 有800个,就不过滤了。
78 print(f"[i]Filtered: {adata.raw.shape} -> {adata.shape}", file=sys.stderr)
79 plt.close('all')
80 sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=3000)
81 sc.pp.normalize_total(adata,target_sum=1e6, key_added='CPMnormFactor')
82 adata.layers["norm"] = adata.X.copy()
83 sc.pp.log1p(adata)
84 if nfoDict['type'] == 'mobivision':
85 sc.pp.pca(adata)
86 sc.pp.neighbors(adata)
87 sc.tl.tsne(adata,random_state=369)
88 elif nfoDict['type'] == 'visium':
89 import torch
90 import STAGATE_pyG
91 STAGATE_pyG.Cal_Spatial_Net(adata, rad_cutoff=150)
92 STAGATE_pyG.Stats_Spatial_Net(adata)
93 adata = STAGATE_pyG.train_STAGATE(adata, device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))
94 sc.pp.neighbors(adata, use_rep='STAGATE')
95 sc.tl.tsne(adata, use_rep='STAGATE', random_state=369)
96 sc.pp.pca(adata)
97 data=adata.obsm['STAGATE'][0:,0:2]
98 df=pd.DataFrame(data=data[0:,0:], index=[adata.obs_names[i] for i in range(data.shape[0])], columns=['STA'+str(1+i) for i in range(data.shape[1])])
99 df['Platform'] = adata.obs['Platform']
100 figD=sns.JointGrid(data=df, x="STA1", y="STA2", hue='Platform', dropna=True)
101 figD.plot_joint(sns.scatterplot, s=12.7, alpha=.6)
102 figD.plot_marginals(sns.histplot, kde=True, alpha=.618)
103 figD.figure.suptitle(f"STAGATE - {nfoDict['sub']}")
104 figD.savefig(f"2C_STAGATE_{nfoDict['sid']}.pdf", metadata={**metapdf, 'Title': 'STAGATE'})
105 else:
106 print(f"[x]Unknow Type {nfoDict['type']}", file=sys.stderr)
107 exit(1)
108 sc.tl.umap(adata,random_state=369)
109 sc.tl.draw_graph(adata,random_state=369)
110 sc.tl.leiden(adata,random_state=0,resolution=0.25)
112 fig1, ax1 = plt.subplots()
113 ax1.plot(x, y)
114 ax1.set_title("Axis 1 title")
115 ax1.set_xlabel("X-label for axis 1")
117 print("[i]Begin fig E. 2Ca", file=sys.stderr)
118 plt.figure()
119 ax=sc.pl.pca(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", annotate_var_explained=True)
120 plt.savefig(f"2C_PCA_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={**metapdf, 'Title': 'PCA'})
121 plt.figure()
122 ax=sc.pl.umap(adata,color='Platform', show=False, title=f"UMAP - {nfoDict['sub']}")
123 plt.savefig(f"2C_UMAP_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={**metapdf, 'Title': 'UMAP'})
124 plt.figure()
125 ax=sc.pl.tsne(adata, color='Platform', show=False, title=f"t-SNE - {nfoDict['sub']}")
126 plt.savefig(f"2C_tSNE_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={**metapdf, 'Title': 't-SNE'})
127 plt.figure()
128 ax=sc.pl.draw_graph(adata, color='Platform', show=False, title=f"ForceAtlas2 - {nfoDict['sub']}")
129 plt.savefig(f"2C_ForceAtlas2_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={**metapdf, 'Title': 'ForceAtlas2'})
130 plt.close('all')
132 fig, ax = plt.subplots()
133 fig.patch.set(alpha=0)
134 ax.patch.set(alpha=0)
135 palette = ['#CC0000','#00CC00','#CCCC00']
136 sc.pl.pca(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", ax=ax, palette=palette, annotate_var_explained=True)
137 arts=ax.findobj(matplotlib.collections.PathCollection)
138 for art in arts:
139 mplcairo.operator_t.ADD.patch_artist(art)
140 newlabels = adata.obs['Platform'].unique().tolist() + ['Both']
141 legend_elements = [plt.scatter([],[],linewidths=0, marker='o', label=label, color=color) for label, color in zip(newlabels, palette)]
142 ax.legend(handles=legend_elements,bbox_to_anchor=(1.05, 0.5), loc='center left', borderaxespad=0.2)
143 plt.savefig(f"2C_mPCA_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={**metapdf, 'Title': 'PCA'})
144 plt.close('all')
146 fig, ax = plt.subplots()
147 fig.patch.set(alpha=0)
148 ax.patch.set(alpha=0)
149 palette = ['#CC0000','#00CC00','#CCCC00']
150 sc.pl.tsne(adata, color='Platform', show=False, title=f"PCA - {nfoDict['sub']}", ax=ax, palette=palette)
151 arts=ax.findobj(matplotlib.collections.PathCollection)
152 for art in arts:
153 mplcairo.operator_t.ADD.patch_artist(art)
154 newlabels = adata.obs['Platform'].unique().tolist() + ['Both']
155 legend_elements = [plt.scatter([],[],linewidths=0, marker='o', label=label, color=color) for label, color in zip(newlabels, palette)]
156 ax.legend(handles=legend_elements,bbox_to_anchor=(1.05, 0.5), loc='center left', borderaxespad=0.2)
157 plt.savefig(f"2C_mtSNE_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={**metapdf, 'Title': 't-SNE'})
158 plt.close('all')
160 print("[i]Begin fig E. 2Cb", file=sys.stderr)
161 fig, axes = plt.subplots(1, 2, figsize=(12, 6))
162 plt.subplots_adjust(wspace=0.1)
163 sc.pl.umap(adata[adata.obs['Platform']=='Illumina'], color='leiden', ax=axes[0], title=f'UMAP - Illumina')
164 sc.pl.umap(adata[adata.obs['Platform']=='Salus'], color='leiden', ax=axes[1], title=f'UMAP - Salus')
165 axes[0].legend().set_visible(False)
166 fig.suptitle(f"Clusters by Leiden - {nfoDict['sub']}")
167 fig.savefig(f"2C_leidenUMAP_{nfoDict['sid']}.pdf", metadata={**metapdf, 'Title': 'Cluster UMAP'})
168 plt.figure(figsize=(6,4))
169 plt.title(f"Cluster Size Histogram - {nfoDict['sub']}")
170 axB = sns.histplot(adata.obs,x='leiden',hue='Platform',multiple="dodge",shrink=.66)
171 axB.set_xlabel('leiden Cluster NO.')
172 axB.set_ylabel('Cluster Size')
173 plt.savefig(f"2C_leidenHist_{nfoDict['sid']}.pdf", metadata={**metapdf, 'Title': 'Cluster Size Histogram'})
174 plt.close('all')
176 print("[i]Begin fig E. 2D", file=sys.stderr)
177 plt.switch_backend('pdf')
178 import pymn
179 adata.obs['cell.cluster'] = adata.obs['leiden'].astype(str)
180 adata.obs['study_id'] = adata.obs['Platform'].astype(str)
181 pymn.variableGenes(adata, study_col='Platform')
182 pymn.MetaNeighborUS(adata, study_col='study_id', ct_col='cell.cluster', fast_version=True)
183 plt.figure(figsize=(16,16))
184 plt.title(f"MetaNeighborUS - {nfoDict['sub']}")
185 cm=pymn.plotMetaNeighborUS(adata, cmap='coolwarm',show=False)
186 cm.savefig(f"2D_MetaNeighborUS_{nfoDict['sid']}.pdf", metadata={**metapdf, 'Title': 'MetaNeighborUS'})
187 pymn.topHits(adata, threshold=0)
188 mndf = adata.uns['MetaNeighborUS_topHits']
189 mndf['ClusterID'] = mndf['Study_ID|Celltype_1'].str.split('|').str[1].astype(int)
190 pndf=mndf[mndf['Match_type']=='Reciprocal_top_hit']
191 plt.figure(figsize=(6,4))
192 plt.title(f"Mean_AUROC Between Platforms - {nfoDict['sub']}")
193 axC = sns.barplot(pndf,x='ClusterID',y='Mean_AUROC')
194 axC.set_xlabel('leiden Cluster NO.')
195 plt.savefig(f"2D_AUROC_{nfoDict['sid']}.pdf", metadata={**metapdf, 'Title': 'AUROC'})
196 plt.close('all')
197 if nfoDict['type'] == 'visium':
198 for platform in PlatformTuple:
199 plt.figure()
200 ax=sc.pl.embedding(adata[adata.obs['Platform'] == platform], s=135, basis="spatial", color=["leiden"], show=False, title=f'{platform}')
201 plt.axis('off')
202 plt.savefig(f"2C_spLeiden_{platform}_{nfoDict['sid']}.pdf", bbox_extra_artists=(ax.get_legend(),), metadata={**metapdf, 'Title': f'spLeiden - {platform}'})
203 plt.close('all')
205 if __name__ == "__main__":
206 if len(sys.argv) > 1:
207 thisID = sys.argv[1]
208 if thisID not in SamplesDict:
209 print(f"[x]sid can only be {SamplesDict.keys()}", file=sys.stderr)
210 exit(1)
211 print(sys.argv, file=sys.stderr)
212 print(f"[i]{thisID}")
213 sys.stdout.flush()
214 main(thisID)
217 ./nfig1.py human; ./nfig2.py human ; ./nfig1.py mbrain; ./nfig2.py mbrain ; ./nfig1.py mkidney; ./nfig2.py mkidney
218 time (./nfig1.py human; ./nfig1.py mbrain ; ./nfig1.py mkidney ) | tee nplot.log
219 time (./nfig2.py human; ./nfig2.py mbrain ; ./nfig2.py mkidney )
221 pip install git+https://github.com/gillislab/pyMN#egg=pymetaneighbor
222 pip install git+https://github.com/scverse/squidpy@main # or 'squidpy>=1.2.3'
223 pip install git+https://github.com/QIFEIDKN/STAGATE_pyG
224 pip install torch_scatter torch_sparse -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
226 pip install git+https://github.com/makepath/xarray-spatial
227 # salus
228 micromamba install -c pyg pyg::pyg[version='>2=*_cpu']
229 #pip3 install --upgrade --upgrade-strategy only-if-needed torch_geometric
230 pip3 install torch_scatter torch_sparse -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
231 pip3 install --upgrade --upgrade-strategy only-if-needed ~/conda/wheels/STAGATE_pyG-1.0.0-py3-none-any.whl ~/conda/wheels/pyMetaNeighbor-0.1.0-py3-none-any.whl
232 pip3 install --upgrade --upgrade-strategy only-if-needed ~/conda/wheels/xarray_spatial-0.3.8.dev1+g27ab0c8-py3-none-any.whl ~/conda/wheels/squidpy-1.3.2.dev9+g4fdb801-py3-none-any.whl