5 from typing
import NamedTuple
7 PlatformTuple
= ('Illumina', 'Salus')
11 'sub' : 'Mouse Brain Sptial',
14 'prefix' : '/share/result/spatial/data/BoAo_sp',
15 'suffixOut': dict.fromkeys(PlatformTuple
,"outs"),
16 'suffixMtx': 'filtered_feature_bc_matrix',
17 'platforms': {PlatformTuple
[0]:'illumina', PlatformTuple
[1]: 'salus'},
18 'pattern': ('prefix', 'platformV', 'sid', 'suffixOutV', 'suffixMtx')
22 'sub' : 'Mouse Kindey Sptial',
25 'prefix' : '/share/result/spatial/data/BoAo_sp',
26 'suffixOut': dict.fromkeys(PlatformTuple
,"outs"),
27 'suffixMtx': 'filtered_feature_bc_matrix',
28 'platforms': {PlatformTuple
[0]:'illumina', PlatformTuple
[1]: 'salus'},
29 'pattern': ('prefix', 'platformV', 'sid', 'suffixOutV', 'suffixMtx')
33 'sub' : 'Human Single Cell',
36 'prefix' : '/share/result/spatial/data/MoZhuo_sc/FX20230913',
37 'suffixOut': {PlatformTuple
[0]: 'out/R22045213-220914-LYY-S11-R03-220914-LYY-S11-R03_combined_outs',
38 PlatformTuple
[1]: 'out_subset/20221124-LYY-S09-R03_AGGCAGAA_fastq_outs'},
39 'suffixMtx': 'filtered_cell_gene_matrix',
40 'platforms': {PlatformTuple
[0]:'illumina', PlatformTuple
[1]: 'sailu'},
41 'pattern': ('prefix', 'platformV', 'suffixOutV', 'suffixMtx')
45 def checkModules() -> None:
46 import importlib
.metadata
47 from packaging
import version
50 got_ver
= importlib
.metadata
.version(pkgname
)
51 if version
.parse(got_ver
) < version
.parse(min_ver
):
52 raise Exception(f
"{pkgname}>={min_ver} is needed, but found {pkgname}=={got_ver}")
54 if __name__
== "__main__":
57 if thisID
not in SamplesDict
:
58 print(f
"[x]sid can only be {SamplesDict.keys()}", file=sys
.stderr
)
62 print(sys
.argv
, file=sys
.stderr
)
67 import matplotlib
; matplotlib
.use("module://mplcairo.base")
68 from matplotlib
import pyplot
as plt
71 plt
.rcParams
['figure.figsize'] = (6.0, 6.0) # set default size of plots
72 plt
.rcParams
['figure.dpi'] = 300
73 plt
.rcParams
['savefig.bbox'] = 'tight'
74 plt
.rcParams
["savefig.transparent"] = True
75 font
= {'family' : 'STIX Two Text',
78 matplotlib
.rc('font', **font
)
82 import fast_matrix_market
85 sc
._settings
.ScanpyConfig
.n_jobs
= -1
92 warnings
.filterwarnings('ignore')
96 class scDatItem(NamedTuple
):
102 def __repr__(self
) -> str:
103 return f
'[sc:{self.name}, Raw_BC*Gene={self.bgRaw[0]}x{self.bgRaw[1]}, NonZero_BC*Gene={self.bgFlt[0]}x{self.bgFlt[1]} ({self.annDat.n_obs}x{self.annDat.n_vars})]'
106 nfoDict
= SamplesDict
[thisID
]
107 print("[i]Start.", file=sys
.stderr
)
108 for platform
in PlatformTuple
:
109 nfoDict
['platformK'] = platform
110 nfoDict
['platformV'] = nfoDict
['platforms'][platform
]
111 nfoDict
['suffixOutV'] = nfoDict
['suffixOut'][platform
]
112 mtxPath
= os
.path
.join( *[nfoDict
[v
] for v
in nfoDict
['pattern']] )
113 print(f
"[i]Reading {mtxPath}", file=sys
.stderr
)
114 adata
=sc
.read_10x_mtx(mtxPath
, var_names
='gene_symbols', make_unique
=True, gex_only
=True)
115 adata
.var_names_make_unique() # this is necessary if using `var_names='gene_symbols'` in `sc.read_10x_mtx`
117 adata
.var
['mt'] = adata
.var_names
.str.startswith('MT-') | adata
.var_names
.str.startswith('mt-')
118 sc
.pp
.calculate_qc_metrics(adata
, qc_vars
=['mt'], percent_top
=None, log1p
=True, inplace
=True)
120 sc
.pp
.filter_cells(adata
, min_genes
=1)
121 sc
.pp
.filter_genes(adata
, min_cells
=1)
122 nnFlt
= (adata
.n_obs
,adata
.n_vars
)
124 #sc.pp.neighbors(adata)
125 #sc.tl.umap(adata,random_state=369)
126 #sc.tl.draw_graph(adata)
127 scDat
.append(scDatItem(platform
,nnRaw
,nnFlt
,adata
))
128 adata
.write_h5ad(f
"{nfoDict['sid']}_{platform}.h5ad",compression
='lzf')
130 print("\n".join(map(str,scDat
)))
132 with pd
.option_context("mode.copy_on_write", True):
133 obsmbi
= scDat
[0].annDat
.obs
[['n_genes_by_counts', 'total_counts']].copy(deep
=False)
134 obsmbs
= scDat
[1].annDat
.obs
[['n_genes_by_counts', 'total_counts']].copy(deep
=False)
135 p1df
= pd
.concat([obsmbi
.assign(Platform
=scDat
[0].name
), obsmbs
.assign(Platform
=scDat
[1].name
)], ignore_index
=True).replace([np
.inf
, -np
.inf
, 0], np
.nan
).dropna()
136 p2df
= obsmbi
.join(obsmbs
,lsuffix
='_'+scDat
[0].name
,rsuffix
='_'+scDat
[1].name
,how
='inner').replace([np
.inf
, -np
.inf
, 0], np
.nan
).dropna()
137 p3tuple
= (frozenset(scDat
[0].annDat
.var_names
), frozenset(scDat
[1].annDat
.var_names
))
139 print("[i]Begin fig A. 1D", file=sys
.stderr
)
140 custom_params
= {"axes.spines.right": False, "axes.spines.top": False}
141 sns
.set_theme(style
="ticks", rc
=custom_params
, font
="STIX Two Text")
142 figA
=sns
.JointGrid(data
=p1df
, x
="total_counts", y
="n_genes_by_counts", hue
='Platform', dropna
=True)
143 #figA.plot(sns.scatterplot, sns.histplot, alpha=.7, edgecolor=".2", linewidth=.5)
144 figA
.plot_joint(sns
.scatterplot
, s
=12.7, alpha
=.6)
145 figA
.plot_marginals(sns
.histplot
, kde
=True, alpha
=.618)
146 figA
.figure
.suptitle(f
"Gene to UMI plot - {nfoDict['sub']}")
147 figA
.set_axis_labels(xlabel
='UMIs per Barcode', ylabel
='Genes per Barcode')
148 figA
.savefig(f
"1D_{nfoDict['sid']}.pdf", transparent
=True, dpi
=300, metadata
={'Title': 'Gene to UMI plot', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
150 print("[i]Begin fig B. 1E", file=sys
.stderr
)
151 figB
=sns
.JointGrid(data
=p2df
, x
="total_counts_Illumina", y
="total_counts_Salus", dropna
=True)
152 figB
.plot_joint(sns
.scatterplot
, s
=12.7, alpha
=.6)
153 figB
.plot_marginals(sns
.histplot
, kde
=True, alpha
=.618)
154 figB
.figure
.suptitle(f
"UMI per Barcode Counts Comparing - {nfoDict['sub']}")
155 figB
.set_axis_labels(xlabel
='UMI Counts from Illumina', ylabel
='UMI Counts from Salus')
156 figB
.savefig(f
"1E_{nfoDict['sid']}.pdf", transparent
=True, dpi
=300, metadata
={'Title': 'UMI per Barcode Counts Comparing', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
158 print("[i]Begin fig . 1F", file=sys
.stderr
)
159 from matplotlib_venn
import venn2
160 plt
.figure(figsize
=(4,4))
161 plt
.title(f
"Genes Venn diagram - {nfoDict['sub']}")
162 p3intersection
= p3tuple
[0] & p3tuple
[1]
163 p3veen
= (p3tuple
[0]-p3intersection
, p3tuple
[1]-p3intersection
, p3intersection
)
164 GenesA
= scDat
[0].annDat
.var
.loc
[p3veen
[0]-p3veen
[2]]
165 GenesB
= scDat
[1].annDat
.var
.loc
[p3veen
[1]-p3veen
[2]]
166 GenesC
= scDat
[0].annDat
.var
.loc
[p3veen
[2]]
167 p3vd
=venn2(subsets
=tuple(map(len,p3veen
)), set_labels
=(scDat
[0].name
, scDat
[1].name
))
168 plt
.savefig(f
"1G_Genes_{nfoDict['sid']}.pdf", transparent
=True, dpi
=300, metadata
={'Title': 'Veen of Genes', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
169 GenesA
.to_csv(f
"1G_Genes_{nfoDict['sid']}_{scDat[0].name}_only.csv",encoding
='utf-8')
170 GenesB
.to_csv(f
"1G_Genes_{nfoDict['sid']}_{scDat[1].name}_only.csv",encoding
='utf-8')
171 GenesC
.to_csv(f
"1G_Genes_{nfoDict['sid']}_intersection.csv.zst",encoding
='utf-8',compression
={'method': 'zstd', 'level': 9, 'write_checksum': True})
173 print("[i]Begin fig C. 2A", file=sys
.stderr
)
174 # https://www.kaggle.com/code/lizabogdan/top-correlated-genes?scriptVersionId=109838203&cellId=21
175 p4xdf
= scDat
[0].annDat
.to_df()
176 p4ydf
= scDat
[1].annDat
.to_df()
177 p4corraw
= p4xdf
.corrwith(p4ydf
,axis
=1)
178 p4corr
= p4corraw
.dropna()
179 plt
.figure(figsize
=(6,4))
180 plt
.title(f
"Pearson correlation - {nfoDict['sub']}")
181 figC
=sns
.histplot(p4corr
,stat
='count',binwidth
=0.01)
182 plt
.savefig(f
"2A_Correlation_{nfoDict['sid']}.pdf", transparent
=True, dpi
=300, metadata
={'Title': 'Pearson correlation', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
184 print("[i]Begin fig D. 2B", file=sys
.stderr
)
185 var_names
= scDat
[0].annDat
.var_names
.intersection(scDat
[1].annDat
.var_names
)
186 xadata
= scDat
[0].annDat
[:, var_names
]
187 yadata
= scDat
[1].annDat
[:, var_names
]
188 xdf
=getOBSMdf(xadata
)
189 ydf
=getOBSMdf(yadata
)
190 #p4df = xdf.assign(Platform=scDat[0].name).join(ydf.assign(Platform=scDat[1].name),lsuffix='_'+scDat[0].name,rsuffix='_'+scDat[1].name,how='inner')
191 p4df
= pd
.concat([xdf
.assign(Platform
=scDat
[0].name
), ydf
.assign(Platform
=scDat
[1].name
)], ignore_index
=True).replace([np
.inf
, -np
.inf
, 0], np
.nan
).dropna()
192 figD
=sns
.JointGrid(data
=p4df
, x
="P1", y
="P2", hue
='Platform', dropna
=True)
193 figD
.plot_joint(sns
.scatterplot
, s
=12.7, alpha
=.6)
194 figD
.plot_marginals(sns
.histplot
, kde
=True, alpha
=.618)
195 figD
.figure
.suptitle(f
"PCA - {nfoDict['sub']}")
196 figD
.set_axis_labels(xlabel
='PC1', ylabel
='PC2')
197 figD
.savefig(f
"2B_PCA_{nfoDict['sid']}.pdf", transparent
=True, dpi
=300, metadata
={'Title': 'PCA', 'Subject': f
"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
199 print("[i]Begin fig E. 2C", file=sys.stderr)
200 xdf=getOBSMdf(xadata,'X_umap')
201 ydf=getOBSMdf(yadata,'X_umap')
202 p5df = pd.concat([xdf.assign(Platform=scDat[0].name), ydf.assign(Platform=scDat[1].name)], ignore_index=True).replace([np.inf, -np.inf, 0], np.nan).dropna()
203 figE=sns.JointGrid(data=p5df, x="P1", y="P2", hue='Platform', dropna=True)
204 figE.plot_joint(sns.scatterplot, s=12.7, alpha=.6)
205 figE.plot_marginals(sns.histplot, kde=True, alpha=.618)
206 figE.figure.suptitle(f"UMAP - {nfoDict['sub']}")
207 figE.set_axis_labels(xlabel='UMAP1', ylabel='UMAP2')
208 figE.savefig(f"2C_UMAP_{nfoDict['sid']}.pdf", transparent=True, dpi=300, metadata={'Title': 'UMAP', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
209 print("[i]Begin fig E. 2Cn", file=sys.stderr)
210 xdf=getOBSMdf(xadata,'X_draw_graph_fa')
211 ydf=getOBSMdf(yadata,'X_draw_graph_fa')
212 p5df = pd.concat([xdf.assign(Platform=scDat[0].name), ydf.assign(Platform=scDat[1].name)], ignore_index=True).replace([np.inf, -np.inf, 0], np.nan).dropna()
213 figE=sns.JointGrid(data=p5df, x="P1", y="P2", hue='Platform', dropna=True)
214 figE.plot_joint(sns.scatterplot, s=12.7, alpha=.6)
215 figE.plot_marginals(sns.histplot, kde=True, alpha=.618)
216 figE.figure.suptitle(f"ForceAtlas2 - {nfoDict['sub']}")
217 figE.set_axis_labels(xlabel='FA1', ylabel='FA2')
218 figE.savefig(f"2C_ForceAtlas2_{nfoDict['sid']}.pdf", transparent=True, dpi=300, metadata={'Title': 'ForceAtlas2', 'Subject': f"{nfoDict['sub']} Data", 'Author': 'HU Xuesong'})
221 def getOBSMdf(anndata
, obsmkey
='X_pca') -> pd
.DataFrame
:
222 if not obsmkey
in anndata
.obsm
:
224 sc
.tl
.pca(anndata
,zero_center
=True)
225 elif obsmkey
=='X_umap':
226 if not 'neighbors' in anndata
.uns
:
227 if not 'X_pca' in anndata
.obsm
:
228 sc
.pp
.pca(anndata
,zero_center
=True)
229 sc
.pp
.neighbors(anndata
)
230 sc
.tl
.umap(anndata
,random_state
=369)
231 elif obsmkey
=='X_draw_graph_fa':
232 if not 'neighbors' in anndata
.uns
:
233 if not 'X_pca' in anndata
.obsm
:
234 sc
.pp
.pca(anndata
,zero_center
=True)
235 sc
.pp
.neighbors(anndata
)
236 sc
.tl
.draw_graph(anndata
,random_state
=369)
237 data
=anndata
.obsm
[obsmkey
][0:,0:2]
238 df
=pd
.DataFrame(data
=data
[0:,0:], index
=[anndata
.obs_names
[i
] for i
in range(data
.shape
[0])], columns
=['P'+str(1+i
) for i
in range(data
.shape
[1])])
241 if __name__
== "__main__":
242 main() # time (./fig1.py human; ./fig1.py mbrain ; ./fig1.py mkidney ) | tee plot.log
245 x1 = np.random.randn(1000)
246 y1 = np.random.randn(1000)
247 x2 = np.random.randn(1000) * 5
248 y2 = np.random.randn(1000)
249 fig, ax = plt.subplots()
250 # The figure and axes background must be made transparent.
251 fig.patch.set(alpha=0)
252 ax.patch.set(alpha=0)
253 pc1 = ax.scatter(x1, y1, c='b', edgecolors='none')
254 pc2 = ax.scatter(x2, y2, c='r', edgecolors='none')
255 mplcairo.operator_t.ADD.patch_artist(pc2) # Use additive blending.
260 3、Q<20和purity<0.6的比率大于18%
262 fastp --thread 4 -z -A --max_len1 28 --max_len2 0 --dont_eval_duplication -q 20 -u 30 -n 4 --average_qual 20 --length_required 28 -y -Y 30 -g -x
264 fastp -w 4 -A -q 20 -u 30 -n 5 -l 28 -y -Y 30 -g -x --max_len1 28 --max_len2 1000 \
265 -i ${prefix}_R1_001.fastq.gz -I ${prefix}_R2_001.fastq.gz \
266 -o ./cleanfq/${basepx}_R1_001.fastq.gz -O ./cleanfq/${basepx}_R2_001.fastq.gz \
267 -j ./cleanfq/${basepx}.json -h ./cleanfq/${basepx}.html 2>./cleanfq/${basepx}.log
269 import patchworklib as pw
270 #from blend_modes import addition
274 * Try layers of annData.
275 * Res: layers share obs and var, thus useless. Even MuData shares obs.
277 ls -1 *.pdf|while read a;do convert -density 1200 $a -resize 25% $a.png;done