Merge pull request #5134 from solgenomics/topic/fix_seedlot_search
[sgn.git] / cgi-bin / about / family_analysis.pl
blobf9dc201a57fce04e44d83938428be8a01de435c8
use strict;
use warnings;    # surface uninitialized values and similar mistakes in this file
use CXGN::Page;
use CXGN::Page::FormattingHelpers qw/ page_title_html
                                      blue_section_html /;

# Accumulators for the HTML table rows of each help-page section
# (summary, procedure, terms, references). They are filled in by the
# sections that follow and wrapped in tables when the page is printed.
my ($sum_content, $process_content, $term_content, $ref_content);
##########################################
# Define the summary
#
# A single table row holding one paragraph that describes what the gene
# family analysis does and which data sets it covers. The text is split
# at sentence boundaries purely for readability; the pieces concatenate
# into one line of HTML.
$sum_content =
      '<tr><td><p>'
    . 'SGN gene family analysis groups proteins based on their sequence similarity. '
    . 'It incorporates the Arabidopsis proteome and peptides predicted from SGN unigenes '
    . '(currently from Lycopersicon combined, Solanum tuberosum, Solanum melongena, '
    . 'Capsicum annuum and Petunia hybrida) and coffee unigenes.'
    . '</p></td></tr>';
##########################################
# Define the process
#
# Each entry is [ step label, description ]. The bracketed numbers in the
# descriptions ([1], [2], [3]) refer to the entries of the References
# section printed further down the page.
my @process_steps = (
    [
        '1. ',
        'SGN and coffee unigenes are subjected to ESTScan, an HMM-based program to predict '
            . 'coding regions and the corresponding peptide from EST sequences[1].'
    ],
    [
        '2. ',
        'Predicted SGN and coffee peptides are combined with Arabidopsis predicted proteins. '
            . 'A self blastp is performed in the combined protein data set[2].'
    ],
    [
        '3. ',
        'TRIBE-MCL program is applied to the blastp result for clustering protein sequences into families[3]. '
            . 'This program first translates blastp result into a similarity matrix. '
            . 'Based on the matrix, the program then groups the proteins using Markov cluster (MCL) algorithm.'
    ],
);

# Render every step as one table row: label in a top-aligned header cell,
# description in the data cell.
$process_content = '';
for my $process_step (@process_steps) {
    my ( $step_label, $step_text ) = @{$process_step};
    $process_content .=
          '<tr><th valign="top">' . $step_label . '</th>'
        . '<td>' . $step_text . '</td></tr>';
}
#########################################
# Define the terms
#
# Glossary shown in the "Terms" section. Each entry is
# [ term, definition ]; the term goes in a top-aligned header cell and the
# definition in the data cell of the same row.
my @term_definitions = (
    [
        'Data Set',
        'A combination of Arabidopsis predicted proteins and predicted peptide from current SGN and coffee unigene builds. '
            . 'If any of the above data set member is updated, for example, a new unigene build of Solanum tuberosum is built, '
            . 'a new data set is then generated and family analysis is performed in the data set.'
    ],
    [
        'i Value',
        'Clustering of proteins by TRIBE-MCL is carried out by alternation of two operators called expansion and inflation. '
            . 'While inflation groups genes into clusters, expansion dissipates clusters. '
            . 'I value controls the stringency of inflation. '
            . 'The higher the i value, the more stringent for inflation operator to group genes together.'
    ],
    [
        'Family Build',
        'A family build is uniquely defined by the Data Set and stringency (i Value). '
            . 'For each SGN Data Set, we do TRIBE-MCL analysis with 3 i Values: 1.2, 2 and 5 and obtain 3 Family Builds.'
    ],
    [
        'Gene Family of a Species',
        'A gene family with at least 1 member gene from the species.'
    ],
    [
        'Unique Gene Family of a Species',
        # The leading space is kept from the original markup.
        ' A gene family whose member genes are from a species exclusively.'
    ],
);

$term_content = '';
for my $term_definition (@term_definitions) {
    my ( $term, $definition ) = @{$term_definition};
    $term_content .=
          '<tr><th valign="top">' . $term . '</th>'
        . '<td>' . $definition . '</td></tr>';
}
#################################################
# Define references
#
# Bibliography shown in the "References" section. Each entry is
# [ citation marker, citation text ]; the markers match the bracketed
# numbers used in the procedure descriptions.
my @references = (
    [
        '[1] ',
        'Iseli C. et al (1999), ESTScan: a Program for Detecting, Evaluating and Reconstructing '
            . 'Potential Coding Regions in EST Sequences, American Association of Artificial Intelligence.'
    ],
    [
        '[2] ',
        'Altschul S.F. et al (1997), Gapped BLAST and PSI-BLAST: a New Generation of Protein Database Search Programs. '
            . 'Nucleic Acids Research 25, 3389-3402.'
    ],
    [
        '[3] ',
        'Enright A.J. et al (2002), An Efficient Algorithm for Large-Scale Detection of Protein Families. '
            . 'Nucleic Acids Research 30, 1575-1584.'
    ],
);

$ref_content = '';
for my $reference (@references) {
    my ( $marker, $citation ) = @{$reference};
    $ref_content .=
          '<tr><th valign="top">' . $marker . '</th>'
        . '<td>' . $citation . '</td></tr>';
}
################################################
# Generate the page
#
# Emits, in order: the page header, the title, one blue section per
# content variable, and the page footer.
# NOTE(review): $page is declared with `our`; nothing in this file needs a
# package variable, but confirm no external code relies on it before
# changing it to `my`.
our $page = CXGN::Page->new( 'Gene Family Help Page', 'Chenwei Lin' );
$page->header();
print page_title_html('SGN Gene Family Analysis');

# Every section wraps its rows in the same full-width, zero-border table.
my $table_open  = '<table width="100%" summary="" cellpadding="3" cellspacing="0" border="0">';
my $table_close = '</table>';

# Section heading paired with the rows to show under it, in display order.
my @page_sections = (
    [ 'Summary',    $sum_content ],
    [ 'Procedure',  $process_content ],
    [ 'Terms',      $term_content ],
    [ 'References', $ref_content ],
);

for my $page_section (@page_sections) {
    my ( $heading, $rows ) = @{$page_section};
    print blue_section_html( $heading, $table_open . $rows . $table_close );
}

$page->footer();