From dba2e368333f0796e08bf617bd5a0b18c755e3cc Mon Sep 17 00:00:00 2001 From: Qizhi Zhang Date: Tue, 6 Jun 2023 20:04:52 +0000 Subject: [PATCH 1/5] consilidate 7 apps into 4 --- .../methods/annotate_contigset/display.yaml | 268 ------------ .../annotate_contigset/img/rast-red.png | Bin 6071 -> 0 bytes .../methods/annotate_contigset/spec.json | 400 ----------------- .../methods/annotate_contigsets/display.yaml | 320 -------------- .../annotate_contigsets/img/rast-red.png | Bin 6071 -> 0 bytes .../methods/annotate_contigsets/spec.json | 412 ------------------ .../display.yaml | 20 +- .../spec.json | 95 ---- .../reannotate_microbial_genome/display.yaml | 294 ------------- .../img/rast-red.png | Bin 6071 -> 0 bytes .../reannotate_microbial_genome/spec.json | 323 -------------- .../reannotate_microbial_genomes/display.yaml | 270 ------------ .../img/rast-red.png | Bin 6071 -> 0 bytes .../reannotate_microbial_genomes/spec.json | 328 -------------- 14 files changed, 2 insertions(+), 2728 deletions(-) delete mode 100644 ui/narrative/methods/annotate_contigset/display.yaml delete mode 100644 ui/narrative/methods/annotate_contigset/img/rast-red.png delete mode 100644 ui/narrative/methods/annotate_contigset/spec.json delete mode 100644 ui/narrative/methods/annotate_contigsets/display.yaml delete mode 100644 ui/narrative/methods/annotate_contigsets/img/rast-red.png delete mode 100644 ui/narrative/methods/annotate_contigsets/spec.json delete mode 100644 ui/narrative/methods/reannotate_microbial_genome/display.yaml delete mode 100644 ui/narrative/methods/reannotate_microbial_genome/img/rast-red.png delete mode 100644 ui/narrative/methods/reannotate_microbial_genome/spec.json delete mode 100644 ui/narrative/methods/reannotate_microbial_genomes/display.yaml delete mode 100644 ui/narrative/methods/reannotate_microbial_genomes/img/rast-red.png delete mode 100644 ui/narrative/methods/reannotate_microbial_genomes/spec.json diff --git 
a/ui/narrative/methods/annotate_contigset/display.yaml b/ui/narrative/methods/annotate_contigset/display.yaml deleted file mode 100644 index e235e5b..0000000 --- a/ui/narrative/methods/annotate_contigset/display.yaml +++ /dev/null @@ -1,268 +0,0 @@ -# -# Define basic display information -# -#name : Annotate Microbial Assembly with RASTtk - v1.073 (*Deprecated by 'Annotate Genome/Assembly with RASTtk - v1.073' in 6 months) - -name : Annotate Microbial Assembly with RASTtk - v1.073 - -tooltip : | - Annotate a bacterial or archaeal assembly using RASTtk (Rapid Annotations using Subsystems Technology toolkit). -screenshots : - [] - -icon: rast-red.png - -# -# Define the set of other narrative methods that should be suggested to the user. -# -suggestions : - apps: - related : - [] - next : - [build_fba_model, build_species_tree] - methods: - related : - [build_metabolic_model] - next : - [annotate_domains_in_genome_generic] - - -# -# Configure the display and description of the parameters -# -parameters : - input_contigset : - ui-name : | - Assembly - short-hint : | - An Assembly or ContigSet object to annotate. - scientific_name : - ui-name : | - Scientific Name - short-hint : | - The scientific name to assign to the genome as found at https://www.ncbi.nlm.nih.gov/taxonomy. - domain : - ui-name : | - Domain - short-hint : | - The domain of life of the organism being annotated. Bacteria and Archaea are acceptable values. - genetic_code : - ui-name : | - Genetic Code - short-hint : | - The genetic code used in translating to protein sequences. See https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for more information on genetic codes. - call_features_rRNA_SEED : - ui-name : | - Call rRNAs - short-hint : | - Call rRNA non-coding features with RAST. See http://rast.nmpdr.org/ for more information. - call_features_tRNA_trnascan : - ui-name : | - Call tRNA trnascan - short-hint : | - Call tRNA non-coding feature with trnascan in RAST. 
See http://rast.nmpdr.org/ for more information. - call_selenoproteins : - ui-name : | - Call selenoproteins - short-hint : | - Call new selenoprotein-containing genes with RAST. See http://rast.nmpdr.org/ for more information. - call_pyrrolysoproteins : - ui-name : | - Call pyrrolysoproteins - short-hint : | - Call new pyrrolysoprotein-containing genes with RAST. See http://rast.nmpdr.org/ for more information. - call_features_repeat_region_SEED : - ui-name : | - Call SEED repeat region - short-hint : | - Call SEED-defined non-coding repeat regions. See http://rast.nmpdr.org/ for more information. - call_features_insertion_sequences : - ui-name : | - Call features insertion sequences - short-hint : | - Call non-coding features insertion sequences with RAST. See http://rast.nmpdr.org/ for more information. - call_features_strep_suis_repeat : - ui-name : | - Call strep suis repeats - short-hint : | - Call non-coding strep suis repeats with RAST. See http://rast.nmpdr.org/ for more information. - call_features_strep_pneumo_repeat : - ui-name : | - Call strep pneumo repeats - short-hint : | - Call non-coding strep pneumo repeats with RAST. See http://rast.nmpdr.org/ for more information. - call_features_crispr : - ui-name : | - Call crisprs - short-hint : | - Call non-coding crisprs with RAST. See http://rast.nmpdr.org/ for more information. - call_features_CDS_glimmer3 : - ui-name : | - Call glimmer3 - short-hint : | - Call CDS features with glimmer3. See http://rast.nmpdr.org/ for more information. WARNING: Calling new genes will delete existing genes. Glimmer will not run if all contigs < 2000nt. - call_features_CDS_prodigal : - ui-name : | - Call prodigal - short-hint : | - Call CDS features with prodigal. See https://github.com/hyattpd/Prodigal for more information. WARNING: Calling new genes will delete existing genes. - annotate_proteins_kmer_v2 : - ui-name : | - Annotate proteins kmer v2 - short-hint : | - Annotate proteins kmer v2 with RAST. 
See http://rast.nmpdr.org/ for more information. - kmer_v1_parameters : - ui-name : | - Annotate proteins Kmer v1 - short-hint : | - Annotate proteins kmer v1 with RAST. See http://rast.nmpdr.org/ for more information. - annotate_proteins_similarity : - ui-name : | - Annotate proteins similarity - short-hint : | - Annotate proteins similarity with RAST. See http://rast.nmpdr.org/ for more information. - retain_old_anno_for_hypotheticals : - ui-name : | - Retain old annotations for hypotheticals - short-hint : | - Retain old annotations for hypotheticals. - resolve_overlapping_features : - ui-name : | - Resolve overlapping features - short-hint : | - Resolve overlapping features. See http://rast.nmpdr.org/ for more information. - call_features_prophage_phispy : - ui-name : | - Call features prophage phispy - short-hint : | - Call features prophage phispy with RAST. See http://rast.nmpdr.org/ for more information. - output_genome : - ui-name : | - Output Genome Name - short-hint : | - Name to assign the output genome. -description : | -

This KBase annotation App (Annotate Microbial Assembly) uses components from the RAST (Rapid Annotations using Subsystems Technology) toolkit [1,2,3] to annotate an assembled bacterial or archaeal genome.

-

The release versions of the RASTtk component services used in this app are: -

-

-

The required input is an Assembly object (older Narratives used the term ContigSet object). An Assembly can be generated using any of the Assembly Apps, by importing a FASTA file, or by uploading an .faa file directly from NCBI via FTP (Upload File to Staging from Web) and then importing.

-

Assemblies have three essential metadata fields that must be completed: scientific name, domain, and genetic code. The default genetic code for bacterial and archaeal genomes is genetic code 11. KBase annotation also supports genetic code 4 for Mycoplasma and genetic code 25. For more information on genetic codes, please refer to this NCBI document. All metadata fields are required because they affect conditional parameters in various programs that are being run. Use existing scientific names whenever possible.

-

The App annotates the Assembly-typed object (a set of contigs) and generates a Genome-typed object with both coding and non-coding features. By definition, Assembly objects have no annotation (only sequence) and the default is to select nearly all of the available App options. The available annotation features are in the advanced parameters and are discussed in more detail below.

-

For additional help, view this Tutorial for Annotate Microbial Contigs.

-

The Default Annotation Pipeline
Clicking "Run" will run the default pipeline. For a typical 2-5 MBp genome, this should take about 5 minutes. Because this is the first annotation for this assembly, the default pipeline consists of the following steps: -

-

-

Advanced Annotation Options
If you wish to customize the features in your annotation, click the "show advanced options" link. This will display the full set of available annotation options. The "Call features prophage phispy" option is unchecked because it is slower.

-

The Results -

-

-

GUI Output
The GUI output currently consists of three tabs. The "Overview" tab provides basic information on the annotation job, the "Browse Features" tab allows the user to scroll through the features that were called, and the "Browse Contigs" tab provides information on the contigs in the genome. Users can sort on the various types of features. Note that some features will overlap (e.g., "prophage" and "CDS").

-

Additional Information
For more information on the steps of the default RASTtk pipeline, please refer to our publication on this (publication forthcoming). For more detailed tutorial information and to explore the additional functionality of RASTtk not currently available in the Narrative interface please refer to http://tutorial.theseed.org.

-

Team members who developed & deployed algorithm in KBase: - Thomas Brettin, James Davis, Terry Disz, Robert Edwards, Chris Henry, Gary Olsen, Robert Olson, Ross Overbeek, Bruce Parrello, Gordon Pusch, Roman Sutormin, and Fangfang Xia. For questions, please contact us.

-

The authors of RAST request that if you use the results of this annotation in your work, please cite the first three listed publications:

-publications : - - - - display-text: | - [1] Aziz RK, Bartels D, Best AA, DeJongh M, Disz T, Edwards RA, et al. The RAST Server: Rapid Annotations using Subsystems Technology. BMC Genomics. 2008;9: 75. doi:10.1186/1471-2164-9-75 - link: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-9-75 - - - - display-text: | - [2] Overbeek R, Olson R, Pusch GD, Olsen GJ, Davis JJ, Disz T, et al.vThe SEED and the Rapid Annotation of microbial genomes using Subsystems Technology (RAST). Nucleic Acids Res. 2014;42: D206–D214. doi:10.1093/nar/gkt1226 - link: https://academic.oup.com/nar/article/42/D1/D206/1062536 - - - - display-text: | - [3] Brettin T, Davis JJ, Disz T, Edwards RA, Gerdes S, Olsen GJ, et al. RASTtk: A modular and extensible implementation of the RAST algorithm for building custom annotation pipelines and annotating batches of genomes. Sci Rep. 2015;5. doi:10.1038/srep08365 - link: https://www.nature.com/articles/srep08365 - - - - display-text: | - [4] Kent WJ. BLAT—The BLAST-Like Alignment Tool. Genome Res. 2002;12: 656–664. doi:10.1101/gr.229202 - link: https://genome.cshlp.org/content/12/4/656 - - - - display-text: | - [5] Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res. 1997;25: 3389-3402. doi:10.1093/nar/25.17.3389 - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC146917/ - - - - display-text: | - [6] Lowe TM, Eddy SR. tRNAscan-SE: a program for improved detection of transfer RNA genes in genomic sequence. Nucleic Acids Res. 1997;25: 955–964. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC146525/ - - - - display-text: | - [7] Cobucci-Ponzano B, Rossi M, Moracci M. Translational recoding in archaea. Extremophiles. 2012;16: 793–803. doi:10.1007/s00792-012-0482-8 - link: https://www.ncbi.nlm.nih.gov/pubmed/23015064 - - - - display-text: | - [8] Meyer F, Overbeek R, Rodriguez A. 
FIGfams: yet another set of protein families. Nucleic Acids Res. 2009;37 6643-54. doi:10.1093/nar/gkp698. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2777423/ - - - - display-text: | - [9] van Belkum A, Sluijuter M, de Groot R, Verbrugh H, Hermans PW. Novel BOX repeat PCR assay for high-resolution typing of Streptococcus pneumoniae strains. J Clin Microbiol. 1996;34: 1176–1179. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC228977/ - - - - display-text: | - [10] Croucher NJ, Vernikos GS, Parkhill J, Bentley SD. Identification, variation and transcription of pneumococcal repeat sequences. BMC Genomics. 2011;12: 120. doi:10.1186/1471-2164-12-120 - link: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-12-120 - - - - display-text: | - [11] Hyatt D, Chen G-L, Locascio PF, Land ML, Larimer FW, Hauser LJ. Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics. 2010;11: 119. doi:10.1186/1471-2105-11-119 - link: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-119 - - - - display-text: | - [12] Delcher AL, Bratke KA, Powers EC, Salzberg SL. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics. 2007;23: 673–679. doi:10.1093/bioinformatics/btm009 - link: https://academic.oup.com/bioinformatics/article/23/6/673/419055 - - - - display-text: | - [13] Akhter S, Aziz RK, Edwards RA. PhiSpy: a novel algorithm for finding prophages in bacterial genomes that combines similarity- and composition-based strategies. Nucleic Acids Res. 2012;40: e126. 
doi:10.1093/nar/gks406 - link: https://academic.oup.com/nar/article/40/16/e126/1027055 - diff --git a/ui/narrative/methods/annotate_contigset/img/rast-red.png b/ui/narrative/methods/annotate_contigset/img/rast-red.png deleted file mode 100644 index dc5f3609b7ac91605659756ec978ad83ca535e6a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6071 zcmZ`cWmuF^vo9q}hctpqBNEcmy>x>#NO!kLEse6k(%oIsE)CKM2ur63k^)Ld!^I!p zpYPuLV`iRb&YYQZX6DSyiP2D#$HS(=1^@t0Q9)Mgkq-X@(33}8qhFi*NU+=#3_Jk< z=jlH{0Wz~G003LvUPeYk!_L*q)zi+^4Xh|519tOpwXt`y1^~aM9Bo@2?L9KFi?uUp z<>-)PWmhdy5Liq4LkLMCGaDTip;9zm!6K<RIQ1Hw&J;ML?0!L5*{CHfvV)IYef#VHwz z&FBUYf*P&pJntA#fXg6J(UgGrYHk`R=ToOEVwARirgJ7#II321NtZ&`x3kw8e!WkvuBV3bTpijtoQfS+1N z%K~;HK=tICSUEud6+mL6+%E(yZ~(6q^=uS?Ppv@D1R+iX0Kx@cX+}r90MG+~x5Eq! zen5BzKms|@6aDkDmT-sd(Nh@>q6kJIxlkhzhdZX89w#m9gd+J%5&?5m^Gr$3KK~3# zo=`r5@26h@AU~1x@wTUTeiQh$6B7ck7JM^~{q83Z^cEJI_dh4A+#~^D!z*a&o}H_a zDn#T-i1U3R(-FFzDNerk#Rr=P0?B3|e{V_e)a{>d7DWQ*%hQAnKntrhQvCe*jCce%Od~y&+2}tB9H?c(ftIR^E@743; z0sv0hU3+I(L1-a%VH=aa562RB^7%|ah>arL9RN&a898-E8zqK703e$m%JxZ$`mC3N zz30iZUW}DqoZGho;nIx#ebR)|*j6FWJl?*jhDx(Wj(?(Id&@N=NyFEpX&0H~hR@lj z`x#%%jo{iGl-tYN5sHN_H4J)cPPY_|ViBv)00WU{L|oGKDxed8U<8julW5W_C-N(B zYsKn-bron%#5_=iqKqMJi6X-Q5A-&&L!LW1v_VsB2De!TQ7%Xo4k^U>X64158Bdm< z_06M+O6;RR!OpiTJA$Qnahai4J-ZZ`y!}qEI0wH-*S}`RO0pcQr?1AX8l?L4rW&n1 z=G=0!-WQ!XRIUe00Nqy_Y)&hsps%8@wxCcBe#Xd0w1g#yg%+;a$3mY}uH3}jMs(Fr zVZp;4rLVxwFh?B!l#4JRTBcu+kvK+%hXs* zDUYhOEqB$CC=N2Xi+N)3MdgdSOtEHuz1e2RAQ;DzveA#7&JnLF(mRyDpSgZ~@x2dQ zd-@$s`v|>E56f~&JLTF9ZbI>;hLl*0S>$cwCopn$v=_Bk%dpGHRKD)PAmz~Sw>OX13Wqh)XJ3%2j z!0+xVO2aj@{6|y9v-Y#Tn2xZ823ESPS#G=eAasFTSGrX_LlON&(?|!TZW|u zM=c#L5r#qrCHlICCXJAqoEoON_L}xGqB4Y9l3G>nob8uSe_BRbDq1N&{akpmT-)N@ z5@>E~L2lbRQ2XU5sWU0CEimp*3==PE5xu^0XJIoerBd0TQ@glE`k^oJN_=on|3IA<31;OcPamXg=GyUE78>GPbd z#;m@>^5wcCfg}5)sFl30e0eW&4R}4jeHoVduK9CH0w*u!mE(GKkh+X|sBQBO1D7RN zqF&AXr`8hR$_w*ja`C_79(kMMo1%hiJZlDKUj=(whg-+gzqsa_MXbvNGdx&5ynC1e 
z%0ie>Y|&yur9zERZ!X`xr|X7E2rpazZCE^%`k0T9B9)4V)DOAszgL{x-cqy)?@TAyl)JB{CWYDLC#UG`cwRdy}QmRnt@ zjzfu`O}vuRL3By5jjv5&kkiKC>#Tl`zM_-zGa*j4S^*cQuJzl))2JUgKX5h!$kVxo zP2BtoCi>1I%E&4>iP?oyB~_x;UMK&SB4jHk`^a?i)Rzf{?cYb!XG_mDB`_7eAY;r% zt9GKmruq*4E3t#$pK*Yo^0f!!iqx~pTzI?smLU~8>s(>)$DDwJB4 z`*Qs7Yvbs0f?)P>-oqhrAMpmfr6f{WkAp^kyR+Iss6o`-e;Ip|6ys&&nvI{bH=AEa=1_~1#>Cc= zER&~^1~0I#`2D1gavkl?SV?ZGsOm>pP1;P`>}{`cQ%RFLcmS$Q^ zM=O5r<$*l@(DSbQoMT@OJOMo2xa`3n3~sXiTYDE+>ld_m!b?6M1FAPLkYzf%0oLF}ta6N7#^xBw~d-$*;(jhWoeDc}rq+$8zQ@zi6nRUWu&ih{bqW4Ki z^`Z9c_RLy@u_NN>U1xKE&BY#duHeM#S*QOWvWL0ZTp_X5&eM0U7XugP>tw4z9xQhg zW9^H+EJrmR>K$fhl7~`@GOJOu5=Qqc3&$(L4n4m4r&BBP7f&L>Chrr^;|XLi;xppM zBJm;*a$k!J3H^}pxZArelbN)dWX>hOFT5YCB`4zwx|}_jpO{LYDo8s?OX_xhUvjtK zb3j?QJlHo#+co0H%WZo0=H{^1cg2_HxM-a~ed|3qnDxQ#s{UO2TiHz#Wm3`uI$HYa zttBtV^5eV-wp7qk1%P)<01y%e09Ox>bPoW$Uje}WTL2JF2LN(cxY+;%0C0aO%1Y_@ zE&a+dQ!v(}9qMKn2&@S9!RUo7|1poCC}8HWW9N$;AeI7E;0OdxN93GUxX>*MV)eEz zMWvV3+~lh}ho@_R^v~!@HJho2HSdT|uXrQcs%M>^U0{JQs!HD!b7Ub}2J2 z_6kqALO49(U2fq9>{L>N4%Agl=gL%!!jd7HHagR?R-V>XU<}GCXI$mBqhXW;E@sBn zXs6nKY-z&&5DQ`^l6;C;7)9`gD+X6W7L3(s2tsJf=a5Z>2Gfk=A?)APW0mKQrqY<1 zVoVNuX0CYABp8q4W^9n`HR9?_eDXrRsu&YqX&=lAv@qse@CIvb%fN!RLQsngr_sue zx4EFNSt!an3qmL#ZK|Mif;=Q&C=oX#hh?HEH85it3=^fE(RfG_DS0gMwdvX<6d4A{ zAcj!}^gpkAUVoO5Y7)d+P%@HEoZdfkKYo$g_()YvD|q{DixV zcCkUVzLo(qnQ+6Y;ti|J8A#c%U_2k)Ui5XPZYt9|AenyNgaI^#1{RA=CK3Z0Lj+WJ3}{-t9Vl*3U>gP>q9z5yyj2FZfBN zQm4DO21g>w!9{p`JIT?}^x|PSExxI?(>sB(mNO+fNIkq(YP5>YdwV~hGx#*6Z`CB! 
zbXFC*eT@h>3ffkLg&o(~+cO7C6W-t>`Tta8UcW`L( ze0H@4;$9>+S)}5YUYLi82;RH9tNQCm-c~K_rfk)7p^&7yQzzs-J@2ksVFdp<@}fp< z($4mc`U^R@dC@4sVl`cT`TF{&9vZ`m{dH+JW%xJ$AuL_?Cl9 zz;)Xo4_VQx{`V=$Cbfz2YRBG;qi5HTnh#<=FPu5`j@vN|6E(CuPqf~VOq&XKrp+(S z&{iRN&uq}Kx&*L@xdvL(>`ogZz!hT5C4 zJlov}^jWsiA|fni{mm;vy9G})p7TY4`Z3d*U^AL{7(rZ(qC{KMx%?xTSV11iznFbc zUwqqC&$FiCZZuhR5@2+mr3|u{#!phYd^(_=B_)Ey57l=9Qo;NO&Ss#gpLK$ zzCX66Sm4mT{<#M~__vmoe7sT`)pBH(WH(f=< zs3)|r@xmhY!P#jS?4?A>oeTV+Rk_g$(e`&_xO`uD zsixRr|3Fu?ZA;OQwH~B3KecVYIJ{Y4t{z~8kV}}EdfDnvTF1N3d#6{e-P8C*CiHL) z3T9ICa#}g9)NcjBg`qzMW)9A`D^?G~ZEhq!kzK9S5HU(6toZTqIhmlda6A7=Y6J6> zZ>-HEAKtegGhjit(jHS_HR#mY%)iZ^-^F1jS|TtNSr3JWi!~GWYhwmnZt$CB@uoXO zg$?n=SzR4|Hf7pF+)y?if-Kg1JJ&AEv&aUDCV70e^Y;n6;mLZsJj*cf9!3pykYI>_ z_r6&!ZE7#a_^9{fnxB^<@7;6#;3MN8SM(PRRA1ue-yiL}D8=w_aB;8BCywQIj%4m^ zM9o*5-VDx4X>p))<7iE393MN%*h(jjX>^pg)G32) zmJ%k$Mop>TbZ2{~gkYY4ql-IhgnV0{);2@5&FW|kKo&~M9tbwfK)Hvj1B@Jml03@* zcDep!v*ct-e%Gc)40F@0t%BUGMcV7zACg;AOE`3OQu;_zS-HRRWSv zkStR8+&;!H{Y)+j=j54`OF|SFWKk^#qlcpV<0IKnv|jm|vNfB%7GnAF#<5BENyD0? 
zbsOf^-z-<7K$yhCjoF~^&UdA)Eo|fEtadi4ebTOP)(Rqd<|ssrInSAoDMa2d6R8X| zrcKyn_l&)wyvnLn%Aujvn(Um6<2`!x<$mU6mb)a~bM+=NGiQ(?Ne{CO)O-mn5M{_Dn zX)r%k+6EI)_vCvli`;-zgIRCq3X8?s58G8_q$hI*)OazAW4!oYG9kP0(Or(+NmNBq zLsMX6C*4SS0>Qs(?e?3h^SPAAQ!B1muaX(`UXtB8`!}@AOr1xg(O{;JV7!75qgd(r4&88M>(IzvLmaq2g}0}M3&F@dJdM_dS4KT<h zt|T4R0BNFnraiCHz7=J3MVLbld3&Cu`-uA*V^j(l#`pm)UT9LFa5=n(gh-phgx@SL zaM@@7jX<>lKMvmJHx}w5FNHB_!K+YI)hf}E4IyZ;Ec}fk0qaUZj{{7u&h@NWu+Fm( zX#t{uNqktMt~r_23v@Nj;c4?u>PC5{?K0}o#P*HkrOfE;9?7x!J+m8bw9xsWaV;kH z)*s9XOd(aiP)lNdHB37Bw?tc#d|1kDoE3T9q|fM{!dbA2V}F=+QtyS?R)$g0g7^JvbEB0q5lJ-hfp{G diff --git a/ui/narrative/methods/annotate_contigset/spec.json b/ui/narrative/methods/annotate_contigset/spec.json deleted file mode 100644 index 5c06521..0000000 --- a/ui/narrative/methods/annotate_contigset/spec.json +++ /dev/null @@ -1,400 +0,0 @@ -{ - "ver" : "1.0.0", - "authors" : [ "chenry", "olson" ], - "contact" : "https://www.kbase.us/support/", - "visble" : true, - "categories" : ["active","annotation"], - "widgets" : { - "input" : null, - "output" : "kbaseGenomeView" - }, - "parameters" : [ { - "id" : "input_contigset", - "optional" : false, - "advanced" : false, - "allow_multiple" : false, - "default_values" : [ "" ], - "field_type" : "text", - "text_options" : { - "valid_ws_types" : [ "KBaseGenomes.ContigSet","KBaseGenomeAnnotations.Assembly" ] - } - }, { - "id": "scientific_name", - "optional": false, - "advanced": false, - "allow_multiple": false, - "default_values": [""], - "field_type": "dynamic_dropdown", - "dynamic_dropdown_options": { - "data_source": "custom", - "service_function": "taxonomy_re_api.search_species", - "service_version": "dev", - "service_params": [ - { - "search_text": "prefix:{{dynamic_dropdown_input}}", - "ns": "ncbi_taxonomy", - "limit": 20 - } - ], - "query_on_empty_input": 0, - "result_array_index": 0, - "path_to_selection_items": ["results"], - "selection_id": "ncbi_taxon_id", - "description_template": "NCBI Tax ID {{ncbi_taxon_id}}: 
{{scientific_name}}", - "multiselection": false - } - }, { - "id" : "domain", - "optional" : false, - "advanced" : false, - "allow_multiple" : false, - "default_values" : [ "B" ], - "field_type" : "dropdown", - "dropdown_options":{ - "options": [ - { - "value": "B", - "display": "B (Bacteria)", - "id": "B", - "ui_name": "B (Bacteria)" - }, - { - "value": "A", - "display": "A (Archaea)", - "id": "fast", - "ui_name": "A (Archaea)" - } - ] - } - }, { - "id" : "genetic_code", - "optional" : false, - "advanced" : false, - "allow_multiple" : false, - "default_values" : [ "11" ], - "field_type" : "dropdown", - "dropdown_options":{ - "options": [ - { - "value": "11", - "display": "11 (Archaea, most Bacteria, most Virii, and some Mitochondria)", - "id": "11", - "ui_name": "11 (Archaea, most Bacteria, most Virii, and some Mitochondria)" - }, - { - "value": "4", - "display": "4 (Mycoplasmaea, Spiroplasmaea, Ureoplasmaea, and Fungal Mitochondria)", - "id": "fast", - "ui_name": "4 (Mycoplasmaea, Spiroplasmaea, Ureoplasmaea, and Fungal Mitochondria)" - }, - { - "value": "25", - "display": "25 (Candidate Division SR1 and Gracilibacteria Code)", - "id": "fast", - "ui_name": "25 (Candidate Division SR1 and Gracilibacteria Code)" - } - ] - } - }, { - "id" : "output_genome", - "optional" : false, - "advanced" : false, - "allow_multiple" : false, - "default_values" : [ "" ], - "field_type" : "text", - "text_options" : { - "valid_ws_types" : [ "KBaseGenomes.Genome" ], - "is_output_name":true - } - }, { - "id": "call_features_rRNA_SEED", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_tRNA_trnascan", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": 
"call_selenoproteins", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_pyrrolysoproteins", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_repeat_region_SEED", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_strep_suis_repeat", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_strep_pneumo_repeat", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_crispr", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_CDS_glimmer3", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_CDS_prodigal", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "annotate_proteins_kmer_v2", - "optional":false, - "advanced":true, - "allow_multiple":false, - 
"default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "kmer_v1_parameters", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "annotate_proteins_similarity", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "resolve_overlapping_features", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_prophage_phispy", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - - }], - "job_id_output_field": "docker", - "behavior" : { - "service-mapping" : { - "url" : "", - "name" : "RAST_SDK", - "method" : "annotate_genome", - "input_mapping" : [ - { - "input_parameter": "input_contigset", - "target_property": "input_contigset", - "target_type_transform": "resolved-ref" - }, - { - "narrative_system_variable": "timestamp_epoch_ms", - "target_property": "relation_engine_timestamp_ms" - }, - { - "input_parameter": "scientific_name", - "target_property": "ncbi_taxon_id" - }, - { - "input_parameter": "domain", - "target_property": "domain" - }, - { - "input_parameter": "genetic_code", - "target_property": "genetic_code" - }, - { - "input_parameter": "call_features_rRNA_SEED", - "target_property": "call_features_rRNA_SEED" - }, - { - "input_parameter": "call_features_tRNA_trnascan", - "target_property": "call_features_tRNA_trnascan" - }, - { - "input_parameter": "call_selenoproteins", - 
"target_property": "call_selenoproteins" - }, - { - "input_parameter": "call_pyrrolysoproteins", - "target_property": "call_pyrrolysoproteins" - }, - { - "input_parameter": "call_features_repeat_region_SEED", - "target_property": "call_features_repeat_region_SEED" - }, - { - "input_parameter": "call_features_strep_suis_repeat", - "target_property": "call_features_strep_suis_repeat" - }, - { - "input_parameter": "call_features_strep_pneumo_repeat", - "target_property": "call_features_strep_pneumo_repeat" - }, - { - "input_parameter": "call_features_crispr", - "target_property": "call_features_crispr" - }, - { - "input_parameter": "call_features_CDS_glimmer3", - "target_property": "call_features_CDS_glimmer3" - }, - { - "input_parameter": "call_features_CDS_prodigal", - "target_property": "call_features_CDS_prodigal" - }, - { - "input_parameter": "annotate_proteins_kmer_v2", - "target_property": "annotate_proteins_kmer_v2" - }, - { - "input_parameter": "kmer_v1_parameters", - "target_property": "kmer_v1_parameters" - }, - { - "input_parameter": "annotate_proteins_similarity", - "target_property": "annotate_proteins_similarity" - }, - { - "input_parameter": "resolve_overlapping_features", - "target_property": "resolve_overlapping_features" - }, - { - "input_parameter": "call_features_prophage_phispy", - "target_property": "call_features_prophage_phispy" - }, - { - "input_parameter": "output_genome", - "generated_value": { - "prefix": "genome_" - }, - "target_property": "output_genome" - }, - { - "narrative_system_variable": "workspace", - "target_property": "workspace" - } - ], - "output_mapping" : [ - { - "service_method_output_path": [0, "report_name"], - "target_property": "report_name" - }, - { - "service_method_output_path": [0, "report_ref"], - "target_property": "report_ref" - }, - { - "narrative_system_variable": "workspace", - "target_property": "workspace_name" - }, - { - "input_parameter": "output_genome", - "target_property": "id" - }, - { - 
"narrative_system_variable": "workspace", - "target_property": "ws" - } - ] - } - } -} diff --git a/ui/narrative/methods/annotate_contigsets/display.yaml b/ui/narrative/methods/annotate_contigsets/display.yaml deleted file mode 100644 index 8215877..0000000 --- a/ui/narrative/methods/annotate_contigsets/display.yaml +++ /dev/null @@ -1,320 +0,0 @@ -# -# Define basic display information -# -#name : Annotate Multiple Microbial Assemblies with RASTtk - v1.073 (*Deprecated by 'Bulk Annotate Genomes/Assemblies with RASTtk - v1.073' in 6 months) -name : Annotate Multiple Microbial Assemblies with RASTtk - v1.073 - -tooltip : | - Annotate bacterial or archaeal assemblies and/or assembly sets using RASTtk (Rapid Annotations using Subsystems Technology toolkit). - -screenshots : - [] - -icon: rast-red.png - -# -# Define the set of other narrative methods that should be suggested to the user. -# -suggestions : - apps: - related : - [] - next : - [build_fba_model, build_species_tree] - methods: - related : - [] - next : - [annotate_domains_in_a_genome] - - -# -# Configure the display and description of the parameters -# -parameters : - input_genomes : - ui-name : | - Assemblies/AssemblySets - short-hint : | - One or more Assemblies and/or AssemblySets to annotate. - - genome_text : - ui-name : | - Assembly list - short-hint : | - List of assemblies to annotate, delimited with semicolon (;). This list is in addition to the input assemblies above. Example names: Carsonella_rudii_PC;Carsonella_rudii_CE [no spaces] or IDs: 21677/17/1;21677/16/1; or delimited by new lines (list each name or ID on a separate line). You can also mix names and IDs as long as there are no spaces (e.g., Carsonella_rudii_PC;21677/18/1). - - scientific_name : - ui-name : | - Scientific Name - short-hint : | - The scientific name to assign to the genome as found at https://www.ncbi.nlm.nih.gov/taxonomy. This App will assign the same scientific name to all genomes in this bulk annotation operation. 
If you need greater granularity for scientific name assignment, please use the Annotate Microbial Assembly App. - - domain : - ui-name : | - Domain - short-hint : | - The domain of life of the organism being annotated. Bacteria and Archaea are acceptable values. - - genetic_code : - ui-name : | - Genetic Code - short-hint : | - The genetic code used in translating to protein sequences. See https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for more information on genetic codes. This App will assign the same genetic code to all genomes in this bulk annotation operation. If you need greater granularity for genetic code assignment, please use the Annotate Microbial Assembly App. - - call_features_rRNA_SEED : - ui-name : | - Call rRNAs - short-hint : | - Call rRNA non-coding features with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_tRNA_trnascan : - ui-name : | - Call tRNA trnascan - short-hint : | - Call tRNA non-coding features with trnascan in RAST. See http://rast.nmpdr.org/ for more information. - - call_selenoproteins : - ui-name : | - Call selenoproteins - short-hint : | - Call new selenoprotein-containing genes with RAST. See http://rast.nmpdr.org/ for more information. - - call_pyrrolysoproteins : - ui-name : | - Call pyrrolysoproteins - short-hint : | - Call new pyrrolysoprotein-containing genes with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_repeat_region_SEED : - ui-name : | - Call SEED repeat region - short-hint : | - Call SEED-defined non-coding repeat regions. See http://rast.nmpdr.org/ for more information. - - call_features_insertion_sequences : - ui-name : | - Call features insertion sequences - short-hint : | - Call non-coding features insertion sequences with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_strep_suis_repeat : - ui-name : | - Call strep suis repeats - short-hint : | - Call non-coding strep suis repeats with RAST. 
See http://rast.nmpdr.org/ for more information. - - call_features_strep_pneumo_repeat : - ui-name : | - Call strep pneumo repeats - short-hint : | - Call non-coding strep pneumo repeats with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_crispr : - ui-name : | - Call crisprs - short-hint : | - Call non-coding crisprs with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_CDS_glimmer3 : - ui-name : | - Call glimmer3 - short-hint : | - Call CDS features with glimmer3. See http://rast.nmpdr.org/ for more information. WARNING: Calling new genes will delete existing genes. Glimmer will not run if all contigs < 2000nt. - - call_features_CDS_prodigal : - ui-name : | - Call prodigal - short-hint : | - Call CDS features with prodigal. See https://github.com/hyattpd/Prodigal for more information. WARNING: Callig new genes will delete existing genes. - - annotate_proteins_kmer_v2 : - ui-name : | - Annotate proteins kmer v2 - short-hint : | - Annotate proteins kmer v2 with RAST. See http://rast.nmpdr.org/ for more information. - - kmer_v1_parameters : - ui-name : | - Annotate proteins Kmer v1 - short-hint : | - Annotate proteins kmer v1 with RAST. See http://rast.nmpdr.org/ for more information. - - annotate_proteins_similarity : - ui-name : | - Annotate proteins similarity - short-hint : | - Annotate proteins similarity with RAST. See http://rast.nmpdr.org/ for more information. - - retain_old_anno_for_hypotheticals : - ui-name : | - Retain old annotations for hypotheticals - short-hint : | - Retain old annotations for hypotheticals. - - resolve_overlapping_features : - ui-name : | - Resolve overlapping features - short-hint : | - Resolve overlapping features. See http://rast.nmpdr.org/ for more information. - - - call_features_prophage_phispy : - ui-name : | - Call features prophage phispy - short-hint : | - Call features prophage phispy with RAST. See http://rast.nmpdr.org/ for more information. 
- - output_genome : - ui-name : | - Optional Output GenomeSet Name - short-hint : | - The name of a genome set of the output annotated genomes. This bulk operation will create multiple genomes. This is the name to assign to the set of genomes created. - placeholder : | - Name to assign genome set - -parameter-groups: - genomes: - ui-name: | - Assemblies to annotate - short-hint: | - Assemblies to annotate. - -description : | -

This KBase annotation App (Annotate Multiple Microbial Assemblies) uses components from the RAST (Rapid Annotations using Subsystems Technology) toolkit [1,2,3] to annotate prokaryotic assemblies or to perform computations on a set of assemblies so that they are consistent. The newly generated genomes will have the same names as the input assemblies with “.RAST” appended.

-

The release versions of the RASTtk component services used in this app are: -

-

- -

The required input is one or more Assembly objects (older Narratives used the term ContigSet object) and/or AssemblySet objects. An Assembly can be generated using any of the Assembly Apps, by importing a FASTA file, or by uploading an .faa file directly from NCBI via FTP (Upload File to Staging from Web) and then importing. AssemblySets are created with the App Build AssemblySet.

- -

Assemblies have three essential metadata fields that must be completed: scientific name, domain, and genetic code. The default genetic code for bacterial and archaeal genomes is genetic code 11, the default domain is bacterial, and the default scientific name is "unknown taxon". All metadata fields are required because they affect conditional parameters in various programs that are being run. The values specified in the App will be used for all Assemblies annotated. KBase annotation supports genetic codes 11 (most bacteria and archaea), 4 for Mycoplasma, and 25. For more information on genetic codes please refer to this NCBI document. Use applicable scientific names if possible.

- -

The App annotates the Assembly-typed objects (sets of contigs) and generates Genome-typed objects with both coding and non-coding features. By definition, an Assembly has no annotation (only sequence) and the default is to select nearly all of the available options. The available annotation features are in the advanced parameters and are discussed in more detail below.

- -

The Default Annotation Pipeline
Clicking "Run" will run the default pipeline. For a typical 2-5 MBp assembly, the default annotation pipeline should take about 5 minutes per assembly. Because this is the first annotation for these assemblies/genomes, the default pipeline for this App consists of the following steps: -

    -
  • DNA/RNA-based predictions -
      -
    1. Call rRNAs (default = on)
      Predict rRNAs in the genome. This is a custom BLAST-based tool for finding rRNAs.
    2. -
    3. Call tRNAs with tRNAscan (default = on)
      Predict tRNAs in the genome with tRNAscan-SE [6].
    4. -
    5. Call CRISPRs (default = on)
      This is a custom tool that uses a perl regular expression-based search to find CRISPR elements.
    6. -
    7. Find prophage elements with phispy (default = off)
      This will use the phispy program to find prophage elements [13].
    8. -
  • - -
  • Gene predictions -
      -
    1. Call protein-encoding genes with both Prodigal [11] and Glimmer3 [12] (default = on)
    2. -
    3. Call selenoproteins and pyrrolysylproteins [7] (default = on)
      These are custom BLAST-based tools.
    4. -
  • - -
  • Repeats -
      -
    1. Call SEED large repeat regions (default = on)
      This is a BLASTn search within the genome for regions with greater than 95% nucleotide similarity that are at least 100 bp in length.
    2. -
    3. Find Streptococcus repeat regions [9, 10] (default = on)
      This is a command that should only be implemented if the genus is Streptococcus.
    4. -
  • - -
  • Add SEED Functions/Annotation to protein-encoding genes (k-mers needed for Metabolic Modeling) -
      -
    1. Annotate protein-encoding genes with k-mers (version 2; default = on)
      This is a set of signature k-mers (amino acid 8-mers) built from the annotations in the CoreSEED. The CoreSEED is a database of ~1,000 diverse microbial genomes and is currently the main focus of the RAST manual annotation efforts. Annotating using this k-mer set provides the user with our most stable and best estimate of the core gene functions.
    2. - -
    3. Annotate remaining hypothetical proteins with k-mers (version 1; default = on)
      This set of k-mers is built from the FigFam collection [8] in the PubSEED, which is the publicly annotated version of the SEED database that consists of ~12,000 microbial genomes. The "classic" version of RAST on the RAST website (http://rast.nmpdr.org) uses the FigFam-based k-mers (hence the version 1 designation).
    4. - -
    5. Annotate remaining hypothetical proteins by protein similarity (default = on)
      We have several non-redundant databases for the most common genera. If the genus name of your organism matches one of these, a search will be performed against the remaining hypothetical proteins to attempt to find a function. The search uses a combination of BLAT [4] and BLAST [5].
    6. -
  • - -
  • Other -
      -
    1. Perform a basic gene overlap removal (default = on)
      Using multiple gene calling algorithms can result in overlapping gene calls. This program is a custom tool that attempts to minimize overlaps and gaps to provide a set of calls that has a smaller number of gene calling errors. We do not recommend using overlap removal if you are attempting to annotate phage.
    2. - -
    3. Retain old annotations for hypotheticals (default = off)
      In instances where the pipeline fails to find an annotation for a gene, this will retain the original annotation from the input Genome-typed object.
    4. -
  • -
-

- -

Advanced Annotation Options
If you wish to customize the features in your annotation, click the "show advanced options" link. This will display the full set of available annotation options. The "Call features prophage phispy" option is unchecked because it is slower.

- -

The Results -

    -
  • The Objects section has a table of all the data objects that were created by the App. Click on the name of the data object to open a data viewer cell (below the currently selected cell).
  • -
  • The Summary section gives details about the coding and noncoding features that were created and the average protein length.
  • -
  • The Files section has a downloadable version of the Summary.
  • -
-

- -

GUI Output
The GUI output currently consists of three tabs. The "Overview" tab provides basic information on the annotation job, the "Browse Features" tab allows the user to scroll through the features that were called, and the "Browse Contigs" tab provides information on the contigs in the genome. Users can sort on the various types of features. Note that some features will overlap (e.g., "prophage" and "CDS").

- -

Additional Information
For more information on the steps of the default RASTtk pipeline please refer to our publication on this (publication forthcoming). For more detailed tutorial information and to explore the additional functionality of RASTtk not currently available in the Narrative interface please refer to http://tutorial.theseed.org.

- -

Team members who developed & deployed algorithm in KBase: Thomas Brettin, James Davis, Terry Disz, Robert Edwards, Chris Henry, Gary Olsen, Robert Olson, Ross Overbeek, Bruce Parrello, Gordon Pusch, Roman Sutormin, and Fangfang Xia. For questions, please contact us.

- -

The authors of RAST request that if you use the results of this annotation in your work, please cite the first three listed publications:

- -publications : - - - display-text: | - [1] Aziz RK, Bartels D, Best AA, DeJongh M, Disz T, Edwards RA, et al. The RAST Server: Rapid Annotations using Subsystems Technology. BMC Genomics. 2008;9: 75. doi:10.1186/1471-2164-9-75 - link: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-9-75 - - - - display-text: | - [2] Overbeek R, Olson R, Pusch GD, Olsen GJ, Davis JJ, Disz T, et al. The SEED and the Rapid Annotation of microbial genomes using Subsystems Technology (RAST). Nucleic Acids Res. 2014;42: D206–D214. doi:10.1093/nar/gkt1226 - link: https://academic.oup.com/nar/article/42/D1/D206/1062536 - - - - display-text: | - [3] Brettin T, Davis JJ, Disz T, Edwards RA, Gerdes S, Olsen GJ, et al. RASTtk: A modular and extensible implementation of the RAST algorithm for building custom annotation pipelines and annotating batches of genomes. Sci Rep. 2015;5. doi:10.1038/srep08365 - link: https://www.nature.com/articles/srep08365 - - - - display-text: | - [4] Kent WJ. BLAT—The BLAST-Like Alignment Tool. Genome Res. 2002;12: 656–664. doi:10.1101/gr.229202 - link: https://genome.cshlp.org/content/12/4/656 - - - - display-text: | - [5] Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res. 1997;25: 3389-3402. doi:10.1093/nar/25.17.3389 - - - - display-text: | - [6] Lowe TM, Eddy SR. tRNAscan-SE: a program for improved detection of transfer RNA genes in genomic sequence. Nucleic Acids Res. 1997;25: 955–964. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC146525/ - - - - display-text: | - [7] Cobucci-Ponzano B, Rossi M, Moracci M. Translational recoding in archaea. Extremophiles. 2012;16: 793–803. doi:10.1007/s00792-012-0482-8 - link: https://www.ncbi.nlm.nih.gov/pubmed/23015064 - - - - display-text: | - [8] Meyer F, Overbeek R, Rodriguez A. FIGfams: yet another set of protein families. Nucleic Acids Res. 2009;37 6643-54. 
doi:10.1093/nar/gkp698. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2777423/ - - - - display-text: | - [9] van Belkum A, Sluijuter M, de Groot R, Verbrugh H, Hermans PW. Novel BOX repeat PCR assay for high-resolution typing of Streptococcus pneumoniae strains. J Clin Microbiol. 1996;34: 1176–1179. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC228977/ - - - - display-text: | - [10] Croucher NJ, Vernikos GS, Parkhill J, Bentley SD. Identification, variation and transcription of pneumococcal repeat sequences. BMC Genomics. 2011;12: 120. doi:10.1186/1471-2164-12-120 - link: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-12-120 - - - - display-text: | - [11] Hyatt D, Chen G-L, Locascio PF, Land ML, Larimer FW, Hauser LJ. Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics. 2010;11: 119. doi:10.1186/1471-2105-11-119 - link: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-119 - - - - display-text: | - [12] Delcher AL, Bratke KA, Powers EC, Salzberg SL. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics. 2007;23: 673–679. doi:10.1093/bioinformatics/btm009 - link: https://academic.oup.com/bioinformatics/article/23/6/673/419055 - - - - display-text: | - [13] Akhter S, Aziz RK, Edwards RA. PhiSpy: a novel algorithm for finding prophages in bacterial genomes that combines similarity- and composition-based strategies. Nucleic Acids Res. 2012;40: e126. 
doi:10.1093/nar/gks406 - link: https://academic.oup.com/nar/article/40/16/e126/1027055 diff --git a/ui/narrative/methods/annotate_contigsets/img/rast-red.png b/ui/narrative/methods/annotate_contigsets/img/rast-red.png deleted file mode 100644 index dc5f3609b7ac91605659756ec978ad83ca535e6a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6071 zcmZ`cWmuF^vo9q}hctpqBNEcmy>x>#NO!kLEse6k(%oIsE)CKM2ur63k^)Ld!^I!p zpYPuLV`iRb&YYQZX6DSyiP2D#$HS(=1^@t0Q9)Mgkq-X@(33}8qhFi*NU+=#3_Jk< z=jlH{0Wz~G003LvUPeYk!_L*q)zi+^4Xh|519tOpwXt`y1^~aM9Bo@2?L9KFi?uUp z<>-)PWmhdy5Liq4LkLMCGaDTip;9zm!6K<RIQ1Hw&J;ML?0!L5*{CHfvV)IYef#VHwz z&FBUYf*P&pJntA#fXg6J(UgGrYHk`R=ToOEVwARirgJ7#II321NtZ&`x3kw8e!WkvuBV3bTpijtoQfS+1N z%K~;HK=tICSUEud6+mL6+%E(yZ~(6q^=uS?Ppv@D1R+iX0Kx@cX+}r90MG+~x5Eq! zen5BzKms|@6aDkDmT-sd(Nh@>q6kJIxlkhzhdZX89w#m9gd+J%5&?5m^Gr$3KK~3# zo=`r5@26h@AU~1x@wTUTeiQh$6B7ck7JM^~{q83Z^cEJI_dh4A+#~^D!z*a&o}H_a zDn#T-i1U3R(-FFzDNerk#Rr=P0?B3|e{V_e)a{>d7DWQ*%hQAnKntrhQvCe*jCce%Od~y&+2}tB9H?c(ftIR^E@743; z0sv0hU3+I(L1-a%VH=aa562RB^7%|ah>arL9RN&a898-E8zqK703e$m%JxZ$`mC3N zz30iZUW}DqoZGho;nIx#ebR)|*j6FWJl?*jhDx(Wj(?(Id&@N=NyFEpX&0H~hR@lj z`x#%%jo{iGl-tYN5sHN_H4J)cPPY_|ViBv)00WU{L|oGKDxed8U<8julW5W_C-N(B zYsKn-bron%#5_=iqKqMJi6X-Q5A-&&L!LW1v_VsB2De!TQ7%Xo4k^U>X64158Bdm< z_06M+O6;RR!OpiTJA$Qnahai4J-ZZ`y!}qEI0wH-*S}`RO0pcQr?1AX8l?L4rW&n1 z=G=0!-WQ!XRIUe00Nqy_Y)&hsps%8@wxCcBe#Xd0w1g#yg%+;a$3mY}uH3}jMs(Fr zVZp;4rLVxwFh?B!l#4JRTBcu+kvK+%hXs* zDUYhOEqB$CC=N2Xi+N)3MdgdSOtEHuz1e2RAQ;DzveA#7&JnLF(mRyDpSgZ~@x2dQ zd-@$s`v|>E56f~&JLTF9ZbI>;hLl*0S>$cwCopn$v=_Bk%dpGHRKD)PAmz~Sw>OX13Wqh)XJ3%2j z!0+xVO2aj@{6|y9v-Y#Tn2xZ823ESPS#G=eAasFTSGrX_LlON&(?|!TZW|u zM=c#L5r#qrCHlICCXJAqoEoON_L}xGqB4Y9l3G>nob8uSe_BRbDq1N&{akpmT-)N@ z5@>E~L2lbRQ2XU5sWU0CEimp*3==PE5xu^0XJIoerBd0TQ@glE`k^oJN_=on|3IA<31;OcPamXg=GyUE78>GPbd z#;m@>^5wcCfg}5)sFl30e0eW&4R}4jeHoVduK9CH0w*u!mE(GKkh+X|sBQBO1D7RN zqF&AXr`8hR$_w*ja`C_79(kMMo1%hiJZlDKUj=(whg-+gzqsa_MXbvNGdx&5ynC1e 
z%0ie>Y|&yur9zERZ!X`xr|X7E2rpazZCE^%`k0T9B9)4V)DOAszgL{x-cqy)?@TAyl)JB{CWYDLC#UG`cwRdy}QmRnt@ zjzfu`O}vuRL3By5jjv5&kkiKC>#Tl`zM_-zGa*j4S^*cQuJzl))2JUgKX5h!$kVxo zP2BtoCi>1I%E&4>iP?oyB~_x;UMK&SB4jHk`^a?i)Rzf{?cYb!XG_mDB`_7eAY;r% zt9GKmruq*4E3t#$pK*Yo^0f!!iqx~pTzI?smLU~8>s(>)$DDwJB4 z`*Qs7Yvbs0f?)P>-oqhrAMpmfr6f{WkAp^kyR+Iss6o`-e;Ip|6ys&&nvI{bH=AEa=1_~1#>Cc= zER&~^1~0I#`2D1gavkl?SV?ZGsOm>pP1;P`>}{`cQ%RFLcmS$Q^ zM=O5r<$*l@(DSbQoMT@OJOMo2xa`3n3~sXiTYDE+>ld_m!b?6M1FAPLkYzf%0oLF}ta6N7#^xBw~d-$*;(jhWoeDc}rq+$8zQ@zi6nRUWu&ih{bqW4Ki z^`Z9c_RLy@u_NN>U1xKE&BY#duHeM#S*QOWvWL0ZTp_X5&eM0U7XugP>tw4z9xQhg zW9^H+EJrmR>K$fhl7~`@GOJOu5=Qqc3&$(L4n4m4r&BBP7f&L>Chrr^;|XLi;xppM zBJm;*a$k!J3H^}pxZArelbN)dWX>hOFT5YCB`4zwx|}_jpO{LYDo8s?OX_xhUvjtK zb3j?QJlHo#+co0H%WZo0=H{^1cg2_HxM-a~ed|3qnDxQ#s{UO2TiHz#Wm3`uI$HYa zttBtV^5eV-wp7qk1%P)<01y%e09Ox>bPoW$Uje}WTL2JF2LN(cxY+;%0C0aO%1Y_@ zE&a+dQ!v(}9qMKn2&@S9!RUo7|1poCC}8HWW9N$;AeI7E;0OdxN93GUxX>*MV)eEz zMWvV3+~lh}ho@_R^v~!@HJho2HSdT|uXrQcs%M>^U0{JQs!HD!b7Ub}2J2 z_6kqALO49(U2fq9>{L>N4%Agl=gL%!!jd7HHagR?R-V>XU<}GCXI$mBqhXW;E@sBn zXs6nKY-z&&5DQ`^l6;C;7)9`gD+X6W7L3(s2tsJf=a5Z>2Gfk=A?)APW0mKQrqY<1 zVoVNuX0CYABp8q4W^9n`HR9?_eDXrRsu&YqX&=lAv@qse@CIvb%fN!RLQsngr_sue zx4EFNSt!an3qmL#ZK|Mif;=Q&C=oX#hh?HEH85it3=^fE(RfG_DS0gMwdvX<6d4A{ zAcj!}^gpkAUVoO5Y7)d+P%@HEoZdfkKYo$g_()YvD|q{DixV zcCkUVzLo(qnQ+6Y;ti|J8A#c%U_2k)Ui5XPZYt9|AenyNgaI^#1{RA=CK3Z0Lj+WJ3}{-t9Vl*3U>gP>q9z5yyj2FZfBN zQm4DO21g>w!9{p`JIT?}^x|PSExxI?(>sB(mNO+fNIkq(YP5>YdwV~hGx#*6Z`CB! 
zbXFC*eT@h>3ffkLg&o(~+cO7C6W-t>`Tta8UcW`L( ze0H@4;$9>+S)}5YUYLi82;RH9tNQCm-c~K_rfk)7p^&7yQzzs-J@2ksVFdp<@}fp< z($4mc`U^R@dC@4sVl`cT`TF{&9vZ`m{dH+JW%xJ$AuL_?Cl9 zz;)Xo4_VQx{`V=$Cbfz2YRBG;qi5HTnh#<=FPu5`j@vN|6E(CuPqf~VOq&XKrp+(S z&{iRN&uq}Kx&*L@xdvL(>`ogZz!hT5C4 zJlov}^jWsiA|fni{mm;vy9G})p7TY4`Z3d*U^AL{7(rZ(qC{KMx%?xTSV11iznFbc zUwqqC&$FiCZZuhR5@2+mr3|u{#!phYd^(_=B_)Ey57l=9Qo;NO&Ss#gpLK$ zzCX66Sm4mT{<#M~__vmoe7sT`)pBH(WH(f=< zs3)|r@xmhY!P#jS?4?A>oeTV+Rk_g$(e`&_xO`uD zsixRr|3Fu?ZA;OQwH~B3KecVYIJ{Y4t{z~8kV}}EdfDnvTF1N3d#6{e-P8C*CiHL) z3T9ICa#}g9)NcjBg`qzMW)9A`D^?G~ZEhq!kzK9S5HU(6toZTqIhmlda6A7=Y6J6> zZ>-HEAKtegGhjit(jHS_HR#mY%)iZ^-^F1jS|TtNSr3JWi!~GWYhwmnZt$CB@uoXO zg$?n=SzR4|Hf7pF+)y?if-Kg1JJ&AEv&aUDCV70e^Y;n6;mLZsJj*cf9!3pykYI>_ z_r6&!ZE7#a_^9{fnxB^<@7;6#;3MN8SM(PRRA1ue-yiL}D8=w_aB;8BCywQIj%4m^ zM9o*5-VDx4X>p))<7iE393MN%*h(jjX>^pg)G32) zmJ%k$Mop>TbZ2{~gkYY4ql-IhgnV0{);2@5&FW|kKo&~M9tbwfK)Hvj1B@Jml03@* zcDep!v*ct-e%Gc)40F@0t%BUGMcV7zACg;AOE`3OQu;_zS-HRRWSv zkStR8+&;!H{Y)+j=j54`OF|SFWKk^#qlcpV<0IKnv|jm|vNfB%7GnAF#<5BENyD0? 
zbsOf^-z-<7K$yhCjoF~^&UdA)Eo|fEtadi4ebTOP)(Rqd<|ssrInSAoDMa2d6R8X| zrcKyn_l&)wyvnLn%Aujvn(Um6<2`!x<$mU6mb)a~bM+=NGiQ(?Ne{CO)O-mn5M{_Dn zX)r%k+6EI)_vCvli`;-zgIRCq3X8?s58G8_q$hI*)OazAW4!oYG9kP0(Or(+NmNBq zLsMX6C*4SS0>Qs(?e?3h^SPAAQ!B1muaX(`UXtB8`!}@AOr1xg(O{;JV7!75qgd(r4&88M>(IzvLmaq2g}0}M3&F@dJdM_dS4KT<h zt|T4R0BNFnraiCHz7=J3MVLbld3&Cu`-uA*V^j(l#`pm)UT9LFa5=n(gh-phgx@SL zaM@@7jX<>lKMvmJHx}w5FNHB_!K+YI)hf}E4IyZ;Ec}fk0qaUZj{{7u&h@NWu+Fm( zX#t{uNqktMt~r_23v@Nj;c4?u>PC5{?K0}o#P*HkrOfE;9?7x!J+m8bw9xsWaV;kH z)*s9XOd(aiP)lNdHB37Bw?tc#d|1kDoE3T9q|fM{!dbA2V}F=+QtyS?R)$g0g7^JvbEB0q5lJ-hfp{G diff --git a/ui/narrative/methods/annotate_contigsets/spec.json b/ui/narrative/methods/annotate_contigsets/spec.json deleted file mode 100644 index b9c1936..0000000 --- a/ui/narrative/methods/annotate_contigsets/spec.json +++ /dev/null @@ -1,412 +0,0 @@ -{ - "ver" : "1.0.0", - "name" : "Annotate multiple microbial assemblies", - "authors" : [ "landml"], - "contact" : "https://www.kbase.us/support/", - "visble" : true, - "categories" : ["active","annotation"], - "widgets" : { - "input" : null, - "output" : "no-display" - }, - "parameters" : [ { - "id" : "input_genomes", - "optional" : true, - "advanced" : false, - "allow_multiple" : true, - "default_values" : [ "" ], - "field_type" : "text", - "text_options" : { - "valid_ws_types" : [ "KBaseGenomes.ContigSet","KBaseGenomeAnnotations.Assembly", "KBaseSets.AssemblySet" ] - } - }, { - "id": "genome_text", - "optional": true, - "advanced": false, - "allow_multiple": false, - "default_values": [ "" ], - "field_type": "textarea", - "textarea_options" : { - "n_rows" : 10 - } - }, { - "id": "scientific_name", - "optional": true, - "advanced": false, - "allow_multiple": false, - "default_values": [""], - "field_type": "dynamic_dropdown", - "dynamic_dropdown_options": { - "data_source": "custom", - "service_function": "taxonomy_re_api.search_species", - "service_version": "dev", - "service_params": [ - { - "search_text": 
"prefix:{{dynamic_dropdown_input}}", - "ns": "ncbi_taxonomy", - "limit": 20 - } - ], - "query_on_empty_input": 0, - "result_array_index": 0, - "path_to_selection_items": ["results"], - "selection_id": "ncbi_taxon_id", - "description_template": "NCBI Tax ID {{ncbi_taxon_id}}: {{scientific_name}}", - "multiselection": false - } - }, { - "id" : "domain", - "optional" : true, - "advanced" : true, - "allow_multiple" : false, - "default_values" : [ "B" ], - "field_type" : "dropdown", - "dropdown_options":{ - "options": [ - { - "value": "B", - "display": "B (Bacteria)", - "id": "B", - "ui_name": "B (Bacteria)" - }, - { - "value": "A", - "display": "A (Archaea)", - "id": "fast", - "ui_name": "A (Archaea)" - }, - { - "value": "U", - "display": "U (Unknown)", - "id": "fast", - "ui_name": "U (Unknown)" - } - ] - } - }, { - "id" : "genetic_code", - "optional" : true, - "advanced" : true, - "allow_multiple" : false, - "default_values" : [ "11" ], - "field_type" : "dropdown", - "dropdown_options":{ - "options": [ - { - "value": "11", - "display": "11 (Archaea, most Bacteria, most Virii, and some Mitochondria)", - "id": "11", - "ui_name": "11 (Archaea, most Bacteria, most Virii, and some Mitochondria)" - }, - { - "value": "4", - "display": "4 (Mycoplasmaea, Spiroplasmaea, Ureoplasmaea, and Fungal Mitochondria)", - "id": "fast", - "ui_name": "4 (Mycoplasmaea, Spiroplasmaea, Ureoplasmaea, and Fungal Mitochondria)" - }, - { - "value": "25", - "display": "25 (Candidate Division SR1 and Gracilibacteria Code)", - "id": "fast", - "ui_name": "25 (Candidate Division SR1 and Gracilibacteria Code)" - } - ] - } - }, { - "id": "call_features_rRNA_SEED", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_tRNA_trnascan", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : 
"checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_selenoproteins", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_pyrrolysoproteins", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_repeat_region_SEED", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_strep_suis_repeat", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_strep_pneumo_repeat", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_crispr", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_CDS_glimmer3", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_CDS_prodigal", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": 
"annotate_proteins_kmer_v2", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "kmer_v1_parameters", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "annotate_proteins_similarity", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "resolve_overlapping_features", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_prophage_phispy", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "output_genome", - "optional": false, - "advanced": false, - "allow_multiple": false, - "default_values": [ "" ], - "field_type": "text", - "text_options" : { - "valid_ws_types" : [ "KBaseSearch.GenomeSet" ], - "is_output_name":true - } - }], - "job_id_output_field": "docker", - "behavior" : { - "service-mapping" : { - "url" : "", - "name" : "RAST_SDK", - "method" : "annotate_genomes", - "input_mapping" : [ - { - "input_parameter": "input_genomes", - "target_property": "input_genomes", - "target_type_transform": "resolved-ref" - }, - { - "input_parameter": "genome_text", - "target_property": "genome_text" - }, - { - "narrative_system_variable": "timestamp_epoch_ms", - "target_property": "relation_engine_timestamp_ms" - }, - { - "input_parameter": "scientific_name", - "target_property": "ncbi_taxon_id" - 
}, - { - "input_parameter": "domain", - "target_property": "domain" - }, - { - "input_parameter": "genetic_code", - "target_property": "genetic_code" - }, - { - "input_parameter": "call_features_rRNA_SEED", - "target_property": "call_features_rRNA_SEED" - }, - { - "input_parameter": "call_features_tRNA_trnascan", - "target_property": "call_features_tRNA_trnascan" - }, - { - "input_parameter": "call_selenoproteins", - "target_property": "call_selenoproteins" - }, - { - "input_parameter": "call_pyrrolysoproteins", - "target_property": "call_pyrrolysoproteins" - }, - { - "input_parameter": "call_features_repeat_region_SEED", - "target_property": "call_features_repeat_region_SEED" - }, - { - "input_parameter": "call_features_strep_suis_repeat", - "target_property": "call_features_strep_suis_repeat" - }, - { - "input_parameter": "call_features_strep_pneumo_repeat", - "target_property": "call_features_strep_pneumo_repeat" - }, - { - "input_parameter": "call_features_crispr", - "target_property": "call_features_crispr" - }, - { - "input_parameter": "call_features_CDS_glimmer3", - "target_property": "call_features_CDS_glimmer3" - }, - { - "input_parameter": "call_features_CDS_prodigal", - "target_property": "call_features_CDS_prodigal" - }, - { - "input_parameter": "annotate_proteins_kmer_v2", - "target_property": "annotate_proteins_kmer_v2" - }, - { - "input_parameter": "kmer_v1_parameters", - "target_property": "kmer_v1_parameters" - }, - { - "input_parameter": "annotate_proteins_similarity", - "target_property": "annotate_proteins_similarity" - }, - { - "input_parameter": "resolve_overlapping_features", - "target_property": "resolve_overlapping_features" - }, - { - "input_parameter": "call_features_prophage_phispy", - "target_property": "call_features_prophage_phispy" - }, - { - "input_parameter": "output_genome", - "generated_value": { - "prefix": "genomeset_" - }, - "target_property": "output_genome" - }, - { - "narrative_system_variable": "workspace", - 
"target_property": "workspace" - } - ], - "output_mapping" : [ - { - "service_method_output_path": [0, "report_name"], - "target_property": "report_name" - }, - { - "service_method_output_path": [0, "report_ref"], - "target_property": "report_ref" - }, - { - "narrative_system_variable": "workspace", - "target_property": "workspace_name" - } - ] - } - } -} diff --git a/ui/narrative/methods/bulk_annotate_genomes_assemblies/display.yaml b/ui/narrative/methods/bulk_annotate_genomes_assemblies/display.yaml index be5ffd0..d5e7f77 100644 --- a/ui/narrative/methods/bulk_annotate_genomes_assemblies/display.yaml +++ b/ui/narrative/methods/bulk_annotate_genomes_assemblies/display.yaml @@ -1,10 +1,10 @@ # # define display information # -name : Bulk Annotate Genomes/Assemblies with RASTtk - v1.073 +name : Annotate Multiple Genomes/Assemblies with RASTtk - v1.073 tooltip : | - Annotate or re-annotate genomes/assemblies using RASTtk (Rapid Annotations using Subsystems Technology toolkit). + Annotate or re-annotate multiple genomes/assemblies using RASTtk (Rapid Annotations using Subsystems Technology toolkit). screenshots : [] @@ -52,22 +52,6 @@ parameters : A string of genome/assembly ids/names delimited by semicolon(;), newline or bar(|) long-hint : | This list is in addition to the input genomes/assemblies above. Example names: Carsonella_rudii_PC;Carsonella_rudii_CE [no spaces] or IDs 21677/17/1;21677/16/1; or delimited by new lines (list each name or ID on a separate line). You can also mix names and IDs as long as there are no spaces (e.g., Carsonella_rudii_PC;21677/18/1). - scientific_name : - ui-name : | - Scientific Name - short-hint : | - The scientific name to assign to the genome as found at https://www.ncbi.nlm.nih.gov/taxonomy. This App will assign the same scientific name to all genomes in this bulk annotation operation. If you need greater granularity for scientific name assignment, please use the Annotate Microbial Assembly App. 
- domain : - ui-name : | - Domain - short-hint : | - The domain of life of the organism being annotated. Bacteria and Archaea are acceptable values. - genetic_code : - ui-name : | - Genetic Code - short-hint : | - The genetic code used in translating to protein sequences. See https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for more information on genetic codes. This App will assign the same genetic code to all genomes in this bulk annotation operation. If you need greater granularity for genetic code assignment, please use the Annotate Microbial Assembly App. - output_GenomeSet_name : ui-name : | Output Annotated GenomeSet Name diff --git a/ui/narrative/methods/bulk_annotate_genomes_assemblies/spec.json b/ui/narrative/methods/bulk_annotate_genomes_assemblies/spec.json index ff82062..962372b 100644 --- a/ui/narrative/methods/bulk_annotate_genomes_assemblies/spec.json +++ b/ui/narrative/methods/bulk_annotate_genomes_assemblies/spec.json @@ -51,89 +51,6 @@ "n_rows" : 10 } }, { - "id": "scientific_name", - "optional": true, - "advanced": false, - "allow_multiple": false, - "default_values": [""], - "field_type": "dynamic_dropdown", - "dynamic_dropdown_options": { - "data_source": "custom", - "service_function": "taxonomy_re_api.search_species", - "service_version": "dev", - "service_params": [ - { - "search_text": "prefix:{{dynamic_dropdown_input}}", - "ns": "ncbi_taxonomy", - "limit": 20 - } - ], - "query_on_empty_input": 0, - "result_array_index": 0, - "path_to_selection_items": ["results"], - "selection_id": "ncbi_taxon_id", - "description_template": "NCBI Tax ID {{ncbi_taxon_id}}: {{scientific_name}}", - "multiselection": false - } - }, { - "id" : "domain", - "optional" : true, - "advanced" : false, - "allow_multiple" : false, - "default_values" : [ "B" ], - "field_type" : "dropdown", - "dropdown_options":{ - "options": [ - { - "value": "B", - "display": "B (Bacteria)", - "id": "B", - "ui_name": "B (Bacteria)" - }, - { - "value": "A", - "display": "A 
(Archaea)", - "id": "fast", - "ui_name": "A (Archaea)" - }, - { - "value": "U", - "display": "U (Unknown)", - "id": "fast", - "ui_name": "U (Unknown)" - } - ] - } - }, { - "id" : "genetic_code", - "optional" : true, - "advanced" : false, - "allow_multiple" : false, - "default_values" : [ "11" ], - "field_type" : "dropdown", - "dropdown_options":{ - "options": [ - { - "value": "11", - "display": "11 (Archaea, most Bacteria, most Virii, and some Mitochondria)", - "id": "11", - "ui_name": "11 (Archaea, most Bacteria, most Virii, and some Mitochondria)" - }, - { - "value": "4", - "display": "4 (Mycoplasmaea, Spiroplasmaea, Ureoplasmaea, and Fungal Mitochondria)", - "id": "fast", - "ui_name": "4 (Mycoplasmaea, Spiroplasmaea, Ureoplasmaea, and Fungal Mitochondria)" - }, - { - "value": "25", - "display": "25 (Candidate Division SR1 and Gracilibacteria Code)", - "id": "fast", - "ui_name": "25 (Candidate Division SR1 and Gracilibacteria Code)" - } - ] - } - }, { "id": "output_GenomeSet_name", "optional": false, "advanced": false, @@ -171,18 +88,6 @@ "input_parameter": "input_text", "target_property": "input_text" }, - { - "input_parameter": "scientific_name", - "target_property": "ncbi_taxon_id" - }, - { - "input_parameter": "domain", - "target_property": "domain" - }, - { - "input_parameter": "genetic_code", - "target_property": "genetic_code" - }, { "narrative_system_variable": "timestamp_epoch_ms", "target_property": "relation_engine_timestamp_ms" diff --git a/ui/narrative/methods/reannotate_microbial_genome/display.yaml b/ui/narrative/methods/reannotate_microbial_genome/display.yaml deleted file mode 100644 index d052f21..0000000 --- a/ui/narrative/methods/reannotate_microbial_genome/display.yaml +++ /dev/null @@ -1,294 +0,0 @@ -# -# Define basic display information -# -#name : Annotate Microbial Genome with RASTtk - v1.073 (*Deprecated by 'Annotate Genome/Assembly with RASTtk - v1.073' in 6 months) -name : Annotate Microbial Genome with RASTtk - v1.073 - -tooltip : | - 
Annotate or re-annotate bacterial or archaeal genome using RASTtk (Rapid Annotations using Subsystems Technology toolkit). - -screenshots : - [] - -icon: rast-red.png - -# -# Define the set of other narrative methods that should be suggested to the user. -# -suggestions : - apps: - related : - [] - next : - [build_fba_model, build_species_tree] - methods: - related : - [] - next : - [annotate_domains_in_genome_generic] - - -# -# Configure the display and description of the parameters -# -parameters : - input_genome : - ui-name : | - Genome - short-hint : | - A genome (set of DNA contigs) including structural and functional annotations. - - call_features_rRNA_SEED : - ui-name : | - Call rRNAs - short-hint : | - Call rRNA non-coding features with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_tRNA_trnascan : - ui-name : | - Call tRNA trnascan - short-hint : | - Call tRNA non-coding features with trnascan in RAST. See http://rast.nmpdr.org/ for more information. - - call_selenoproteins : - ui-name : | - Call selenoproteins - short-hint : | - Call new selenoprotein-containing genes with RAST. See http://rast.nmpdr.org/ for more information. - - call_pyrrolysoproteins : - ui-name : | - Call pyrrolysoproteins - short-hint : | - Call new pyrrolysoprotein-containing genes with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_repeat_region_SEED : - ui-name : | - Call SEED repeat region - short-hint : | - Call SEED-defined non-coding repeat regions. See http://rast.nmpdr.org/ for more information. - - call_features_insertion_sequences : - ui-name : | - Call features insertion sequences - short-hint : | - Call non-coding features insertion sequences with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_strep_suis_repeat : - ui-name : | - Call strep suis repeats - short-hint : | - Call non-coding strep suis repeats with RAST. See http://rast.nmpdr.org/ for more information. 
- - call_features_strep_pneumo_repeat : - ui-name : | - Call strep pneumo repeats - short-hint : | - Call non-coding strep pneumo repeats with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_crispr : - ui-name : | - Call crisprs - short-hint : | - Call non-coding crisprs with RAST. See http://rast.nmpdr.org/ for more information. - - call_features_CDS_glimmer3 : - ui-name : | - Call glimmer3 - short-hint : | - Call CDS features with glimmer3. See http://rast.nmpdr.org/ for more information. WARNING: Calling new genes will delete existing genes. Glimmer will not run if all contigs < 2000nt. - - call_features_CDS_prodigal : - ui-name : | - Call prodigal - short-hint : | - Call CDS features with prodigal. See https://github.com/hyattpd/Prodigal for more information. WARNING: Calling new genes will delete existing genes. - - annotate_proteins_kmer_v2 : - ui-name : | - Annotate proteins kmer v2 - short-hint : | - Annotate proteins kmer v2 with RAST. See http://rast.nmpdr.org/ for more information. - - kmer_v1_parameters : - ui-name : | - Annotate proteins Kmer v1 - short-hint : | - Annotate proteins kmer v1 with RAST. See http://rast.nmpdr.org/ for more information. - - annotate_proteins_similarity : - ui-name : | - Annotate proteins similarity - short-hint : | - Annotate proteins similarity with RAST. See http://rast.nmpdr.org/ for more information. - - retain_old_anno_for_hypotheticals : - ui-name : | - Retain old annotations for hypotheticals - short-hint : | - Retain old annotations for hypotheticals. - - resolve_overlapping_features : - ui-name : | - Resolve overlapping features - short-hint : | - Resolve overlapping features. See http://rast.nmpdr.org/ for more information. - - call_features_prophage_phispy : - ui-name : | - Call features prophage phispy - short-hint : | - Call features prophage phispy with RAST. See http://rast.nmpdr.org/ for more information. 
- - output_genome : - ui-name : | - Output Genome Name - short-hint : | - A genome with updated structural and functional annotations. - placeholder : | - Name to assign genome - -description : | -

This KBase annotation App (Annotate Microbial Genome) uses components from the RAST (Rapid Annotations using Subsystems Technology) toolkit [1,2,3] to annotate a prokaryotic genome or to update the annotations of a genome.

-

The release versions of the RASTtk component services used in this app are: -

-

- -

The Annotate Microbial Genome App takes a Genome-typed object as input and allows users to annotate or re-annotate the genome. This will make the annotations consistent with other KBase genomes and prepare the genome for further analysis by other KBase Apps, especially the Metabolic Modeling Apps.

- -

A Genome object can be imported or generated with one of the following annotation Apps or their multi-object versions: -

-

- -

The Default Annotation Pipeline
Clicking "Run" will run the default pipeline. For a typical 2-5 MBp genome, the default annotation pipeline should take about 5 minutes. It is assumed that Genomes already have some annotation. As a result, the default behavior of this App is to use SEED to re-annotate just the protein-encoding genes. The default pipeline for this App consists of the following steps: -

    -
  • DNA/RNA-based predictions -
      -
    1. Call rRNAs (default = off)
      Predict rRNAs in the genome. This is a custom BLAST-based tool for finding rRNAs.
    2. -
    3. Call tRNAs with tRNAscan (default = off)
      Predict tRNAs in the genome with tRNAscan-SE [6].
    4. -
    5. Call CRISPRs (default = off)
      This is a custom tool that uses a perl regular expression-based search to find CRISPR elements.
    6. -
    7. Find prophage elements with phispy (default = off)
      This will use the phispy program to find prophage elements [13].
    8. -
  • - -
  • Gene predictions -
      -
    1. Call protein-encoding genes with both Prodigal [11] and Glimmer3 [12] (default = off)
      These options will delete all existing genes in the genome object and replace them with the selected predictions.
    2. -
    3. Call selenoproteins and pyrrolysylproteins [7] (default = off)
      These are custom BLAST-based tools.
    4. -
  • - -
  • Repeats -
      -
    1. Call SEED large repeat regions (default = off)
      This is a BLASTn search within the genome for regions greater than 95% nucleotide similarity greater than or equal to 100bp in length.
    2. -
    3. Find Streptococcus repeat regions [9, 10] (default = off)
      This is a command that should only be used if the genus is Streptococcus.
    4. -
  • - -
  • Add SEED Functions/Annotation to protein-encoding genes (k-mers needed for Metabolic Modeling) -
      -
    1. Annotate protein-encoding genes with k-mers (version 2; default = on)
      This is a set of signature k-mers (amino acid 8-mers) built from the annotations in the CoreSEED. The CoreSEED is a database of ~1,000 diverse microbial genomes and is currently the main focus of the RAST manual annotation efforts. Annotating using this k-mer set provides the user with our most stable and best estimate of the core gene functions.
    2. - -
    3. Annotate remaining hypothetical proteins with k-mers (version 1; default = on)
      This set of k-mers is built from the FigFam collection [8] in the PubSEED, which is the publicly annotated version of the SEED database that consists of ~12,000 microbial genomes. The "classic" version of RAST on the RAST website (http://rast.nmpdr.org) uses the FigFam-based k-mers (hence the version 1 designation).
    4. - -
    5. Annotate remaining hypothetical proteins by protein similarity (default = on)
      We have several non-redundant databases for the most common genera. If the genus name of your organism matches one of these, a search will be performed against the remaining hypothetical proteins to attempt to find a function. The search uses a combination of BLAT [4] and BLAST [5].
    6. -
  • - -
  • Other -
      -
    1. Perform a basic gene overlap removal (default = off)
      Using multiple gene calling algorithms can result in overlapping gene calls. This program is a custom tool that attempts to minimize overlaps and gaps to provide a set of calls that has a smaller number of gene calling errors. We do not recommend using overlap removal if you are attempting to annotate phage.
    2. - -
    3. Retain old annotations for hypotheticals (default = off)
      In instances where the pipeline fails to find an annotation for a gene, this will retain the original annotation from the input Genome-typed object.
    4. -
  • -
-

- -

Advanced Annotation Options
If you wish to customize the features in your annotation, click the "show advanced options" link. This will display the full set of available annotation options.

- -

The Results -

    -
  • The Objects section has a table of all the data objects that were created by the App. Click on the name of the data object to open a data viewer cell (below the currently selected cell).
  • -
  • The Summary section gives details about the coding and noncoding features that were created and the average protein length.
  • -
-

- -

GUI Output
The GUI output currently consists of three tabs. The "Overview" tab provides basic information on the annotation job, the "Browse Features" tab allows the user to scroll through the features that were called, and the "Browse Contigs" tab provides information on the contigs in the genome. Users can sort on the various types of features. Note that some features will overlap (e.g., "prophage" and "CDS").

- -

Additional Information
For more information on the steps of the default RASTtk pipeline, please refer to our publication (publication forthcoming). For more detailed tutorial information and to explore the additional functionality of RASTtk not currently available in the Narrative interface, please refer to http://tutorial.theseed.org.

- -

Team members who developed & deployed algorithm in KBase: - Thomas Brettin, James Davis, Terry Disz, Robert Edwards, Chris Henry, Gary Olsen, Robert Olson, Ross Overbeek, Bruce Parrello, Gordon Pusch, Roman Sutormin, and Fangfang Xia. For questions, please contact us.

- -

The authors of RAST request that if you use the results of this annotation in your work, please cite the first three listed publications:

- - - -publications : - - - display-text: | - [1] Aziz RK, Bartels D, Best AA, DeJongh M, Disz T, Edwards RA, et al. The RAST Server: Rapid Annotations using Subsystems Technology. BMC Genomics. 2008;9: 75. doi:10.1186/1471-2164-9-75 - link: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-9-75 - - - - display-text: | - [2] Overbeek R, Olson R, Pusch GD, Olsen GJ, Davis JJ, Disz T, et al. The SEED and the Rapid Annotation of microbial genomes using Subsystems Technology (RAST). Nucleic Acids Res. 2014;42: D206–D214. doi:10.1093/nar/gkt1226 - link: https://academic.oup.com/nar/article/42/D1/D206/1062536 - - - - display-text: | - [3] Brettin T, Davis JJ, Disz T, Edwards RA, Gerdes S, Olsen GJ, et al. RASTtk: A modular and extensible implementation of the RAST algorithm for building custom annotation pipelines and annotating batches of genomes. Sci Rep. 2015;5. doi:10.1038/srep08365 - link: https://www.nature.com/articles/srep08365 - - - - display-text: | - [4] Kent WJ. BLAT—The BLAST-Like Alignment Tool. Genome Res. 2002;12: 656–664. doi:10.1101/gr.229202 - link: https://genome.cshlp.org/content/12/4/656 - - - - display-text: | - [5] Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res. 1997;25: 3389-3402. doi:10.1093/nar/25.17.3389 - - - - display-text: | - [6] Lowe TM, Eddy SR. tRNAscan-SE: a program for improved detection of transfer RNA genes in genomic sequence. Nucleic Acids Res. 1997;25: 955–964. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC146525/ - - - - display-text: | - [7] Cobucci-Ponzano B, Rossi M, Moracci M. Translational recoding in archaea. Extremophiles. 2012;16: 793–803. doi:10.1007/s00792-012-0482-8 - link: https://www.ncbi.nlm.nih.gov/pubmed/23015064 - - - - display-text: | - [8] Meyer F, Overbeek R, Rodriguez A. FIGfams: yet another set of protein families. Nucleic Acids Res. 2009;37 6643-54. 
doi:10.1093/nar/gkp698. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2777423/ - - - - display-text: | - [9] van Belkum A, Sluijuter M, de Groot R, Verbrugh H, Hermans PW. Novel BOX repeat PCR assay for high-resolution typing of Streptococcus pneumoniae strains. J Clin Microbiol. 1996;34: 1176–1179. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC228977/ - - - - display-text: | - [10] Croucher NJ, Vernikos GS, Parkhill J, Bentley SD. Identification, variation and transcription of pneumococcal repeat sequences. BMC Genomics. 2011;12: 120. doi:10.1186/1471-2164-12-120 - link: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-12-120 - - - - display-text: | - [11] Hyatt D, Chen G-L, Locascio PF, Land ML, Larimer FW, Hauser LJ. Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics. 2010;11: 119. doi:10.1186/1471-2105-11-119 - link: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-119 - - - - display-text: | - [12] Delcher AL, Bratke KA, Powers EC, Salzberg SL. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics. 2007;23: 673–679. doi:10.1093/bioinformatics/btm009 - link: https://academic.oup.com/bioinformatics/article/23/6/673/419055 - - - - display-text: | - [13] Akhter S, Aziz RK, Edwards RA. PhiSpy: a novel algorithm for finding prophages in bacterial genomes that combines similarity- and composition-based strategies. Nucleic Acids Res. 2012;40: e126. 
doi:10.1093/nar/gks406 - link: https://academic.oup.com/nar/article/40/16/e126/1027055 diff --git a/ui/narrative/methods/reannotate_microbial_genome/img/rast-red.png b/ui/narrative/methods/reannotate_microbial_genome/img/rast-red.png deleted file mode 100644 index dc5f3609b7ac91605659756ec978ad83ca535e6a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6071 zcmZ`cWmuF^vo9q}hctpqBNEcmy>x>#NO!kLEse6k(%oIsE)CKM2ur63k^)Ld!^I!p zpYPuLV`iRb&YYQZX6DSyiP2D#$HS(=1^@t0Q9)Mgkq-X@(33}8qhFi*NU+=#3_Jk< z=jlH{0Wz~G003LvUPeYk!_L*q)zi+^4Xh|519tOpwXt`y1^~aM9Bo@2?L9KFi?uUp z<>-)PWmhdy5Liq4LkLMCGaDTip;9zm!6K<RIQ1Hw&J;ML?0!L5*{CHfvV)IYef#VHwz z&FBUYf*P&pJntA#fXg6J(UgGrYHk`R=ToOEVwARirgJ7#II321NtZ&`x3kw8e!WkvuBV3bTpijtoQfS+1N z%K~;HK=tICSUEud6+mL6+%E(yZ~(6q^=uS?Ppv@D1R+iX0Kx@cX+}r90MG+~x5Eq! zen5BzKms|@6aDkDmT-sd(Nh@>q6kJIxlkhzhdZX89w#m9gd+J%5&?5m^Gr$3KK~3# zo=`r5@26h@AU~1x@wTUTeiQh$6B7ck7JM^~{q83Z^cEJI_dh4A+#~^D!z*a&o}H_a zDn#T-i1U3R(-FFzDNerk#Rr=P0?B3|e{V_e)a{>d7DWQ*%hQAnKntrhQvCe*jCce%Od~y&+2}tB9H?c(ftIR^E@743; z0sv0hU3+I(L1-a%VH=aa562RB^7%|ah>arL9RN&a898-E8zqK703e$m%JxZ$`mC3N zz30iZUW}DqoZGho;nIx#ebR)|*j6FWJl?*jhDx(Wj(?(Id&@N=NyFEpX&0H~hR@lj z`x#%%jo{iGl-tYN5sHN_H4J)cPPY_|ViBv)00WU{L|oGKDxed8U<8julW5W_C-N(B zYsKn-bron%#5_=iqKqMJi6X-Q5A-&&L!LW1v_VsB2De!TQ7%Xo4k^U>X64158Bdm< z_06M+O6;RR!OpiTJA$Qnahai4J-ZZ`y!}qEI0wH-*S}`RO0pcQr?1AX8l?L4rW&n1 z=G=0!-WQ!XRIUe00Nqy_Y)&hsps%8@wxCcBe#Xd0w1g#yg%+;a$3mY}uH3}jMs(Fr zVZp;4rLVxwFh?B!l#4JRTBcu+kvK+%hXs* zDUYhOEqB$CC=N2Xi+N)3MdgdSOtEHuz1e2RAQ;DzveA#7&JnLF(mRyDpSgZ~@x2dQ zd-@$s`v|>E56f~&JLTF9ZbI>;hLl*0S>$cwCopn$v=_Bk%dpGHRKD)PAmz~Sw>OX13Wqh)XJ3%2j z!0+xVO2aj@{6|y9v-Y#Tn2xZ823ESPS#G=eAasFTSGrX_LlON&(?|!TZW|u zM=c#L5r#qrCHlICCXJAqoEoON_L}xGqB4Y9l3G>nob8uSe_BRbDq1N&{akpmT-)N@ z5@>E~L2lbRQ2XU5sWU0CEimp*3==PE5xu^0XJIoerBd0TQ@glE`k^oJN_=on|3IA<31;OcPamXg=GyUE78>GPbd z#;m@>^5wcCfg}5)sFl30e0eW&4R}4jeHoVduK9CH0w*u!mE(GKkh+X|sBQBO1D7RN zqF&AXr`8hR$_w*ja`C_79(kMMo1%hiJZlDKUj=(whg-+gzqsa_MXbvNGdx&5ynC1e 
z%0ie>Y|&yur9zERZ!X`xr|X7E2rpazZCE^%`k0T9B9)4V)DOAszgL{x-cqy)?@TAyl)JB{CWYDLC#UG`cwRdy}QmRnt@ zjzfu`O}vuRL3By5jjv5&kkiKC>#Tl`zM_-zGa*j4S^*cQuJzl))2JUgKX5h!$kVxo zP2BtoCi>1I%E&4>iP?oyB~_x;UMK&SB4jHk`^a?i)Rzf{?cYb!XG_mDB`_7eAY;r% zt9GKmruq*4E3t#$pK*Yo^0f!!iqx~pTzI?smLU~8>s(>)$DDwJB4 z`*Qs7Yvbs0f?)P>-oqhrAMpmfr6f{WkAp^kyR+Iss6o`-e;Ip|6ys&&nvI{bH=AEa=1_~1#>Cc= zER&~^1~0I#`2D1gavkl?SV?ZGsOm>pP1;P`>}{`cQ%RFLcmS$Q^ zM=O5r<$*l@(DSbQoMT@OJOMo2xa`3n3~sXiTYDE+>ld_m!b?6M1FAPLkYzf%0oLF}ta6N7#^xBw~d-$*;(jhWoeDc}rq+$8zQ@zi6nRUWu&ih{bqW4Ki z^`Z9c_RLy@u_NN>U1xKE&BY#duHeM#S*QOWvWL0ZTp_X5&eM0U7XugP>tw4z9xQhg zW9^H+EJrmR>K$fhl7~`@GOJOu5=Qqc3&$(L4n4m4r&BBP7f&L>Chrr^;|XLi;xppM zBJm;*a$k!J3H^}pxZArelbN)dWX>hOFT5YCB`4zwx|}_jpO{LYDo8s?OX_xhUvjtK zb3j?QJlHo#+co0H%WZo0=H{^1cg2_HxM-a~ed|3qnDxQ#s{UO2TiHz#Wm3`uI$HYa zttBtV^5eV-wp7qk1%P)<01y%e09Ox>bPoW$Uje}WTL2JF2LN(cxY+;%0C0aO%1Y_@ zE&a+dQ!v(}9qMKn2&@S9!RUo7|1poCC}8HWW9N$;AeI7E;0OdxN93GUxX>*MV)eEz zMWvV3+~lh}ho@_R^v~!@HJho2HSdT|uXrQcs%M>^U0{JQs!HD!b7Ub}2J2 z_6kqALO49(U2fq9>{L>N4%Agl=gL%!!jd7HHagR?R-V>XU<}GCXI$mBqhXW;E@sBn zXs6nKY-z&&5DQ`^l6;C;7)9`gD+X6W7L3(s2tsJf=a5Z>2Gfk=A?)APW0mKQrqY<1 zVoVNuX0CYABp8q4W^9n`HR9?_eDXrRsu&YqX&=lAv@qse@CIvb%fN!RLQsngr_sue zx4EFNSt!an3qmL#ZK|Mif;=Q&C=oX#hh?HEH85it3=^fE(RfG_DS0gMwdvX<6d4A{ zAcj!}^gpkAUVoO5Y7)d+P%@HEoZdfkKYo$g_()YvD|q{DixV zcCkUVzLo(qnQ+6Y;ti|J8A#c%U_2k)Ui5XPZYt9|AenyNgaI^#1{RA=CK3Z0Lj+WJ3}{-t9Vl*3U>gP>q9z5yyj2FZfBN zQm4DO21g>w!9{p`JIT?}^x|PSExxI?(>sB(mNO+fNIkq(YP5>YdwV~hGx#*6Z`CB! 
zbXFC*eT@h>3ffkLg&o(~+cO7C6W-t>`Tta8UcW`L( ze0H@4;$9>+S)}5YUYLi82;RH9tNQCm-c~K_rfk)7p^&7yQzzs-J@2ksVFdp<@}fp< z($4mc`U^R@dC@4sVl`cT`TF{&9vZ`m{dH+JW%xJ$AuL_?Cl9 zz;)Xo4_VQx{`V=$Cbfz2YRBG;qi5HTnh#<=FPu5`j@vN|6E(CuPqf~VOq&XKrp+(S z&{iRN&uq}Kx&*L@xdvL(>`ogZz!hT5C4 zJlov}^jWsiA|fni{mm;vy9G})p7TY4`Z3d*U^AL{7(rZ(qC{KMx%?xTSV11iznFbc zUwqqC&$FiCZZuhR5@2+mr3|u{#!phYd^(_=B_)Ey57l=9Qo;NO&Ss#gpLK$ zzCX66Sm4mT{<#M~__vmoe7sT`)pBH(WH(f=< zs3)|r@xmhY!P#jS?4?A>oeTV+Rk_g$(e`&_xO`uD zsixRr|3Fu?ZA;OQwH~B3KecVYIJ{Y4t{z~8kV}}EdfDnvTF1N3d#6{e-P8C*CiHL) z3T9ICa#}g9)NcjBg`qzMW)9A`D^?G~ZEhq!kzK9S5HU(6toZTqIhmlda6A7=Y6J6> zZ>-HEAKtegGhjit(jHS_HR#mY%)iZ^-^F1jS|TtNSr3JWi!~GWYhwmnZt$CB@uoXO zg$?n=SzR4|Hf7pF+)y?if-Kg1JJ&AEv&aUDCV70e^Y;n6;mLZsJj*cf9!3pykYI>_ z_r6&!ZE7#a_^9{fnxB^<@7;6#;3MN8SM(PRRA1ue-yiL}D8=w_aB;8BCywQIj%4m^ zM9o*5-VDx4X>p))<7iE393MN%*h(jjX>^pg)G32) zmJ%k$Mop>TbZ2{~gkYY4ql-IhgnV0{);2@5&FW|kKo&~M9tbwfK)Hvj1B@Jml03@* zcDep!v*ct-e%Gc)40F@0t%BUGMcV7zACg;AOE`3OQu;_zS-HRRWSv zkStR8+&;!H{Y)+j=j54`OF|SFWKk^#qlcpV<0IKnv|jm|vNfB%7GnAF#<5BENyD0? 
zbsOf^-z-<7K$yhCjoF~^&UdA)Eo|fEtadi4ebTOP)(Rqd<|ssrInSAoDMa2d6R8X| zrcKyn_l&)wyvnLn%Aujvn(Um6<2`!x<$mU6mb)a~bM+=NGiQ(?Ne{CO)O-mn5M{_Dn zX)r%k+6EI)_vCvli`;-zgIRCq3X8?s58G8_q$hI*)OazAW4!oYG9kP0(Or(+NmNBq zLsMX6C*4SS0>Qs(?e?3h^SPAAQ!B1muaX(`UXtB8`!}@AOr1xg(O{;JV7!75qgd(r4&88M>(IzvLmaq2g}0}M3&F@dJdM_dS4KT<h zt|T4R0BNFnraiCHz7=J3MVLbld3&Cu`-uA*V^j(l#`pm)UT9LFa5=n(gh-phgx@SL zaM@@7jX<>lKMvmJHx}w5FNHB_!K+YI)hf}E4IyZ;Ec}fk0qaUZj{{7u&h@NWu+Fm( zX#t{uNqktMt~r_23v@Nj;c4?u>PC5{?K0}o#P*HkrOfE;9?7x!J+m8bw9xsWaV;kH z)*s9XOd(aiP)lNdHB37Bw?tc#d|1kDoE3T9q|fM{!dbA2V}F=+QtyS?R)$g0g7^JvbEB0q5lJ-hfp{G diff --git a/ui/narrative/methods/reannotate_microbial_genome/spec.json b/ui/narrative/methods/reannotate_microbial_genome/spec.json deleted file mode 100644 index 14f85df..0000000 --- a/ui/narrative/methods/reannotate_microbial_genome/spec.json +++ /dev/null @@ -1,323 +0,0 @@ -{ - "ver" : "1.0.0", - "name" : "Annotate a microbial genome", - "authors" : [ "chenry", "olson" ], - "contact" : "https://www.kbase.us/support/", - "visble" : true, - "categories" : ["active","annotation"], - "widgets" : { - "input" : null, - "output" : "kbaseGenomeView" - }, - "parameters" : [ { - "id" : "input_genome", - "optional" : false, - "advanced" : false, - "allow_multiple" : false, - "default_values" : [ "" ], - "field_type" : "text", - "text_options" : { - "valid_ws_types" : [ "KBaseGenomes.Genome","KBaseGenomeAnnotations.GenomeAnnotation" ] - } - }, { - "id" : "output_genome", - "optional" : true, - "advanced" : false, - "allow_multiple" : false, - "default_values" : [ "" ], - "field_type" : "text", - "text_options" : { - "valid_ws_types" : [ "KBaseGenomes.Genome" ], - "is_output_name":true - } - }, { - "id": "call_features_rRNA_SEED", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_tRNA_trnascan", - "optional":false, - 
"advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_selenoproteins", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_pyrrolysoproteins", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_repeat_region_SEED", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_strep_suis_repeat", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_strep_pneumo_repeat", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_crispr", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_CDS_glimmer3", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_CDS_prodigal", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - 
"checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "annotate_proteins_kmer_v2", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "kmer_v1_parameters", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "annotate_proteins_similarity", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "retain_old_anno_for_hypotheticals", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "resolve_overlapping_features", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_prophage_phispy", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - - }], - "job_id_output_field": "docker", - "behavior" : { - "service-mapping" : { - "url" : "", - "name" : "RAST_SDK", - "method" : "annotate_genome", - "input_mapping" : [ - { - "input_parameter": "input_genome", - "target_property": "input_genome", - "target_type_transform": "resolved-ref" - }, - { - "input_parameter": "call_features_rRNA_SEED", - "target_property": "call_features_rRNA_SEED" - }, - { - "input_parameter": "call_features_tRNA_trnascan", - "target_property": 
"call_features_tRNA_trnascan" - }, - { - "input_parameter": "call_selenoproteins", - "target_property": "call_selenoproteins" - }, - { - "input_parameter": "call_pyrrolysoproteins", - "target_property": "call_pyrrolysoproteins" - }, - { - "input_parameter": "call_features_repeat_region_SEED", - "target_property": "call_features_repeat_region_SEED" - }, - { - "input_parameter": "call_features_strep_suis_repeat", - "target_property": "call_features_strep_suis_repeat" - }, - { - "input_parameter": "call_features_strep_pneumo_repeat", - "target_property": "call_features_strep_pneumo_repeat" - }, - { - "input_parameter": "call_features_crispr", - "target_property": "call_features_crispr" - }, - { - "input_parameter": "call_features_CDS_glimmer3", - "target_property": "call_features_CDS_glimmer3" - }, - { - "input_parameter": "call_features_CDS_prodigal", - "target_property": "call_features_CDS_prodigal" - }, - { - "input_parameter": "annotate_proteins_kmer_v2", - "target_property": "annotate_proteins_kmer_v2" - }, - { - "input_parameter": "kmer_v1_parameters", - "target_property": "kmer_v1_parameters" - }, - { - "input_parameter": "annotate_proteins_similarity", - "target_property": "annotate_proteins_similarity" - }, - { - "input_parameter": "resolve_overlapping_features", - "target_property": "resolve_overlapping_features" - }, - { - "input_parameter": "call_features_prophage_phispy", - "target_property": "call_features_prophage_phispy" - }, - { - "input_parameter": "retain_old_anno_for_hypotheticals", - "target_property": "retain_old_anno_for_hypotheticals" - }, - { - "input_parameter": "output_genome", - "generated_value": { - "prefix": "genome_" - }, - "target_property": "output_genome" - }, - { - "narrative_system_variable": "workspace", - "target_property": "workspace" - } - ], - "output_mapping" : [ - { - "service_method_output_path": [0, "report_name"], - "target_property": "report_name" - }, - { - "service_method_output_path": [0, "report_ref"], - 
"target_property": "report_ref" - }, - { - "narrative_system_variable": "workspace", - "target_property": "workspace_name" - }, - { - "input_parameter": "output_genome", - "target_property": "id" - }, - { - "narrative_system_variable": "workspace", - "target_property": "ws" - } - ] - } - } -} diff --git a/ui/narrative/methods/reannotate_microbial_genomes/display.yaml b/ui/narrative/methods/reannotate_microbial_genomes/display.yaml deleted file mode 100644 index aea35a2..0000000 --- a/ui/narrative/methods/reannotate_microbial_genomes/display.yaml +++ /dev/null @@ -1,270 +0,0 @@ -# -# Define basic display information -# -#name : Annotate Multiple Microbial Genomes with RASTtk - v1.073 (*Deprecated by 'Bulk Annotate Genomes/Assemblies with RASTtk - v1.073' in 6 months) -name : Annotate Multiple Microbial Genomes with RASTtk - v1.073 - -tooltip : | - Annotate or re-annotate bacterial or archaeal genomes and/or genome sets using RASTtk (Rapid Annotations using Subsystems Technology toolkit). -screenshots : - [] - -icon: rast-red.png - -# -# Define the set of other narrative methods that should be suggested to the user. -# -suggestions : - apps: - related : - [] - next : - [build_fba_model, build_species_tree] - methods: - related : - [] - next : - [annotate_domains_in_a_genome] - - -# -# Configure the display and description of the parameters -# -parameters : - input_genomes : - ui-name : | - Genomes/GenomeSets - short-hint : | - One or more Genomes and/or GenomeSets to reannotate. - genome_text : - ui-name : | - Genome list - short-hint : | - List of genomes to reannotate, delimited with semicolon (;). This list is in addition to the input genomes above. Example names: Carsonella_rudii_PC;Carsonella_rudii_CE [no spaces] or IDs 21677/17/1;21677/16/1; or delimited by new lines (list each name or ID on a separate line). You can also mix names and IDs as long as there are no spaces (e.g., Carsonella_rudii_PC;21677/18/1). 
- call_features_rRNA_SEED : - ui-name : | - Call rRNAs - short-hint : | - Call rRNA non-coding features with RAST. See http://rast.nmpdr.org/ for more information. - call_features_tRNA_trnascan : - ui-name : | - Call tRNA trnascan - short-hint : | - Call tRNA non-coding feature with trnascan in RAST. See http://rast.nmpdr.org/ for more information. - call_selenoproteins : - ui-name : | - Call selenoproteins - short-hint : | - Call new selenoprotein-containing genes with RAST. See http://rast.nmpdr.org/ for more information. - call_pyrrolysoproteins : - ui-name : | - Call pyrrolysoproteins - short-hint : | - Call new pyrrolysoprotein-containing genes with RAST. See http://rast.nmpdr.org/ for more information. - call_features_repeat_region_SEED : - ui-name : | - Call SEED repeat region - short-hint : | - Call SEED-defined non-coding repeat regions. See http://rast.nmpdr.org/ for more information. - call_features_insertion_sequences : - ui-name : | - Call features insertion sequences - short-hint : | - Call non-coding features insertion sequences with RAST. See http://rast.nmpdr.org/ for more information. - call_features_strep_suis_repeat : - ui-name : | - Call strep suis repeats - short-hint : | - Call non-coding strep suis repeats with RAST. See http://rast.nmpdr.org/ for more information. - call_features_strep_pneumo_repeat : - ui-name : | - Call strep pneumo repeats - short-hint : | - Call non-coding strep pneumo repeats with RAST. See http://rast.nmpdr.org/ for more information. - call_features_crispr : - ui-name : | - Call crisprs - short-hint : | - Call non-coding crisprs with RAST. See http://rast.nmpdr.org/ for more information. - call_features_CDS_glimmer3 : - ui-name : | - Call glimmer3 - short-hint : | - Call CDS features with glimmer3. See http://rast.nmpdr.org/ for more information. WARNING: Calling new genes will delete existing genes. Glimmer will not run if all contigs < 2000nt. 
- call_features_CDS_prodigal : - ui-name : | - Call prodigal - short-hint : | - Call CDS features with prodigal. See https://github.com/hyattpd/Prodigal for more information. WARNING: Calling new genes will delete existing genes. - annotate_proteins_kmer_v2 : - ui-name : | - Annotate proteins kmer v2 - short-hint : | - Annotate proteins kmer v2 with RAST. See http://rast.nmpdr.org/ for more information. - kmer_v1_parameters : - ui-name : | - Annotate proteins Kmer v1 - short-hint : | - Annotate proteins kmer v1 with RAST. See http://rast.nmpdr.org/ for more information. - annotate_proteins_similarity : - ui-name : | - Annotate proteins similarity - short-hint : | - Annotate proteins similarity with RAST. See http://rast.nmpdr.org/ for more information. - retain_old_anno_for_hypotheticals : - ui-name : | - Retain old annotations for hypotheticals - short-hint : | - Retain old annotations for hypotheticals. - resolve_overlapping_features : - ui-name : | - Resolve overlapping features - short-hint : | - Resolve overlapping features. See http://rast.nmpdr.org/ for more information. - call_features_prophage_phispy : - ui-name : | - Call features prophage phispy - short-hint : | - Call features prophage phispy with RAST. See http://rast.nmpdr.org/ for more information. - output_genome : - ui-name : | - Optional Output GenomeSet Name - short-hint : | - The name of a genome set of the output annotated genomes. - placeholder : | - Name to assign genome set -parameter-groups: - genomes: - ui-name: | - Genomes to reannotate - short-hint: | - Genomes to reannotate. -description : | -

This KBase annotation App (Annotate Multiple Microbial Genomes) uses components from the RAST (Rapid Annotations using Subsystems Technology) toolkit [1,2,3] to annotate prokaryotic genomes, to update the annotations of genomes, or to perform computations on a set of genomes so that they are consistent. The newly generated genomes will have the same names as the input genomes with “.RAST” appended.

-

The release versions of the RASTtk component services used in this app are: -

-

-

This KBase annotation App (Annotate Multiple Microbial Genomes) uses components from the RAST (Rapid Annotations using Subsystems Technology) toolkit [1,2,3] to annotate prokaryotic genomes, to update the annotations of genomes, or to perform computations on a set of genomes so that they are consistent. The newly generated genomes will have the same names as the input genomes with “.RAST” appended.

- -

The Annotate Multiple Microbial Genomes App takes genomes as input and allows users to annotate or re-annotate the genomes. This will make the annotations consistent with other KBase genomes and prepare the genomes for further analysis by other KBase Apps, especially the Metabolic Modeling Apps. A Genome object can be generated by uploading a GenBank file, importing a GenBank file from NCBI via FTP, retrieving a Genome-typed object from KBase, or using the output of the Annotate Microbial Assembly App.

-

A Genome object can be imported or generated with one of the following annotation Apps or their multi-object versions: -

-

-

The Default Annotation Pipeline
Clicking "Run" will run the default pipeline. For a typical 2-5 MBp genome, the default annotation pipeline should take about 5 minutes. It is assumed that Genomes already have some annotation. As a result, the default behavior of this App is to use SEED to re-annotate just the protein-encoding genes. The default pipeline for this App consists of the following steps: -

    -
  • DNA/RNA-based predictions -
      -
    1. Call rRNAs (default = off)
      Predict rRNAs in the genome. This is a custom BLAST-based tool for finding rRNAs.
    2. -
    3. Call tRNAs with tRNAscan (default = off)
      Predict tRNAs in the genome with tRNAscan-SE [6].
    4. -
    5. Call CRISPRs (default = off)
      This is a custom tool that uses a Perl regular expression-based search to find CRISPR elements.
    6. -
    7. Find prophage elements with phispy (default = off)
      This will use the phispy program to find prophage elements [13].
    8. -
  • -
  • Gene predictions -
      -
    1. Call protein-encoding genes with both Prodigal [11] and Glimmer3 [12] (default = off)
      These options will delete all existing genes in the genome object and replace them with the selected predictions.
    2. -
    3. Call selenoproteins and pyrrolysylproteins [7] (default = off)
      These are custom BLAST-based tools.
    4. -
  • -
  • Repeats -
      -
    1. Call SEED large repeat regions (default = off)
      This is a BLASTn search within the genome for regions with greater than 95% nucleotide similarity that are at least 100 bp in length.
    2. -
    3. Find Streptococcus repeat regions [9, 10] (default = off)
      This is a command that should only be run if the genus is Streptococcus.
    4. -
  • -
  • Add SEED Functions/Annotation to protein-encoding genes (k-mers needed for Metabolic Modeling) -
      -
    1. Annotate protein-encoding genes with k-mers (version 2; default = on)
      This is a set of signature k-mers (amino acid 8-mers) built from the annotations in the CoreSEED. The CoreSEED is a database of ~1,000 diverse microbial genomes and is currently the main focus of the RAST manual annotation efforts. Annotating using this k-mer set provides the user with our most stable and best estimate of the core gene functions.
    2. -
    3. Annotate remaining hypothetical proteins with k-mers (version 1; default = on)
      This set of k-mers is built from the FigFam collection [8] in the PubSEED, which is the publicly annotated version of the SEED database that consists of ~12,000 microbial genomes. The "classic" version of RAST on the RAST website (http://rast.nmpdr.org) uses the FigFam-based k-mers (hence the version 1 designation).
    4. -
    5. Annotate remaining hypothetical proteins by protein similarity (default = on)
      We have several non-redundant databases for the most common genera. If the genus name of your organism matches one of these, a search will be performed against the remaining hypothetical proteins to attempt to find a function. The search uses a combination of BLAT [4] and BLAST [5].
    6. -
  • -
  • Other -
      -
    1. Perform a basic gene overlap removal (default = off)
      Using multiple gene calling algorithms can result in overlapping gene calls. This program is a custom tool that attempts to minimize overlaps and gaps to provide a set of calls that has a smaller number of gene calling errors. We do not recommend using overlap removal if you are attempting to annotate phage.
    2. -
    3. Retain old annotations for hypotheticals (default = off)
      In instances where the pipeline fails to find an annotation for a gene, this will retain the original annotation from the input Genome-typed object.
    4. -
  • -
-

-

Advanced Annotation Options
If you wish to customize the features in your annotation, click the "show advanced options" link. This will display the full set of available annotation options.

-

The Results -

    -
  • The Objects section has a table of all the data objects that were created by the App. Click on the name of the data object to open a data viewer cell (below the currently selected cell).
  • -
  • The Summary section gives details about the coding and noncoding features that were created and the average protein length.
  • -
  • The Files section has a downloadable version of the Summary.
  • -
-

-

GUI Output
The GUI output currently consists of three tabs. The "Overview" tab provides basic information on the annotation job, the "Browse Features" tab allows the user to scroll through the features that were called, and the "Browse Contigs" tab provides information on the contigs in the genome. Users can sort on the various types of features. Note that some features will overlap (e.g., "prophage" and "CDS").

-

Additional Information
For more information on the steps of the default RASTtk pipeline, please refer to our publication on this (publication forthcoming). For more detailed tutorial information, and to explore the additional functionality of RASTtk not currently available in the Narrative interface, please refer to http://tutorial.theseed.org.

-

Team members who developed & deployed algorithm in KBase: - Thomas Brettin, James Davis, Terry Disz, Robert Edwards, Chris Henry, Gary Olsen, Robert Olson, Ross Overbeek, Bruce Parrello, Gordon Pusch, Roman Sutormin, and Fangfang Xia. For questions, please contact us.

-

The authors of RAST request that if you use the results of this annotation in your work, please cite the first three listed publications:

-publications : - - - display-text: | - [1] Aziz RK, Bartels D, Best AA, DeJongh M, Disz T, Edwards RA, et al. The RAST Server: Rapid Annotations using Subsystems Technology. BMC Genomics. 2008;9: 75. doi:10.1186/1471-2164-9-75 - link: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-9-75 - - - - display-text: | - [2] Overbeek R, Olson R, Pusch GD, Olsen GJ, Davis JJ, Disz T, et al. The SEED and the Rapid Annotation of microbial genomes using Subsystems Technology (RAST). Nucleic Acids Res. 2014;42: D206–D214. doi:10.1093/nar/gkt1226 - link: https://academic.oup.com/nar/article/42/D1/D206/1062536 - - - - display-text: | - [3] Brettin T, Davis JJ, Disz T, Edwards RA, Gerdes S, Olsen GJ, et al. RASTtk: A modular and extensible implementation of the RAST algorithm for building custom annotation pipelines and annotating batches of genomes. Sci Rep. 2015;5. doi:10.1038/srep08365 - link: https://www.nature.com/articles/srep08365 - - - - display-text: | - [4] Kent WJ. BLAT—The BLAST-Like Alignment Tool. Genome Res. 2002;12: 656–664. doi:10.1101/gr.229202 - link: https://genome.cshlp.org/content/12/4/656 - - - - display-text: | - [5] Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res. 1997;25: 3389-3402. doi:10.1093/nar/25.17.3389 - - - display-text: | - [6] Lowe TM, Eddy SR. tRNAscan-SE: a program for improved detection of transfer RNA genes in genomic sequence. Nucleic Acids Res. 1997;25: 955–964. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC146525/ - - - - display-text: | - [7] Cobucci-Ponzano B, Rossi M, Moracci M. Translational recoding in archaea. Extremophiles. 2012;16: 793–803. doi:10.1007/s00792-012-0482-8 - link: https://www.ncbi.nlm.nih.gov/pubmed/23015064 - - - - display-text: | - [8] Meyer F, Overbeek R, Rodriguez A. FIGfams: yet another set of protein families. - Nucleic Acids Res. 2009;37 6643-54. 
doi:10.1093/nar/gkp698. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2777423/ - - - - display-text: | - [9] van Belkum A, Sluijuter M, de Groot R, Verbrugh H, Hermans PW. Novel BOX repeat PCR assay for high-resolution typing of Streptococcus pneumoniae strains. J Clin Microbiol. 1996;34: 1176–1179. - link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC228977/ - - - - display-text: | - [10] Croucher NJ, Vernikos GS, Parkhill J, Bentley SD. Identification, variation and transcription of pneumococcal repeat sequences. BMC Genomics. 2011;12: 120. doi:10.1186/1471-2164-12-120 - link: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-12-120 - - - - display-text: | - [11] Hyatt D, Chen G-L, Locascio PF, Land ML, Larimer FW, Hauser LJ. Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics. 2010;11: 119. doi:10.1186/1471-2105-11-119 - link: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-119 - - - - display-text: | - [12] Delcher AL, Bratke KA, Powers EC, Salzberg SL. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics. 2007;23: 673–679. doi:10.1093/bioinformatics/btm009 - link: https://academic.oup.com/bioinformatics/article/23/6/673/419055 - - - - display-text: | - [13] Akhter S, Aziz RK, Edwards RA. PhiSpy: a novel algorithm for finding prophages in bacterial genomes that combines similarity- and composition-based strategies. Nucleic Acids Res. 2012;40: e126. 
doi:10.1093/nar/gks406 - link: https://academic.oup.com/nar/article/40/16/e126/1027055 - diff --git a/ui/narrative/methods/reannotate_microbial_genomes/img/rast-red.png b/ui/narrative/methods/reannotate_microbial_genomes/img/rast-red.png deleted file mode 100644 index dc5f3609b7ac91605659756ec978ad83ca535e6a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6071 zcmZ`cWmuF^vo9q}hctpqBNEcmy>x>#NO!kLEse6k(%oIsE)CKM2ur63k^)Ld!^I!p zpYPuLV`iRb&YYQZX6DSyiP2D#$HS(=1^@t0Q9)Mgkq-X@(33}8qhFi*NU+=#3_Jk< z=jlH{0Wz~G003LvUPeYk!_L*q)zi+^4Xh|519tOpwXt`y1^~aM9Bo@2?L9KFi?uUp z<>-)PWmhdy5Liq4LkLMCGaDTip;9zm!6K<RIQ1Hw&J;ML?0!L5*{CHfvV)IYef#VHwz z&FBUYf*P&pJntA#fXg6J(UgGrYHk`R=ToOEVwARirgJ7#II321NtZ&`x3kw8e!WkvuBV3bTpijtoQfS+1N z%K~;HK=tICSUEud6+mL6+%E(yZ~(6q^=uS?Ppv@D1R+iX0Kx@cX+}r90MG+~x5Eq! zen5BzKms|@6aDkDmT-sd(Nh@>q6kJIxlkhzhdZX89w#m9gd+J%5&?5m^Gr$3KK~3# zo=`r5@26h@AU~1x@wTUTeiQh$6B7ck7JM^~{q83Z^cEJI_dh4A+#~^D!z*a&o}H_a zDn#T-i1U3R(-FFzDNerk#Rr=P0?B3|e{V_e)a{>d7DWQ*%hQAnKntrhQvCe*jCce%Od~y&+2}tB9H?c(ftIR^E@743; z0sv0hU3+I(L1-a%VH=aa562RB^7%|ah>arL9RN&a898-E8zqK703e$m%JxZ$`mC3N zz30iZUW}DqoZGho;nIx#ebR)|*j6FWJl?*jhDx(Wj(?(Id&@N=NyFEpX&0H~hR@lj z`x#%%jo{iGl-tYN5sHN_H4J)cPPY_|ViBv)00WU{L|oGKDxed8U<8julW5W_C-N(B zYsKn-bron%#5_=iqKqMJi6X-Q5A-&&L!LW1v_VsB2De!TQ7%Xo4k^U>X64158Bdm< z_06M+O6;RR!OpiTJA$Qnahai4J-ZZ`y!}qEI0wH-*S}`RO0pcQr?1AX8l?L4rW&n1 z=G=0!-WQ!XRIUe00Nqy_Y)&hsps%8@wxCcBe#Xd0w1g#yg%+;a$3mY}uH3}jMs(Fr zVZp;4rLVxwFh?B!l#4JRTBcu+kvK+%hXs* zDUYhOEqB$CC=N2Xi+N)3MdgdSOtEHuz1e2RAQ;DzveA#7&JnLF(mRyDpSgZ~@x2dQ zd-@$s`v|>E56f~&JLTF9ZbI>;hLl*0S>$cwCopn$v=_Bk%dpGHRKD)PAmz~Sw>OX13Wqh)XJ3%2j z!0+xVO2aj@{6|y9v-Y#Tn2xZ823ESPS#G=eAasFTSGrX_LlON&(?|!TZW|u zM=c#L5r#qrCHlICCXJAqoEoON_L}xGqB4Y9l3G>nob8uSe_BRbDq1N&{akpmT-)N@ z5@>E~L2lbRQ2XU5sWU0CEimp*3==PE5xu^0XJIoerBd0TQ@glE`k^oJN_=on|3IA<31;OcPamXg=GyUE78>GPbd z#;m@>^5wcCfg}5)sFl30e0eW&4R}4jeHoVduK9CH0w*u!mE(GKkh+X|sBQBO1D7RN zqF&AXr`8hR$_w*ja`C_79(kMMo1%hiJZlDKUj=(whg-+gzqsa_MXbvNGdx&5ynC1e 
z%0ie>Y|&yur9zERZ!X`xr|X7E2rpazZCE^%`k0T9B9)4V)DOAszgL{x-cqy)?@TAyl)JB{CWYDLC#UG`cwRdy}QmRnt@ zjzfu`O}vuRL3By5jjv5&kkiKC>#Tl`zM_-zGa*j4S^*cQuJzl))2JUgKX5h!$kVxo zP2BtoCi>1I%E&4>iP?oyB~_x;UMK&SB4jHk`^a?i)Rzf{?cYb!XG_mDB`_7eAY;r% zt9GKmruq*4E3t#$pK*Yo^0f!!iqx~pTzI?smLU~8>s(>)$DDwJB4 z`*Qs7Yvbs0f?)P>-oqhrAMpmfr6f{WkAp^kyR+Iss6o`-e;Ip|6ys&&nvI{bH=AEa=1_~1#>Cc= zER&~^1~0I#`2D1gavkl?SV?ZGsOm>pP1;P`>}{`cQ%RFLcmS$Q^ zM=O5r<$*l@(DSbQoMT@OJOMo2xa`3n3~sXiTYDE+>ld_m!b?6M1FAPLkYzf%0oLF}ta6N7#^xBw~d-$*;(jhWoeDc}rq+$8zQ@zi6nRUWu&ih{bqW4Ki z^`Z9c_RLy@u_NN>U1xKE&BY#duHeM#S*QOWvWL0ZTp_X5&eM0U7XugP>tw4z9xQhg zW9^H+EJrmR>K$fhl7~`@GOJOu5=Qqc3&$(L4n4m4r&BBP7f&L>Chrr^;|XLi;xppM zBJm;*a$k!J3H^}pxZArelbN)dWX>hOFT5YCB`4zwx|}_jpO{LYDo8s?OX_xhUvjtK zb3j?QJlHo#+co0H%WZo0=H{^1cg2_HxM-a~ed|3qnDxQ#s{UO2TiHz#Wm3`uI$HYa zttBtV^5eV-wp7qk1%P)<01y%e09Ox>bPoW$Uje}WTL2JF2LN(cxY+;%0C0aO%1Y_@ zE&a+dQ!v(}9qMKn2&@S9!RUo7|1poCC}8HWW9N$;AeI7E;0OdxN93GUxX>*MV)eEz zMWvV3+~lh}ho@_R^v~!@HJho2HSdT|uXrQcs%M>^U0{JQs!HD!b7Ub}2J2 z_6kqALO49(U2fq9>{L>N4%Agl=gL%!!jd7HHagR?R-V>XU<}GCXI$mBqhXW;E@sBn zXs6nKY-z&&5DQ`^l6;C;7)9`gD+X6W7L3(s2tsJf=a5Z>2Gfk=A?)APW0mKQrqY<1 zVoVNuX0CYABp8q4W^9n`HR9?_eDXrRsu&YqX&=lAv@qse@CIvb%fN!RLQsngr_sue zx4EFNSt!an3qmL#ZK|Mif;=Q&C=oX#hh?HEH85it3=^fE(RfG_DS0gMwdvX<6d4A{ zAcj!}^gpkAUVoO5Y7)d+P%@HEoZdfkKYo$g_()YvD|q{DixV zcCkUVzLo(qnQ+6Y;ti|J8A#c%U_2k)Ui5XPZYt9|AenyNgaI^#1{RA=CK3Z0Lj+WJ3}{-t9Vl*3U>gP>q9z5yyj2FZfBN zQm4DO21g>w!9{p`JIT?}^x|PSExxI?(>sB(mNO+fNIkq(YP5>YdwV~hGx#*6Z`CB! 
zbXFC*eT@h>3ffkLg&o(~+cO7C6W-t>`Tta8UcW`L( ze0H@4;$9>+S)}5YUYLi82;RH9tNQCm-c~K_rfk)7p^&7yQzzs-J@2ksVFdp<@}fp< z($4mc`U^R@dC@4sVl`cT`TF{&9vZ`m{dH+JW%xJ$AuL_?Cl9 zz;)Xo4_VQx{`V=$Cbfz2YRBG;qi5HTnh#<=FPu5`j@vN|6E(CuPqf~VOq&XKrp+(S z&{iRN&uq}Kx&*L@xdvL(>`ogZz!hT5C4 zJlov}^jWsiA|fni{mm;vy9G})p7TY4`Z3d*U^AL{7(rZ(qC{KMx%?xTSV11iznFbc zUwqqC&$FiCZZuhR5@2+mr3|u{#!phYd^(_=B_)Ey57l=9Qo;NO&Ss#gpLK$ zzCX66Sm4mT{<#M~__vmoe7sT`)pBH(WH(f=< zs3)|r@xmhY!P#jS?4?A>oeTV+Rk_g$(e`&_xO`uD zsixRr|3Fu?ZA;OQwH~B3KecVYIJ{Y4t{z~8kV}}EdfDnvTF1N3d#6{e-P8C*CiHL) z3T9ICa#}g9)NcjBg`qzMW)9A`D^?G~ZEhq!kzK9S5HU(6toZTqIhmlda6A7=Y6J6> zZ>-HEAKtegGhjit(jHS_HR#mY%)iZ^-^F1jS|TtNSr3JWi!~GWYhwmnZt$CB@uoXO zg$?n=SzR4|Hf7pF+)y?if-Kg1JJ&AEv&aUDCV70e^Y;n6;mLZsJj*cf9!3pykYI>_ z_r6&!ZE7#a_^9{fnxB^<@7;6#;3MN8SM(PRRA1ue-yiL}D8=w_aB;8BCywQIj%4m^ zM9o*5-VDx4X>p))<7iE393MN%*h(jjX>^pg)G32) zmJ%k$Mop>TbZ2{~gkYY4ql-IhgnV0{);2@5&FW|kKo&~M9tbwfK)Hvj1B@Jml03@* zcDep!v*ct-e%Gc)40F@0t%BUGMcV7zACg;AOE`3OQu;_zS-HRRWSv zkStR8+&;!H{Y)+j=j54`OF|SFWKk^#qlcpV<0IKnv|jm|vNfB%7GnAF#<5BENyD0? 
zbsOf^-z-<7K$yhCjoF~^&UdA)Eo|fEtadi4ebTOP)(Rqd<|ssrInSAoDMa2d6R8X| zrcKyn_l&)wyvnLn%Aujvn(Um6<2`!x<$mU6mb)a~bM+=NGiQ(?Ne{CO)O-mn5M{_Dn zX)r%k+6EI)_vCvli`;-zgIRCq3X8?s58G8_q$hI*)OazAW4!oYG9kP0(Or(+NmNBq zLsMX6C*4SS0>Qs(?e?3h^SPAAQ!B1muaX(`UXtB8`!}@AOr1xg(O{;JV7!75qgd(r4&88M>(IzvLmaq2g}0}M3&F@dJdM_dS4KT<h zt|T4R0BNFnraiCHz7=J3MVLbld3&Cu`-uA*V^j(l#`pm)UT9LFa5=n(gh-phgx@SL zaM@@7jX<>lKMvmJHx}w5FNHB_!K+YI)hf}E4IyZ;Ec}fk0qaUZj{{7u&h@NWu+Fm( zX#t{uNqktMt~r_23v@Nj;c4?u>PC5{?K0}o#P*HkrOfE;9?7x!J+m8bw9xsWaV;kH z)*s9XOd(aiP)lNdHB37Bw?tc#d|1kDoE3T9q|fM{!dbA2V}F=+QtyS?R)$g0g7^JvbEB0q5lJ-hfp{G diff --git a/ui/narrative/methods/reannotate_microbial_genomes/spec.json b/ui/narrative/methods/reannotate_microbial_genomes/spec.json deleted file mode 100644 index 8585cb6..0000000 --- a/ui/narrative/methods/reannotate_microbial_genomes/spec.json +++ /dev/null @@ -1,328 +0,0 @@ -{ - "ver" : "1.0.0", - "name" : "Annotate multiple microbial genomes", - "authors" : [ "chenry", "olson" ], - "contact" : "https://www.kbase.us/support/", - "visble" : true, - "categories" : ["active","annotation"], - "widgets" : { - "input" : null, - "output" : "no-display" - }, - "parameters" : [ { - "id" : "input_genomes", - "optional" : true, - "advanced" : false, - "allow_multiple" : true, - "default_values" : [ "" ], - "field_type" : "text", - "text_options" : { - "valid_ws_types" : [ "KBaseGenomes.Genome","KBaseGenomeAnnotations.GenomeAnnotation", "KBaseSearch.GenomeSet" ] - } - }, { - "id": "genome_text", - "optional": true, - "advanced": false, - "allow_multiple": false, - "default_values": [ "" ], - "field_type": "textarea", - "textarea_options" : { - "n_rows" : 10 - } - }, { - "id": "call_features_rRNA_SEED", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_tRNA_trnascan", - "optional":false, - "advanced":true, - 
"allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_selenoproteins", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_pyrrolysoproteins", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_repeat_region_SEED", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_strep_suis_repeat", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_strep_pneumo_repeat", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_crispr", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_CDS_glimmer3", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_CDS_prodigal", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ 
- "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "annotate_proteins_kmer_v2", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "kmer_v1_parameters", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "annotate_proteins_similarity", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["1"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "retain_old_anno_for_hypotheticals", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "resolve_overlapping_features", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "call_features_prophage_phispy", - "optional":false, - "advanced":true, - "allow_multiple":false, - "default_values":["0"], - "field_type" : "checkbox", - "checkbox_options":{ - "checked_value": 1, - "unchecked_value": 0 - } - }, { - "id": "output_genome", - "optional": false, - "advanced": false, - "allow_multiple": false, - "default_values": [ "" ], - "field_type": "text", - "text_options" : { - "valid_ws_types" : [ "KBaseSearch.GenomeSet" ], - "is_output_name":true - } - }], - "job_id_output_field": "docker", - "behavior" : { - "service-mapping" : { - "url" : "", - "name" : "RAST_SDK", - "method" : "annotate_genomes", - "input_mapping" : [ - { - "input_parameter": "input_genomes", - "target_property": "input_genomes", - 
"target_type_transform": "resolved-ref" - }, - { - "input_parameter": "genome_text", - "target_property": "genome_text" - }, - { - "input_parameter": "call_features_rRNA_SEED", - "target_property": "call_features_rRNA_SEED" - }, - { - "input_parameter": "call_features_tRNA_trnascan", - "target_property": "call_features_tRNA_trnascan" - }, - { - "input_parameter": "call_selenoproteins", - "target_property": "call_selenoproteins" - }, - { - "input_parameter": "call_pyrrolysoproteins", - "target_property": "call_pyrrolysoproteins" - }, - { - "input_parameter": "call_features_repeat_region_SEED", - "target_property": "call_features_repeat_region_SEED" - }, - { - "input_parameter": "call_features_strep_suis_repeat", - "target_property": "call_features_strep_suis_repeat" - }, - { - "input_parameter": "call_features_strep_pneumo_repeat", - "target_property": "call_features_strep_pneumo_repeat" - }, - { - "input_parameter": "call_features_crispr", - "target_property": "call_features_crispr" - }, - { - "input_parameter": "call_features_CDS_glimmer3", - "target_property": "call_features_CDS_glimmer3" - }, - { - "input_parameter": "call_features_CDS_prodigal", - "target_property": "call_features_CDS_prodigal" - }, - { - "input_parameter": "annotate_proteins_kmer_v2", - "target_property": "annotate_proteins_kmer_v2" - }, - { - "input_parameter": "kmer_v1_parameters", - "target_property": "kmer_v1_parameters" - }, - { - "input_parameter": "annotate_proteins_similarity", - "target_property": "annotate_proteins_similarity" - }, - { - "input_parameter": "resolve_overlapping_features", - "target_property": "resolve_overlapping_features" - }, - { - "input_parameter": "call_features_prophage_phispy", - "target_property": "call_features_prophage_phispy" - }, - { - "input_parameter": "retain_old_anno_for_hypotheticals", - "target_property": "retain_old_anno_for_hypotheticals" - }, - { - "input_parameter": "output_genome", - "generated_value": { - "prefix": "genomeset_" - }, - 
"target_property": "output_genome" - }, - { - "narrative_system_variable": "workspace", - "target_property": "workspace" - } - ], - "output_mapping" : [ - { - "service_method_output_path": [0, "report_name"], - "target_property": "report_name" - }, - { - "service_method_output_path": [0, "report_ref"], - "target_property": "report_ref" - }, - { - "narrative_system_variable": "workspace", - "target_property": "workspace_name" - } - ] - } - } -} From 0b9c821d171634c9ea4ea1bf108b31cbc3401e66 Mon Sep 17 00:00:00 2001 From: Qizhi Zhang Date: Wed, 7 Jun 2023 18:48:14 +0000 Subject: [PATCH 2/5] remove scientific_name, genetic_code and domain from the input parameters of the bulk annotating app --- RAST_SDK.spec | 8 +++++--- lib/RAST_SDK/RAST_SDKImpl.pm | 20 ++++---------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/RAST_SDK.spec b/RAST_SDK.spec index 2322f60..3217ac8 100644 --- a/RAST_SDK.spec +++ b/RAST_SDK.spec @@ -247,7 +247,7 @@ module RAST_SDK { /* - For RAST annotating genomes/assemblies + For RAST annotating genomes/assemblies in bulk Reference to a set of annotated Genome and/or Assembly objects in the workspace @id ws KBaseSearch.GenomeSet @@ -259,9 +259,11 @@ module RAST_SDK { list input_assemblies; genomeSet_ref input_genomeset; string input_text; - string scientific_name; + + /*string scientific_name; int genetic_code; - string domain; + string domain;*/ + int ncbi_taxon_id; int relation_engine_timestamp_ms; string output_workspace; diff --git a/lib/RAST_SDK/RAST_SDKImpl.pm b/lib/RAST_SDK/RAST_SDKImpl.pm index 06c7772..c2884b5 100755 --- a/lib/RAST_SDK/RAST_SDKImpl.pm +++ b/lib/RAST_SDK/RAST_SDKImpl.pm @@ -3,9 +3,9 @@ use strict; use Bio::KBase::Exceptions; # Use Semantic Versioning (2.0.0-rc.1) # http://semver.org -our $VERSION = '1.9.3'; -our $GIT_URL = 'https://github.com/kbaseapps/RAST_SDK'; -our $GIT_COMMIT_HASH = '5e51d931af209b9ffda681301703e03cf5868e02'; +our $VERSION = '1.9.5'; +our $GIT_URL = 
'https://github.com/kbaseapps/RAST_SDK.git'; +our $GIT_COMMIT_HASH = 'dba2e368333f0796e08bf617bd5a0b18c755e3cc'; =head1 NAME @@ -2183,9 +2183,6 @@ BulkRastGenomesAssembliesParams is a reference to a hash where the following key input_assemblies has a value which is a reference to a list where each element is a RAST_SDK.data_obj_ref input_genomeset has a value which is a RAST_SDK.genomeSet_ref input_text has a value which is a string - scientific_name has a value which is a string - genetic_code has a value which is an int - domain has a value which is a string ncbi_taxon_id has a value which is an int relation_engine_timestamp_ms has a value which is an int output_workspace has a value which is a string @@ -2211,9 +2208,6 @@ BulkRastGenomesAssembliesParams is a reference to a hash where the following key input_assemblies has a value which is a reference to a list where each element is a RAST_SDK.data_obj_ref input_genomeset has a value which is a RAST_SDK.genomeSet_ref input_text has a value which is a string - scientific_name has a value which is a string - genetic_code has a value which is an int - domain has a value which is a string ncbi_taxon_id has a value which is an int relation_engine_timestamp_ms has a value which is an int output_workspace has a value which is a string @@ -3150,7 +3144,7 @@ report_ref has a value which is a string =item Description -For RAST annotating genomes/assemblies +For RAST annotating genomes/assemblies in bulk Reference to a set of annotated Genome and/or Assembly objects in the workspace @id ws KBaseSearch.GenomeSet @@ -3192,9 +3186,6 @@ input_genomes has a value which is a reference to a list where each element is a input_assemblies has a value which is a reference to a list where each element is a RAST_SDK.data_obj_ref input_genomeset has a value which is a RAST_SDK.genomeSet_ref input_text has a value which is a string -scientific_name has a value which is a string -genetic_code has a value which is an int -domain has a value 
which is a string ncbi_taxon_id has a value which is an int relation_engine_timestamp_ms has a value which is an int output_workspace has a value which is a string @@ -3211,9 +3202,6 @@ input_genomes has a value which is a reference to a list where each element is a input_assemblies has a value which is a reference to a list where each element is a RAST_SDK.data_obj_ref input_genomeset has a value which is a RAST_SDK.genomeSet_ref input_text has a value which is a string -scientific_name has a value which is a string -genetic_code has a value which is an int -domain has a value which is a string ncbi_taxon_id has a value which is an int relation_engine_timestamp_ms has a value which is an int output_workspace has a value which is a string From 4f2a909d3cf105c31cbdf32451d97dcd4a35f03d Mon Sep 17 00:00:00 2001 From: Qizhi Zhang Date: Wed, 7 Jun 2023 22:04:49 +0000 Subject: [PATCH 3/5] removing ncbi_taxon_id as well --- RAST_SDK.spec | 4 ++-- lib/RAST_SDK/RAST_SDKImpl.pm | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/RAST_SDK.spec b/RAST_SDK.spec index 3217ac8..01e6a79 100644 --- a/RAST_SDK.spec +++ b/RAST_SDK.spec @@ -262,9 +262,9 @@ module RAST_SDK { /*string scientific_name; int genetic_code; - string domain;*/ + string domain; + int ncbi_taxon_id;*/ - int ncbi_taxon_id; int relation_engine_timestamp_ms; string output_workspace; string output_GenomeSet_name; diff --git a/lib/RAST_SDK/RAST_SDKImpl.pm b/lib/RAST_SDK/RAST_SDKImpl.pm index c2884b5..1bfc33c 100755 --- a/lib/RAST_SDK/RAST_SDKImpl.pm +++ b/lib/RAST_SDK/RAST_SDKImpl.pm @@ -5,7 +5,7 @@ use Bio::KBase::Exceptions; # http://semver.org our $VERSION = '1.9.5'; our $GIT_URL = 'https://github.com/kbaseapps/RAST_SDK.git'; -our $GIT_COMMIT_HASH = 'dba2e368333f0796e08bf617bd5a0b18c755e3cc'; +our $GIT_COMMIT_HASH = '0b9c821d171634c9ea4ea1bf108b31cbc3401e66'; =head1 NAME @@ -2183,7 +2183,6 @@ BulkRastGenomesAssembliesParams is a reference to a hash where the following key input_assemblies has 
a value which is a reference to a list where each element is a RAST_SDK.data_obj_ref input_genomeset has a value which is a RAST_SDK.genomeSet_ref input_text has a value which is a string - ncbi_taxon_id has a value which is an int relation_engine_timestamp_ms has a value which is an int output_workspace has a value which is a string output_GenomeSet_name has a value which is a string @@ -2208,7 +2207,6 @@ BulkRastGenomesAssembliesParams is a reference to a hash where the following key input_assemblies has a value which is a reference to a list where each element is a RAST_SDK.data_obj_ref input_genomeset has a value which is a RAST_SDK.genomeSet_ref input_text has a value which is a string - ncbi_taxon_id has a value which is an int relation_engine_timestamp_ms has a value which is an int output_workspace has a value which is a string output_GenomeSet_name has a value which is a string @@ -3186,7 +3184,6 @@ input_genomes has a value which is a reference to a list where each element is a input_assemblies has a value which is a reference to a list where each element is a RAST_SDK.data_obj_ref input_genomeset has a value which is a RAST_SDK.genomeSet_ref input_text has a value which is a string -ncbi_taxon_id has a value which is an int relation_engine_timestamp_ms has a value which is an int output_workspace has a value which is a string output_GenomeSet_name has a value which is a string @@ -3202,7 +3199,6 @@ input_genomes has a value which is a reference to a list where each element is a input_assemblies has a value which is a reference to a list where each element is a RAST_SDK.data_obj_ref input_genomeset has a value which is a RAST_SDK.genomeSet_ref input_text has a value which is a string -ncbi_taxon_id has a value which is an int relation_engine_timestamp_ms has a value which is an int output_workspace has a value which is a string output_GenomeSet_name has a value which is a string From 91c4058b407e287cc19562c16b342561b2549e51 Mon Sep 17 00:00:00 2001 From: 
Qizhi Zhang Date: Thu, 8 Jun 2023 18:06:23 +0000 Subject: [PATCH 4/5] convert genomeset name to ref before fetch object data to avoid error for genomeset input --- lib/RAST_SDK/AnnotationUtils.pm | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/RAST_SDK/AnnotationUtils.pm b/lib/RAST_SDK/AnnotationUtils.pm index e651cd4..6e9bcd1 100644 --- a/lib/RAST_SDK/AnnotationUtils.pm +++ b/lib/RAST_SDK/AnnotationUtils.pm @@ -3317,7 +3317,7 @@ sub _get_bulk_rast_parameters { my $ws = $params->{output_workspace}; my $in_assemblies = $params->{input_assemblies}; my $in_genomes = $params->{input_genomes}; - my $in_genomeset_ref = $params->{input_genomeset}; + my $in_genomeset = $params->{input_genomeset}; my $in_text = $params->{input_text}; print "*********Input genomes------\n".Dumper($in_genomes)."\n"; @@ -3327,7 +3327,10 @@ sub _get_bulk_rast_parameters { # If a genomeSet object is given in the input, fetch the genome refs and add them to # the $in_genomes array first. - if ($in_genomeset_ref) { + if ($in_genomeset) { + my $chk = $self->_validate_KB_objref_name($in_genomeset); + my $gnmset_info = $self->_fetch_object_info($in_genomeset, $chk, $ws); + my $in_genomeset_ref = $gnmset_info->[6].'/'.$gnmset_info->[0].'/'.$gnmset_info->[4]; my $genomeset_data = $self->_fetch_object_data($in_genomeset_ref); my $genomeset_elements = $genomeset_data->{elements}; for my $ele_key (keys(%{$genomeset_elements})) { From 7b44e2e0d1bdd79790754502f779bc14f5df3e10 Mon Sep 17 00:00:00 2001 From: Qizhi Zhang Date: Thu, 8 Jun 2023 19:12:58 +0000 Subject: [PATCH 5/5] updated RELEASE_NOTES.md and bumped up the version to 1.9.6 --- RELEASE_NOTES.md | 4 ++++ kbase.yml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 8c70389..bb4b862 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,6 +1,10 @@ ### OVERVIEW This module wraps the RAST annotation pipeline for KBase. 
+### Version 1.9.6 +__Changes__ +Consolidated apps by combining 2 apps "Annotate Microbial Assembly with RASTtk - v1.073" and "Annotate Microbial Genome with RASTtk - v1.073" into "Annotate Genome/Assembly with RASTtk - v1.073"; another 2 apps "Annotate Multiple Microbial Assemblies with RASTtk - v1.073" and "Annotate Multiple Microbial Genomes with RASTtk - v1.073" into "Annotate Multiple Genomes/Assemblies with RASTtk - v1.073". After the consolidation, RETIRED the previous 4 apps. + ### Version 1.9.5 __Changes__ Added code to correct many errors in genomes created by RAST: added DNA sequence length to mRNA, added DNA sequence to non-coding genes, corrected repeat noncoding genes, fixed noncoding gene IDs, added md5 to noncoding genes diff --git a/kbase.yml b/kbase.yml index 1123a76..2887fc7 100644 --- a/kbase.yml +++ b/kbase.yml @@ -10,7 +10,7 @@ service-language: perl module-version: - 1.9.5 + 1.9.6 owners: [chenry, scanon, olson, jjeffryes, umaganapathyswork, landml, gaprice, qzhang, ialarmedalien]