jbei-seq format (.xml) sequence files
jbei-seq format sequence files:
jbei-seq format sequence files generally contain a single sequence each (although at times you may see multiple translated ORF sequences embedded as feature annotations within a long DNA sequence). Feature annotations within jbei-seq format files are extremely useful for viewing a DNA sequence at a higher, more functional level, and they make it possible to rapidly check whether a designed DNA assembly process will result in the desired sequence.
Currently, the "seq:label" field for a given feature annotation is used to determine if two features (with the same name/label) should be spliced at a DNA assembly junction.
The jbei-seq format is based upon XML. Unlike Genbank format files, jbei-seq format files do not require specific white space character counts for formatting purposes, which makes them much easier for a computer program to parse.
Here is a stylized example jbei-seq format file:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Stylized jbei-seq example: a 63 bp linear DNA sequence carrying one CDS
  feature annotation. Child elements are listed in the order required by the
  xs:sequence content models of the jbei-seq schema (seq.xsd):
    seq:seq     : name, circular, sequence, features
    seq:feature : label, complement, type, location, attribute, seqHash
-->
<seq:seq xmlns:seq="http://jbei.org/sequence"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://jbei.org/sequence seq.xsd">
  <seq:name>signal_pep</seq:name>
  <seq:circular>false</seq:circular>
  <seq:sequence>GGCAGCAAGGTCTACGGCAAGGAACAGTTTTTGCGGATGCGCCAGAGCATGTTCCCCGATCGC</seq:sequence>
  <seq:features>
    <seq:feature>
      <!-- seq:label is the field compared when deciding whether two features
           should be spliced at a DNA assembly junction. -->
      <seq:label>signal_peptide</seq:label>
      <seq:complement>false</seq:complement>
      <seq:type>CDS</seq:type>
      <seq:location>
        <seq:genbankStart>1</seq:genbankStart>
        <seq:end>63</seq:end>
      </seq:location>
      <seq:attribute name="translation">GSKVYGKEQFLRMRQSMFPDR</seq:attribute>
      <!-- NOTE(review): the schema also declares seq:seqHash as a required
           final child of seq:feature. It is omitted from this stylized
           example because its value cannot be derived from this page;
           confirm whether a hash should be shown here. -->
    </seq:feature>
  </seq:features>
</seq:seq>
Here is the actual jbei-seq format sequence file: signal_peptide.xml
For the sake of comparison, here is the equivalent Genbank format file:
LOCUS       signal_pep                63 bp ds-DNA     linear       26-MAR-2011
ACCESSION
VERSION
KEYWORDS    .
FEATURES             Location/Qualifiers
     CDS             1..63
                     /label="signal_peptide"
                     /translation="GSKVYGKEQFLRMRQSMFPDR"
ORIGIN
        1 ggcagcaagg tctacggcaa ggaacagttt ttgcggatgc gccagagcat gttccccgat
       61 cgc
//
Here is the actual Genbank format file: signal_peptide.gb
Here is the stylized schema for the jbei-seq sequence format:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  XML Schema for the jbei-seq sequence format.
  Every element is declared globally in the http://jbei.org/sequence
  namespace (prefix "seq") and, because elementFormDefault is "qualified",
  must be namespace-qualified in instance documents.
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:seq="http://jbei.org/sequence"
           targetNamespace="http://jbei.org/sequence"
           elementFormDefault="qualified">

  <!-- Root element: one sequence record. Children must appear in the order
       name, circular, sequence, then an optional features list. -->
  <xs:element name="seq">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="seq:name"/>
        <xs:element ref="seq:circular"/>
        <xs:element ref="seq:sequence"/>
        <xs:element ref="seq:features" minOccurs="0"/>
      </xs:sequence>
      <xs:attribute name="schemaVersion" type="xs:string"/> <!-- "2011.08.0" year.month.revision -->
    </xs:complexType>
  </xs:element>

  <!-- Wrapper for the feature annotations; when present it must hold at
       least one feature. -->
  <xs:element name="features">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="seq:feature" maxOccurs="unbounded"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <!-- A single feature annotation. Children must appear in the order
       label, complement, type, location(s), attribute(s), seqHash.
       location and attribute may repeat but each must occur at least once. -->
  <xs:element name="feature">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="seq:label"/>
        <xs:element ref="seq:complement"/>
        <xs:element ref="seq:type"/>
        <xs:element ref="seq:location" maxOccurs="unbounded"/>
        <xs:element ref="seq:attribute" maxOccurs="unbounded"/>
        <xs:element ref="seq:seqHash"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <!-- One start/end span of a feature. siteInbetween and singleResidue are
       optional boolean flags; their exact semantics are not defined in this
       schema. -->
  <xs:element name="location">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="seq:genbankStart"/>
        <xs:element ref="seq:end"/>
      </xs:sequence>
      <xs:attribute name="siteInbetween" type="xs:boolean"/>
      <xs:attribute name="singleResidue" type="xs:boolean"/>
    </xs:complexType>
  </xs:element>

  <!-- Simple leaf elements of the record and of a location. -->
  <xs:element name="name" type="xs:string"/>
  <xs:element name="circular" type="xs:boolean"/>
  <xs:element name="genbankStart" type="xs:integer"/>
  <xs:element name="end" type="xs:integer"/>

  <!-- Feature type, restricted to the keys enumerated below (presumably
       mirroring the Genbank feature keys; confirm against the Genbank
       feature table definition). -->
  <xs:element name="type">
    <xs:simpleType>
      <xs:restriction base="xs:string">
        <xs:enumeration value="allele"/>
        <xs:enumeration value="attenuator"/>
        <xs:enumeration value="C_region"/>
        <xs:enumeration value="CAAT_signal"/>
        <xs:enumeration value="CDS"/>
        <xs:enumeration value="conflict"/>
        <xs:enumeration value="D-loop"/>
        <xs:enumeration value="D_segment"/>
        <xs:enumeration value="enhancer"/>
        <xs:enumeration value="exon"/>
        <xs:enumeration value="gene"/>
        <xs:enumeration value="GC_signal"/>
        <xs:enumeration value="iDNA"/>
        <xs:enumeration value="intron"/>
        <xs:enumeration value="J_region"/>
        <xs:enumeration value="LTR"/>
        <xs:enumeration value="mat_peptide"/>
        <xs:enumeration value="misc_binding"/>
        <xs:enumeration value="misc_difference"/>
        <xs:enumeration value="misc_feature"/>
        <xs:enumeration value="misc_recomb"/>
        <xs:enumeration value="misc_RNA"/>
        <xs:enumeration value="misc_signal"/>
        <xs:enumeration value="misc_structure"/>
        <xs:enumeration value="modified_base"/>
        <xs:enumeration value="mRNA"/>
        <xs:enumeration value="mutation"/>
        <xs:enumeration value="N_region"/>
        <xs:enumeration value="old_sequence"/>
        <xs:enumeration value="polyA_signal"/>
        <xs:enumeration value="polyA_site"/>
        <xs:enumeration value="precursor_RNA"/>
        <xs:enumeration value="prim_transcript"/>
        <xs:enumeration value="primer"/>
        <xs:enumeration value="primer_bind"/>
        <xs:enumeration value="promoter"/>
        <xs:enumeration value="protein_bind"/>
        <xs:enumeration value="RBS"/>
        <xs:enumeration value="rep_origin"/>
        <xs:enumeration value="repeat_region"/>
        <xs:enumeration value="repeat_unit"/>
        <xs:enumeration value="rRNA"/>
        <xs:enumeration value="S_region"/>
        <xs:enumeration value="satellite"/>
        <xs:enumeration value="scRNA"/>
        <xs:enumeration value="sig_peptide"/>
        <xs:enumeration value="snRNA"/>
        <xs:enumeration value="source"/>
        <xs:enumeration value="stem_loop"/>
        <xs:enumeration value="STS"/>
        <xs:enumeration value="TATA_signal"/>
        <xs:enumeration value="terminator"/>
        <xs:enumeration value="transit_peptide"/>
        <xs:enumeration value="transposon"/>
        <xs:enumeration value="tRNA"/>
        <xs:enumeration value="unsure"/>
        <xs:enumeration value="V_region"/>
        <xs:enumeration value="variation"/>
        <xs:enumeration value="-10_signal"/>
        <xs:enumeration value="-35_signal"/>
        <xs:enumeration value="3'clip"/>
        <xs:enumeration value="3'UTR"/>
        <xs:enumeration value="5'clip"/>
        <xs:enumeration value="5'UTR"/>
      </xs:restriction>
    </xs:simpleType>
  </xs:element>

  <!-- Feature label (free text) and strand flag. -->
  <xs:element name="label" type="xs:string"/>
  <xs:element name="complement" type="xs:boolean"/>

  <!-- Named key/value qualifier on a feature: the element text is the value,
       the required "name" attribute is the key, and "quoted" is an optional
       boolean flag. -->
  <xs:element name="attribute">
    <xs:complexType>
      <xs:simpleContent>
        <xs:extension base="xs:string">
          <xs:attribute name="name" type="xs:string" use="required"/>
          <xs:attribute name="quoted" type="xs:boolean"/>
        </xs:extension>
      </xs:simpleContent>
    </xs:complexType>
  </xs:element>

  <!-- Hash string attached to a feature; the hashing scheme is not specified
       in this schema. -->
  <xs:element name="seqHash" type="xs:string"/>

  <!-- The sequence itself, as a plain string. -->
  <xs:element name="sequence" type="xs:string"/>

</xs:schema>
Here is the actual schema for the jbei-seq format sequence file: seq.xsd