<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
    
    <head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
        <!-- AppResources meta begin -->
        <script type="text/javascript">var ncbi_startTime = new Date();</script>
        <!-- AppResources meta end -->
        
        <!-- TemplateResources meta begin -->
        <meta name="paf_template" content="" />

        <!-- TemplateResources meta end -->
        
        <!-- Logger begin -->
        <meta name="ncbi_db" content="pmc" /><meta name="ncbi_pdid" content="article" /><meta name="ncbi_acc" content="" /><meta name="ncbi_domain" content="nar" /><meta name="ncbi_report" content="record" /><meta name="ncbi_type" content="fulltext" /><meta name="ncbi_objectid" content="" /><meta name="ncbi_pcid" content="/articles/PMC6868369/" /><meta name="ncbi_app" content="pmc" />
        <!-- Logger end -->
        
        <title>Tandem repeats lead to sequence assembly errors and impose multi-level challenges for genome and protein databases</title>
        
        <!-- AppResources external_resources begin -->
        <link rel="stylesheet" href="/core/jig/1.14.8/css/jig.min.css" /><script type="text/javascript" src="/core/jig/1.14.8/js/jig.min.js"></script>

        <!-- AppResources external_resources end -->
        
        <!-- Page meta begin -->
        <meta name="robots" content="INDEX,NOFOLLOW,NOARCHIVE" /><link rel="canonical" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6868369/" /><link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" /><meta name="citation_journal_title" content="Nucleic Acids Research" /><meta name="citation_title" content="Tandem repeats lead to sequence assembly errors and impose multi-level challenges for genome and protein databases" /><meta name="citation_authors" content="Ole K Tørresen, Bastiaan Star, Pablo Mier, Miguel A Andrade-Navarro, Alex Bateman, Patryk Jarnot, Aleksandra Gruca, Marcin Grynberg, Andrey V Kajava, Vasilis J Promponas, Maria Anisimova, Kjetill S Jakobsen, Dirk Linke" /><meta name="citation_date" content="2 December 2019" /><meta name="citation_issue" content="21" /><meta name="citation_volume" content="47" /><meta name="citation_firstpage" content="10994" /><meta name="citation_doi" content="10.1093/nar/gkz841" /><meta name="citation_abstract_html_url" content="/pmc/articles/PMC6868369/?report=abstract" /><meta name="citation_pmid" content="31584084" /><meta name="DC.Title" content="Tandem repeats lead to sequence assembly errors and impose multi-level challenges for genome and protein databases" /><meta name="DC.Type" content="Text" /><meta name="DC.Publisher" content="Oxford University Press" /><meta name="DC.Contributor" content="Ole K Tørresen" /><meta name="DC.Contributor" content="Bastiaan Star" /><meta name="DC.Contributor" content="Pablo Mier" /><meta name="DC.Contributor" content="Miguel A Andrade-Navarro" /><meta name="DC.Contributor" content="Alex Bateman" /><meta name="DC.Contributor" content="Patryk Jarnot" /><meta name="DC.Contributor" content="Aleksandra Gruca" /><meta name="DC.Contributor" content="Marcin Grynberg" /><meta name="DC.Contributor" content="Andrey V Kajava" /><meta name="DC.Contributor" content="Vasilis J Promponas" /><meta name="DC.Contributor" content="Maria Anisimova" /><meta name="DC.Contributor" 
content="Kjetill S Jakobsen" /><meta name="DC.Contributor" content="Dirk Linke" /><meta name="DC.Date" content="2019 Dec 2" /><meta name="DC.Identifier" content="10.1093/nar/gkz841" /><meta name="DC.Language" content="en" /><meta property="og:title" content="Tandem repeats lead to sequence assembly errors and impose multi-level challenges for genome and protein databases" /><meta property="og:type" content="article" /><meta property="og:description" content="The widespread occurrence of repetitive stretches of DNA in genomes of organisms across the tree of life imposes fundamental challenges for sequencing, genome assembly, and automated annotation of genes and proteins. This multi-level problem can lead ..." /><meta property="og:url" content="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6868369/" /><meta property="og:site_name" content="PubMed Central (PMC)" /><meta property="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/pmc-logo-share.png" /><meta name="twitter:card" content="summary" /><meta name="twitter:site" content="@ncbi" /><meta name="ncbi_feature" content="associated_data" /><link rel="stylesheet" href="/corehtml/pmc/css/3.14.1/pmc.min.css" type="text/css" /><link rel="stylesheet" href="/corehtml/pmc/css/3.14.1/pmc_extras_prnt.min.css" type="text/css" media="print" /><script type="text/javascript" src="/corehtml/pmc/js/common.min.js">//</script><script type="text/javascript" src="/corehtml/pmc/js/NcbiTagServer.min.js">//</script><meta name="citationexporter" content="backend:'https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/'" /><script type="text/javascript" src="https://www.ncbi.nlm.nih.gov/corehtml/pmc/ctxp/jquery.citationexporter.min.js">//</script><link rel="stylesheet" href="https://www.ncbi.nlm.nih.gov/corehtml/pmc/ctxp/citationexporter.css" type="text/css" /><script type="text/javascript" src="/core/mathjax/2.6.1/MathJax.js?config=/corehtml/pmc/js/mathjax-config-classic.3.4.js"></script><script 
type="text/javascript">window.name="mainwindow";</script><style type="text/css">.pmc-wm {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/pmcgifs/wm-nar.gif);background-size: auto, contain}</style><style type="text/css">.print-view{display:block}</style><style type="text/css">
        div.pmc_para_cit li.highlight,
        div.pmc_para_cit li.highlight .one_line_source
        { background: #E0E0E0; }
        a.bibr.highlight { background: #E0E0E0; } 
      </style><meta name="cited_in_systematic_reviews" content="" /><link rel="alternate" type="application/epub+zip" href="/pmc/articles/PMC6868369/epub/" /><link rel="alternate" type="application/pdf" href="/pmc/articles/PMC6868369/pdf/gkz841.pdf" />

        <!-- Page meta end -->
    <link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE88F247DEFA80310000000001E70130.m_8" />
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4160049/css/3852956/3985586/3808861/4121862/3974050/3917732/251717/4048120/3846471/14534/45193/4113719/3849091/3984811/3751656/4033350/3840896/3577051/3852958/3881636/3579733/4062871/12930/3964959/3855473/4047625/3854974/3854955/4076335/4128070/9685/3549676/3609192/3609193/3609213/3395586/4143404.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4160049/css/3411343/3882866/4157116.css" media="print" /></head>
    <body class="article">
        <div class="grid">
            <div class="col twelve_col nomargin shadow">
                <!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
                <div class="sysmessages">
                    <noscript>
	<p class="nojs">
	<strong>Warning:</strong>
	The NCBI web site requires JavaScript to function. 
	<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">Learn how to enable JavaScript</a>
	</p>
	</noscript>
                </div>
                <!--/.sysmessage-->
                <div class="wrap">
                    <div class="page">
                        <div class="top">
                            <div class="universal_header" id="universal_header"><ul class="inline_list jig-ncbimenu ui-ncbimenu resources_list" id="navcontent"><li class="ui-ncbimenu-item-leaf ui-ncbimenu-item-first ui-helper-reset ui-ncbimenu-item-no-hlt"><a class="ui-ncbimenu-link-first" href="/" role="banner" title="NCBI Home" id="ncbihome" accesskey="1"><span class="offscreen_noflow">NCBI</span><img src="//static.pubmed.gov/portal/portal3rc.fcgi/4160049/img/28977" class="ncbi_logo" title="NCBI" alt="NCBI Logo" /></a></li><li class="offscreen_noflow ui-ncbimenu-item-skip access"><a href="#maincontent" title="Skip to the content" tabindex="0" accesskey="3">Skip to main
                        content</a></li><li class="offscreen_noflow ui-ncbimenu-item-skip access"><a href="#navcontent" title="Skip to the navigation" tabindex="0" accesskey="4">Skip to
                        navigation</a></li><li id="resource-menu" class="topmenu ui-helper-reset ui-ncbimenu-item-first ui-helper-reset"><a class="ui-ncbimenu-first-link-has-submenu ui-ncbimenu-link-first topanchor expandDown" href="/static/header_footer_ajax/submenu/#resources">Resources</a></li><li id="all-howtos-menu" class="topmenu ui-helper-reset ui-ncbimenu-item-first"><a class="ui-ncbimenu-first-link-has-submenu ui-ncbimenu-link-first topanchor expandDown" href="/static/header_footer_ajax/submenu/#howto">How To</a></li><li class="offscreen_noflow ui-ncbimenu-item-skip access"><a href="/guide/browsers/#accesskeys" title="About My NCBI Accesskeys" tabindex="0" accesskey="0">About NCBI Accesskeys</a></li></ul><div class="myncbi"><span id="myncbiusername" style="display:none"><a href="/account/settings/" id="mnu" title="Edit account settings"></a></span><a accesskey="2" href="/myncbi/" id="myncbi" style="display:none">My NCBI</a><a href="/account/" id="sign_in">Sign in to NCBI</a><a href="/account/signout/" id="sign_out" style="display:none">Sign Out</a></div></div>
                            <div class="header">
    <div class="res_logo">
  <h1 class="img_logo"><a href="/pmc/" class="pmc_logo offscreen">PMC</a></h1>
  <div class="NLMLogo">
    <a href="https://www.nlm.nih.gov/" title="US National Library of Medicine">US National Library of Medicine</a>
    <br />
    <a href="https://www.nih.gov/" title="National Institutes of Health">National Institutes of Health</a>
  </div>
</div>
    <div class="search"><form method="get" action="/pmc/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="pmc" selected="selected" data-ac_dict="pmc-search-autocomplete">PMC</option><option value="pubmed" class="last">PubMed</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="biosystems">BioSystems</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="homologene">HomoloGene</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="ncbisearch">NCBI Web Site</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc" data-ac_dict="pmc-search-autocomplete">PMC</option><option value="popset">PopSet</option><option value="probe">Probe</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sparcle">Sparcle</option><option value="sra">SRA</option><option value="structure">Structure</option><option 
value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search PMC. Use up and down arrows to choose an item from the autocomplete." value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="dictionary:'pmc-search-autocomplete',disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class="searchlinks inline_list"><li>
                        <a href="/pmc/advanced/">Advanced</a>
                    </li><li>
                        <a href="/pmc/journals/">Journal list</a>
                    </li><li class="help">
                        <a target="_blank" href="/books/NBK3825/">Help</a>
                    </li></ul></div>
</div>

                            
                            
                        <!--<component id="Page" label="headcontent"/>-->
                            
                        </div>
                        <div class="content">
                            <!-- site messages -->
                            <div class="container">
    <div id="maincontent" class="content eight_col col">
        <div class="navlink-box">
            <ul class="page-breadcrumbs inline_list small"><li class="journal-list"><a href="/pmc/journals/" class="navlink">Journal List</a></li><li class="archive"><a class="navlink" href="/pmc/journals/4/">Nucleic Acids Res</a></li><li class="issue-page"><a class="navlink" href="/pmc/issues/346115/">v.47(21); 2019 Dec 2</a></li><li class="accid">PMC6868369</li></ul>
        </div>

        <!-- Journal banner -->
        <div class="pmc-page-banner whole_rhythm"><div><img src="/corehtml/pmc/pmcgifs/narlogo.gif" alt="Logo of nar" usemap="#logo-imagemap" /><map id="logo-imagemap" name="logo-imagemap"><area alt="Link to Publisher's site" title="Link to Publisher's site" shape="default" coords="0,0,499,74" href="http://academic.oup.com/nar" target="pmc_ext" ref="reftype=publisher&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CBanner&amp;TO=Publisher%7COther%7CN/A" /></map></div> </div>
        
        <!--component id='MainPortlet' label='search-reference'/-->
        
        <!-- Book content -->
        <div class="">
            
        
            
            <div class="hide-overflow article lit-style content pmc-wm slang-all page-box"><!--main-content--><div class="jig-ncbiinpagenav" data-jigconfig="smoothScroll: false, allHeadingLevels: ['h2'], headingExclude: ':hidden'"><div class="fm-sec half_rhythm no_top_margin"><div class="fm-citation half_rhythm no_top_margin clearfix"><div class="inline_block eight_col va_top"><div><div><span class="cit"><span id="pmcmata">Nucleic Acids Res</span>. 2019 Dec 2; 47(21): 10994–11006. </span></div><div><span class="fm-vol-iss-date">Published online 2019 Oct 4. </span>  <span class="doi">doi: <a href="//dx.doi.org/10.1093%2Fnar%2Fgkz841" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CFront%20Matter&amp;TO=Content%20Provider%7CCrosslink%7CDOI">10.1093/nar/gkz841</a></span></div></div></div><div class="inline_block four_col va_top show-overflow align_right"><div class="fm-citation-ids"><div class="fm-citation-pmcid"><span class="fm-citation-ids-label">PMCID: </span><span>PMC6868369</span></div><div class="fm-citation-pmid">PMID: <a href="/pubmed/31584084">31584084</a></div></div></div></div><h1 class="content-title">Tandem repeats lead to sequence assembly errors and impose multi-level challenges for genome and protein databases</h1><div class="half_rhythm"><div class="contrib-group fm-author"><a href="/pubmed/?term=T%26%23x000f8%3Brresen%20OK%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524550211200" co-class="co-affbox">Ole K T&#x000f8;rresen</a>,<sup>1</sup> <a href="/pubmed/?term=Star%20B%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524591800928" co-class="co-affbox">Bastiaan Star</a>,<sup>1</sup> <a href="/pubmed/?term=Mier%20P%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524547380848" co-class="co-affbox">Pablo Mier</a>,<sup>2</sup> <a 
href="/pubmed/?term=Andrade-Navarro%20MA%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524547378448" co-class="co-affbox">Miguel A Andrade-Navarro</a>,<sup>2</sup> <a href="/pubmed/?term=Bateman%20A%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524595747056" co-class="co-affbox">Alex Bateman</a>,<sup>3</sup> <a href="/pubmed/?term=Jarnot%20P%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524595743984" co-class="co-affbox">Patryk Jarnot</a>,<sup>4</sup> <a href="/pubmed/?term=Gruca%20A%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524596312736" co-class="co-affbox">Aleksandra Gruca</a>,<sup>4</sup> <a href="/pubmed/?term=Grynberg%20M%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524596310528" co-class="co-affbox">Marcin Grynberg</a>,<sup>5</sup> <a href="/pubmed/?term=Kajava%20AV%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524554093888" co-class="co-affbox">Andrey V Kajava</a>,<sup>6,</sup><sup>7</sup> <a href="/pubmed/?term=Promponas%20VJ%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524554089920" co-class="co-affbox">Vasilis J Promponas</a>,<sup>8</sup> <a href="/pubmed/?term=Anisimova%20M%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524544888624" co-class="co-affbox">Maria Anisimova</a>,<sup>9,</sup><sup>10</sup> <a href="/pubmed/?term=Jakobsen%20KS%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524596726368" co-class="co-affbox">Kjetill S Jakobsen</a>,<sup>1</sup> and  <a href="/pubmed/?term=Linke%20D%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084" class="affpopup" co-rid="_co_idm140524596723360" co-class="co-affbox">Dirk Linke</a><sup>11</sup><sup></sup></div><div 
style="display:none" class="contrib-group aff-tip"><div id="_co_idm140524550211200"><h3 class="no_margin">Ole K T&#x000f8;rresen</h3><p><sup>1</sup>  Centre for Ecological and Evolutionary Synthesis, Department of Biosciences, University of Oslo, NO-0316 Oslo, Norway</p><div>Find articles by <a href="/pubmed/?term=T%26%23x000f8%3Brresen%20OK%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Ole K T&#x000f8;rresen</a></div></div><div id="_co_idm140524591800928"><h3 class="no_margin">Bastiaan Star</h3><p><sup>1</sup>  Centre for Ecological and Evolutionary Synthesis, Department of Biosciences, University of Oslo, NO-0316 Oslo, Norway</p><div>Find articles by <a href="/pubmed/?term=Star%20B%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Bastiaan Star</a></div></div><div id="_co_idm140524547380848"><h3 class="no_margin">Pablo Mier</h3><p><sup>2</sup>  Faculty of Biology, Johannes Gutenberg University Mainz, Hans-Dieter-Husch-Weg 15, 55128 Mainz, Germany</p><div>Find articles by <a href="/pubmed/?term=Mier%20P%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Pablo Mier</a></div></div><div id="_co_idm140524547378448"><h3 class="no_margin">Miguel A Andrade-Navarro</h3><p><sup>2</sup>  Faculty of Biology, Johannes Gutenberg University Mainz, Hans-Dieter-Husch-Weg 15, 55128 Mainz, Germany</p><div>Find articles by <a href="/pubmed/?term=Andrade-Navarro%20MA%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Miguel A Andrade-Navarro</a></div></div><div id="_co_idm140524595747056"><h3 class="no_margin">Alex Bateman</h3><p><sup>3</sup>  European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton. 
CB10 1SD, UK</p><div>Find articles by <a href="/pubmed/?term=Bateman%20A%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Alex Bateman</a></div></div><div id="_co_idm140524595743984"><h3 class="no_margin">Patryk Jarnot</h3><p><sup>4</sup>  Institute of Informatics, Silesian University of Technology, Akademicka 16, 44-100 Gliwice, Poland</p><div>Find articles by <a href="/pubmed/?term=Jarnot%20P%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Patryk Jarnot</a></div></div><div id="_co_idm140524596312736"><h3 class="no_margin">Aleksandra Gruca</h3><p><sup>4</sup>  Institute of Informatics, Silesian University of Technology, Akademicka 16, 44-100 Gliwice, Poland</p><div>Find articles by <a href="/pubmed/?term=Gruca%20A%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Aleksandra Gruca</a></div></div><div id="_co_idm140524596310528"><h3 class="no_margin">Marcin Grynberg</h3><p><sup>5</sup>  Institute of Biochemistry and Biophysics PAS, Pawi&#x00144;skiego 5A, 02-106 Warsaw, Poland</p><div>Find articles by <a href="/pubmed/?term=Grynberg%20M%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Marcin Grynberg</a></div></div><div id="_co_idm140524554093888"><h3 class="no_margin">Andrey V Kajava</h3><p><sup>6</sup>  Centre de Recherche en Biologie cellulaire de Montpellier, UMR 5237 CNRS, Universite Montpellier 1919 Route de Mende, CEDEX 5, 34293&#x000a0;Montpellier, France</p><p><sup>7</sup>  Institut de Biologie Computationnelle, 34095&#x000a0;Montpellier, France</p><div>Find articles by <a href="/pubmed/?term=Kajava%20AV%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Andrey V Kajava</a></div></div><div id="_co_idm140524554089920"><h3 class="no_margin">Vasilis J Promponas</h3><p><sup>8</sup>  Bioinformatics Research Laboratory, Department of Biological Sciences, University of Cyprus, PO Box 20537, CY 1678&#x000a0;Nicosia, Cyprus</p><div>Find articles by <a 
href="/pubmed/?term=Promponas%20VJ%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Vasilis J Promponas</a></div></div><div id="_co_idm140524544888624"><h3 class="no_margin">Maria Anisimova</h3><p><sup>9</sup>  Institute of Applied Simulations, School of Life Sciences and Facility Management, Zurich University of Applied Sciences (ZHAW), W&#x000e4;denswil, Switzerland</p><p><sup>10</sup>  Swiss Institute of Bioinformatics (SIB), Lausanne, Switzerland</p><div>Find articles by <a href="/pubmed/?term=Anisimova%20M%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Maria Anisimova</a></div></div><div id="_co_idm140524596726368"><h3 class="no_margin">Kjetill S Jakobsen</h3><p><sup>1</sup>  Centre for Ecological and Evolutionary Synthesis, Department of Biosciences, University of Oslo, NO-0316 Oslo, Norway</p><div>Find articles by <a href="/pubmed/?term=Jakobsen%20KS%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Kjetill S Jakobsen</a></div></div><div id="_co_idm140524596723360"><h3 class="no_margin">Dirk Linke</h3><p><sup>11</sup>  Section for Genetics and Evolutionary Biology, Department of Biosciences, University of Oslo, NO-0316 Oslo, Norway</p><div>Find articles by <a href="/pubmed/?term=Linke%20D%5BAuthor%5D&amp;cauthor=true&amp;cauthor_uid=31584084">Dirk Linke</a></div></div></div></div><div class="fm-panel half_rhythm"><div class="togglers"><a href="#" class="pmctoggle" rid="idm140524589505984_ai">Author information</a> <a href="#" class="pmctoggle" rid="idm140524589505984_an">Article notes</a> <a href="#" class="pmctoggle" rid="idm140524589505984_cpl">Copyright and License information</a> <a href="/pmc/about/disclaimer/">Disclaimer</a></div><div class="fm-authors-info fm-panel hide half_rhythm" id="idm140524589505984_ai" style="display:none"><div class="fm-affl" lang="en" id="AFF1"><sup>1</sup>  Centre for Ecological and Evolutionary Synthesis, Department of Biosciences, University of Oslo, NO-0316 Oslo, Norway</div><div class="fm-affl" 
lang="en" id="AFF2"><sup>2</sup>  Faculty of Biology, Johannes Gutenberg University Mainz, Hans-Dieter-Husch-Weg 15, 55128 Mainz, Germany</div><div class="fm-affl" lang="en" id="AFF3"><sup>3</sup>  European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton. CB10 1SD, UK</div><div class="fm-affl" lang="en" id="AFF4"><sup>4</sup>  Institute of Informatics, Silesian University of Technology, Akademicka 16, 44-100 Gliwice, Poland</div><div class="fm-affl" lang="en" id="AFF5"><sup>5</sup>  Institute of Biochemistry and Biophysics PAS, Pawi&#x00144;skiego 5A, 02-106 Warsaw, Poland</div><div class="fm-affl" lang="en" id="AFF6"><sup>6</sup>  Centre de Recherche en Biologie cellulaire de Montpellier, UMR 5237 CNRS, Universite Montpellier 1919 Route de Mende, CEDEX 5, 34293&#x000a0;Montpellier, France</div><div class="fm-affl" lang="en" id="AFF7"><sup>7</sup>  Institut de Biologie Computationnelle, 34095&#x000a0;Montpellier, France</div><div class="fm-affl" lang="en" id="AFF8"><sup>8</sup>  Bioinformatics Research Laboratory, Department of Biological Sciences, University of Cyprus, PO Box 20537, CY 1678&#x000a0;Nicosia, Cyprus</div><div class="fm-affl" lang="en" id="AFF9"><sup>9</sup>  Institute of Applied Simulations, School of Life Sciences and Facility Management, Zurich University of Applied Sciences (ZHAW), W&#x000e4;denswil, Switzerland</div><div class="fm-affl" lang="en" id="AFF10"><sup>10</sup>  Swiss Institute of Bioinformatics (SIB), Lausanne, Switzerland</div><div class="fm-affl" lang="en" id="AFF11"><sup>11</sup>  Section for Genetics and Evolutionary Biology, Department of Biosciences, University of Oslo, NO-0316 Oslo, Norway</div><div id="COR1">To whom correspondence should be addressed. 
Tel: +47 22857654; Email: <a href="mailto:dev@null" data-email="on.oiu.vbi@eknil.krid" class="oemail">on.oiu.vbi@eknil.krid</a></div></div><div class="fm-article-notes fm-panel hide half_rhythm" id="idm140524589505984_an" style="display:none"><div class="fm-pubdate half_rhythm">Received 2019 Jun 7; Revised 2019 Sep 3; Accepted 2019 Oct 1.</div></div><div class="permissions fm-panel half_rhythm hide" id="idm140524589505984_cpl" style="display:none"><div class="fm-copyright half_rhythm"><a href="/pmc/about/copyright/">Copyright</a> &#x000a9; The Author(s) 2019. Published by Oxford University Press on behalf of Nucleic Acids Research.</div><div class="license half_rhythm">This is an Open Access article distributed under the terms of the Creative Commons Attribution License (<a href="http://creativecommons.org/licenses/by/4.0/" data-ga-action="click_feat_suppl" ref="reftype=extlink&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CFront%20Matter&amp;TO=External%7CLink%7CURI" target="_blank">http://creativecommons.org/licenses/by/4.0/</a>), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</div></div></div><div id="pmclinksbox" class="links-box whole_rhythm hidden"></div></div><div class="sec"></div><div id="ass-data" class="tsec fm-sec whole_rhythm" data-section="Featured_PMC_Datacitation"><h2>Associated Data</h2><dl data-count="1" class="box-data-suppmats whole_rhythm no_bottom_margin"><dt><a href="#" rid="data-suppmats" data-ga-action="click_feat_toggler" data-ga-label="Supplementary Materials" class="pmctoggle">Supplementary Materials</a></dt><dd id="data-suppmats" style="display: none;"><div class="half_rhythm"><div class="caption half_rhythm no_bottom_margin"><strong>gkz841_Supplemental_File.</strong></div><div><a href="/pmc/articles/PMC6868369/bin/gkz841_supplemental_file.pdf" data-ga-action="click_feat_suppl">gkz841_supplemental_file.pdf</a><span 
style="color:gray"> (314K)</span></div><div class="small guid">GUID: ED5483C9-6243-4EA5-88E7-A13DD7B6BFA4</div></div></dd></dl></div><div id="idm140524551932432" lang="en" class="tsec sec"><h2 class="head no_bottom_margin" id="idm140524551932432title" style="text-transform: uppercase;">Abstract</h2><!--article-meta--><div><p id="__p1" class="p p-first-last">The widespread occurrence of repetitive stretches of DNA in genomes of organisms across the tree of life imposes fundamental challenges for sequencing, genome assembly, and automated annotation of genes and proteins. This multi-level problem can lead to errors in genome and protein databases that are often not recognized or acknowledged. As a consequence, end users working with sequences with repetitive regions are faced with &#x02018;ready-to-use&#x02019; deposited data whose trustworthiness is difficult to determine, let alone to quantify. Here, we provide a review of the problems associated with tandem repeat sequences that originate from different stages during the sequencing-assembly-annotation-deposition workflow, and that may proliferate in public database repositories affecting all downstream analyses. As a case study, we provide examples of the Atlantic cod genome, whose sequencing and assembly were hindered by a particularly high prevalence of tandem repeats. We complement this case study with examples from other species, where mis-annotations and sequencing errors have propagated into protein databases. 
With this review, we aim to raise the awareness level within the community of database users, and alert scientists working in the underlying workflow of database creation that the data they omit or improperly assemble may well contain important biological information valuable to others.</p></div></div><div id="SEC1" class="tsec sec"><h2 class="head no_bottom_margin" id="SEC1title" style="text-transform: uppercase;">INTRODUCTION</h2><p id="__p2" class="p p-first">The availability of DNA and protein sequence data has revolutionized the way we study cellular, molecular, physiological, evolutionary and developmental processes, allowing the association of phenotypes with genotypes at a single nucleotide (or single amino acid) resolution. Researchers rely on public sequence depositories and other databases for sharing their data, such as GenBank or UniProt, and the content of these databases has grown exponentially in the last decades. While such databases initially consisted predominantly of submissions of individual gene or protein sequences that were carefully curated, large proportions of the content of genome and protein databases today originate from different types of metagenome and genome sequencing and assembly projects. GenBank, for example, included more than 2635 Gbp (billion base pairs) in its 2017 release number 221, of which 2242 Gbp (85%) originated from whole-genome shotgun sequencing (<a href="#B1" rid="B1" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133082">1</a>). For an informed use of such data, it is essential that end users understand the distinct contrast in quality between individual, well-curated submissions and entries generated from automated sequence annotation pipelines. 
The latter procedures can contain unrecognized errors.</p><p id="__p3">Here, we argue that awareness of potential database errors is especially relevant with regard to repetitive stretches of DNA, which can occur in both noncoding and coding regions of genomes. The specific nature of this type of DNA sequence can introduce and propagate bias during multiple levels of analyses, and resulting uncertainties and errors are automatically translated further into protein sequences where they become impossible to recognize. Such issues may arise from problems originating from DNA sequencing, from difficulties with assembling repetitive DNA regions and from inaccuracies generated during the annotation process. The multiplicity of these error sources makes it particularly difficult for researchers to understand and assess the bias that may be underlying the sequences that they retrieve from public databases. As an example, in Table <a href="/pmc/articles/PMC6868369/table/tbl1/" target="table" class="fig-table-link figpopup" rid-figpopup="tbl1" rid-ob="ob-tbl1" co-legend-rid=""><span style="position: relative;text-decoration:none;">&#x200B;<span class="figpopup-sensitive-area" style="left: -2.5em;">Table1,</span></span><span>1</span></a>, we have listed the total number of proteins in UniProtKB/Swiss-Prot that have changed the length of their repetitive region from the first occurrence in the database to the latest&#x02014;suggesting that errors in repetitive region length have been identified and corrected. The average difference in length is 13.57 amino acids, a substantial number. 
The 1669 proteins with differences in repeats (Table <a href="/pmc/articles/PMC6868369/table/tbl1/" target="table" class="fig-table-link figpopup" rid-figpopup="tbl1" rid-ob="ob-tbl1" co-legend-rid=""><span style="position: relative;text-decoration:none;">&#x200B;<span class="figpopup-sensitive-area" style="left: -3em;">(Table1)</span></span><span>1</span></a>) are 6% of all proteins in the database that have a repetitive region (see Table <a href="/pmc/articles/PMC6868369/table/tbl2/" target="table" class="fig-table-link figpopup" rid-figpopup="tbl2" rid-ob="ob-tbl2" co-legend-rid=""><span style="position: relative;text-decoration:none;">&#x200B;<span class="figpopup-sensitive-area" style="left: -2.5em;">Table2).</span></span><span>2</span></a>). These numbers do not reflect a true error rate but suggest that errors in repeat numbers and repeat length are frequent and might often go unnoticed, especially in databases that are less well curated than UniProtKB/Swiss-Prot.</p><!--table ft1--><!--table-wrap mode="anchored" t5--><div class="table-wrap table anchored whole_rhythm" id="tbl1"><h3>Table 1.</h3><!--caption a7--><div class="caption"><p id="__p4">Summary of proteins from UniProtKB/Swiss-Prot where the length of repetitive region has changed between different versions of the database</p></div><div data-largeobj="" data-largeobj-link-rid="largeobj_idm140524548268848" class="xtable"><table frame="hsides" rules="groups" class="rendered small default_table"><thead><tr><th rowspan="1" colspan="1">Proteins (<em>n</em>)</th><th rowspan="1" colspan="1">Proteins with different sequence between versions (<em>n</em>)</th><th rowspan="1" colspan="1">Proteins with different repetitive region lengths (<em>n</em>)</th><th rowspan="1" colspan="1">Average/standard deviation of the length of repetitive regions in original version of the sequence<sup>a</sup></th><th rowspan="1" colspan="1">Average/standard deviation of the length of repetitive regions in the version 2018_06<br />
of the sequence<sup>a</sup></th><th rowspan="1" colspan="1">Average/standard deviation of the difference in lengths of repetitive regions<sup>a</sup></th></tr></thead><tbody><tr><td rowspan="1" colspan="1">554&#x000a0;241</td><td rowspan="1" colspan="1">74434</td><td rowspan="1" colspan="1">1669</td><td rowspan="1" colspan="1">31.14/72.09</td><td rowspan="1" colspan="1">35.20/84.08</td><td rowspan="1" colspan="1">13.57/45.69</td></tr></tbody></table></div><div id="largeobj_idm140524548268848" class="largeobj-link align_right" style="display: none"><a target="object" href="/pmc/articles/PMC6868369/table/tbl1/?report=objectonly">Open in a separate window</a></div><div class="tblwrap-foot"><div id="T1TFN1"><p id="__p5" class="p p-first-last">
<sup>a</sup>Measured in amino acid residues.</p></div></div></div><!--table ft1--><!--table-wrap mode="anchored" t5--><div class="table-wrap table anchored whole_rhythm" id="tbl2"><h3>Table 2.</h3><!--caption a7--><div class="caption"><p id="__p6">Differences of repetitive region lengths in evolutionarily distinct groups of organisms</p></div><div data-largeobj="" data-largeobj-link-rid="largeobj_idm140524596081056" class="xtable"><table frame="hsides" rules="groups" class="rendered small default_table"><thead><tr><th rowspan="1" colspan="1">Database name</th><th rowspan="1" colspan="1">Number of proteins</th><th rowspan="1" colspan="1">Number of proteins with STRs</th><th rowspan="1" colspan="1">% of proteins with STRs</th><th rowspan="1" colspan="1">Median<sup>a</sup></th><th rowspan="1" colspan="1">Average<sup>a</sup></th><th rowspan="1" colspan="1">Standard deviation<sup>a</sup></th><th rowspan="1" colspan="1">Number of clusters<sup>b</sup></th></tr></thead><tbody><tr><td rowspan="1" colspan="1">UniProtKB/Swiss-Prot (total)</td><td rowspan="1" colspan="1">554&#x000a0;241</td><td rowspan="1" colspan="1">28003</td><td rowspan="1" colspan="1">5.05%</td><td rowspan="1" colspan="1">14.75</td><td rowspan="1" colspan="1">15.14</td><td rowspan="1" colspan="1">3.69</td><td rowspan="1" colspan="1">6237</td></tr><tr><td rowspan="1" colspan="1">Archaea</td><td rowspan="1" colspan="1">19&#x000a0;525</td><td rowspan="1" colspan="1">351</td><td rowspan="1" colspan="1">1.80%</td><td rowspan="1" colspan="1">10.71</td><td rowspan="1" colspan="1">10.63</td><td rowspan="1" colspan="1">1.27</td><td rowspan="1" colspan="1">45</td></tr><tr><td rowspan="1" colspan="1">Bacteria</td><td rowspan="1" colspan="1">333&#x000a0;691</td><td rowspan="1" colspan="1">6794</td><td rowspan="1" colspan="1">2.04%</td><td rowspan="1" colspan="1">17.38</td><td rowspan="1" colspan="1">17.45</td><td rowspan="1" colspan="1">2.66</td><td rowspan="1" colspan="1">1048</td></tr><tr><td rowspan="1" 
colspan="1">Euk: Fungi</td><td rowspan="1" colspan="1">33&#x000a0;613</td><td rowspan="1" colspan="1">3996</td><td rowspan="1" colspan="1">11.89%</td><td rowspan="1" colspan="1">13.46</td><td rowspan="1" colspan="1">13.79</td><td rowspan="1" colspan="1">3.65</td><td rowspan="1" colspan="1">893</td></tr><tr><td rowspan="1" colspan="1">Euk: Invertebrata</td><td rowspan="1" colspan="1">27&#x000a0;607</td><td rowspan="1" colspan="1">3372</td><td rowspan="1" colspan="1">12.21%</td><td rowspan="1" colspan="1">17.34</td><td rowspan="1" colspan="1">18.62</td><td rowspan="1" colspan="1">7.95</td><td rowspan="1" colspan="1">812</td></tr><tr><td rowspan="1" colspan="1">Euk: Vertebrata</td><td rowspan="1" colspan="1">18&#x000a0;292</td><td rowspan="1" colspan="1">1461</td><td rowspan="1" colspan="1">7.99%</td><td rowspan="1" colspan="1">13.66</td><td rowspan="1" colspan="1">13.90</td><td rowspan="1" colspan="1">2.42</td><td rowspan="1" colspan="1">1801</td></tr><tr><td rowspan="1" colspan="1">Euk: Plants</td><td rowspan="1" colspan="1">42&#x000a0;101</td><td rowspan="1" colspan="1">3601</td><td rowspan="1" colspan="1">8.55%</td><td rowspan="1" colspan="1">12.51</td><td rowspan="1" colspan="1">12.82</td><td rowspan="1" colspan="1">2.98</td><td rowspan="1" colspan="1">795</td></tr><tr><td rowspan="1" colspan="1">Viruses</td><td rowspan="1" colspan="1">16&#x000a0;852</td><td rowspan="1" colspan="1">889</td><td rowspan="1" colspan="1">5.28%</td><td rowspan="1" colspan="1">14.07</td><td rowspan="1" colspan="1">14.15</td><td rowspan="1" colspan="1">2.57</td><td rowspan="1" colspan="1">203</td></tr></tbody></table></div><div id="largeobj_idm140524596081056" class="largeobj-link align_right" style="display: none"><a target="object" href="/pmc/articles/PMC6868369/table/tbl2/?report=objectonly">Open in a separate window</a></div><div class="tblwrap-foot"><div id="T2TFN1"><p id="__p7" class="p p-first-last">
<sup>a</sup>Repetitive region length, measured in amino acid residues.</p></div><div id="T2TFN2"><p id="__p8" class="p p-first-last">
<sup>b</sup>Clustering was used to define repeat classes. Should a protein contain three different, co-localized STRs, the clustering method will produce 6 clusters: three with regular STRs and three with fused repeats. See also supplementary material for more information.</p></div></div></div><p id="__p9" class="p">In this review, we discuss different types of sequencing and database errors, using prominent, published examples where such errors have been found. We first provide a description of the different types of repeats that occur on the DNA and protein level and an overview of DNA sequencing technologies with their benefits and limitations. We then describe the genome assembly, annotation, and database deposition processes, and then link these processes to the different types of errors that may occur at different points in this workflow. We aim to alert the ever-growing community of database end-users of these errors, and to raise awareness among the scientists working in the underlying workflow of database creation, that data that they omit or improperly assemble may well contain important biological information valuable to others.</p><div id="SEC1-1" class="sec"><h3 id="SEC1-1title">Repetitive elements in genomes</h3><p id="__p10" class="p p-first">Repetitive DNA occurs in all domains of life&#x02014;Bacteria, Archaea and Eukaryota&#x02014;and can be grouped into two categories: interspersed repeats, such as transposable elements occurring in multiple loci across the genome, and tandem repeats (TRs) that occur in a single locus. 
In eukaryotes, repetitive DNA also occurs in specific chromosomal regions, such as the (sub)telomeric regions (<a href="#B2" rid="B2" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133102">2</a>,<a href="#B3" rid="B3" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133134">3</a>) and the centromeres (<a href="#B4" rid="B4" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133026">4</a>).&#x000a0;Transposable elements (TEs) are typically several thousand base pairs (kbp) in size, and in eukaryotes their size can range from 100 base pairs (bp) to 20 kbp&#x000a0;(<a href="#B5" rid="B5" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133099">5</a>). Large fractions of vertebrate genomes are filled with active and inactive fragments of TEs, with more than 40% of the genome of zebrafish and more than a third of mammalian genomes consisting of TEs (<a href="#B6" rid="B6" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133067">6</a>). Evolutionarily old TEs will accumulate mutations and will diverge from the original sequence, and TEs can therefore lose their repetitive nature over time. In contrast, TRs may consist of motifs as short as 1 bp, where the motif is repeated in tandem. Short tandem repeats (with a motif shorter than 10 bp) were originally called microsatellites (<a href="#B7" rid="B7" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133121">7</a>), longer tandem repeats (with a motif between 10 and 100 bp) were called minisatellite DNA (<a href="#B8" rid="B8" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133109">8</a>), and long tandem repeats (with a repeating motif longer than 100 bp) were called satellite DNA (<a href="#B9" rid="B9" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133110">9</a>). 
In eukaryotes (based on studies done on metazoans, green algae, plants and yeast), the content of TRs with a unit size of 1&#x02013;50 bp usually varies between 2000 bp/Mbp and 55 000 bp/Mbp (corresponding to 0.2&#x02013;5.5% of the genome) (<a href="#B10" rid="B10" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133073">10</a>,<a href="#B11" rid="B11" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133146">11</a>). Repeats also lead to significant intra-specific variation (i.e. variation between individuals of the same species) (<a href="#B12" rid="B12" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133037">12</a>,<a href="#B13" rid="B13" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133096">13</a>) as shown in a wide range of eukaryotes, for instance <em>Arabidopsis</em> (<a href="#B13" rid="B13" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133054">13</a>,<a href="#B14" rid="B14" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133043">14</a>) and <em>Drosophila</em> (<a href="#B15" rid="B15" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133048">15</a>). Within humans, repeats outnumber the number of bases affected by SNP variation by an order of magnitude (4&#x02013;5 fold) (<a href="#B16" rid="B16" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133127">16</a>). Intra-specific variation poses its own intrinsic challenges for instance when sequencing samples from pooled individuals (<a href="#B17" rid="B17" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133124">17</a>). 
Short tandem repeats (STRs) are less prevalent in bacteria compared to eukaryotes&#x02014;presumably due to the typically compact bacterial genomes&#x02014;but nonetheless regularly occur in bacterial coding regions (<a href="#B18" rid="B18" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133070">18</a>).</p><p id="__p11" class="p p-last">TEs can cause &#x02018;breakage&#x02019; of a continuous assembly and lead to assembly collapse, where the number of copies of a repeat found in a genome assembly is lower than the true number, but the relatively large and often evolutionarily divergent TEs are unlikely to greatly affect the accuracy of sequencing, assembly and annotation of individual protein-coding regions. While such TEs might sometimes insert themselves into gene regions, the disruptive effects of multiple kbps of sequence inserted into coding regions likely make these events extremely rare. In contrast, TRs are usually much shorter, and can often be in-frame in coding regions; therefore, we mainly focus on the problems caused by this class of repeats on the sequencing, assembly, annotation and database deposition processes.</p></div><div id="SEC1-2" class="sec sec-last"><h3 id="SEC1-2title">Short and long tandem repeats in coding sequences</h3><p id="__p12" class="p p-first">TRs are found in both non-coding and coding genomic regions, and the latter make repeated sequences also ubiquitous in proteomes. Conservative estimates suggest that TRs are present in at least one third of human protein sequences and in half of the protein sequences of the unicellular malaria parasite <em>Plasmodium falciparum</em> and the mold <em>Dictyostelium discoideum</em> (<a href="#B19" rid="B19" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133108">19</a>,<a href="#B20" rid="B20" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553610">20</a>). 
In UniProtKB/Swiss-Prot, 5% of all proteins have a repetitive region (see <a href="#sup1" rid="sup1" class=" supplementary-material">Supplementary Material and</a> Table <a href="/pmc/articles/PMC6868369/table/tbl2/" target="table" class="fig-table-link figpopup" rid-figpopup="tbl2" rid-ob="ob-tbl2" co-legend-rid=""><span style="position: relative;text-decoration:none;">&#x200B;<span class="figpopup-sensitive-area" style="left: -2.5em;">Table2).</span></span><span>2</span></a>). The TR regions come in various flavors; from single amino acid repeats (homorepeats) to the repetition of homologous domains of 100 or more residues (<a href="#B21" rid="B21" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133098">21</a>,<a href="#B22" rid="B22" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133056">22</a>). TRs with short repetitive units are more frequent than those with long repetitive units (<a href="#B19" rid="B19" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133112">19</a>,<a href="#B23" rid="B23" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133030">23</a>,<a href="#B24" rid="B24" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133062">24</a>), and repeats are more frequent in Eukaryota compared to Bacteria and Archaea (Table <a href="/pmc/articles/PMC6868369/table/tbl2/" target="table" class="fig-table-link figpopup" rid-figpopup="tbl2" rid-ob="ob-tbl2" co-legend-rid=""><span style="position: relative;text-decoration:none;">&#x200B;<span class="figpopup-sensitive-area" style="left: -3em;">(Table2).</span></span><span>2</span></a>). 
With their highly mutable nature, the presence of variable TRs in coding sequences may directly lead to an increase in protein variation and modification, which is particularly relevant for functional and evolutionary studies (<a href="#B25" rid="B25" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133137">25</a>,<a href="#B26" rid="B26" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133147">26</a>).</p><p id="__p13">Tri-nucleotide repeats in coding regions may result in amino acid homorepeats (or polyX). These are widely distributed in all branches of the tree of life and in many protein types (<a href="#B27" rid="B27" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133066">27</a>). Like other TRs, homorepeats can be important for function and their length variation is modulated by selection, as has been demonstrated for many protein families (<a href="#B28" rid="B28" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133083">28</a>). In particular, the expansion of CAG repeats that translate to polyglutamine tracts (polyQ) has been widely studied. These polyQ stretches seem to be advantageous for function in protein interactions. When the length of the repeats is too long, the resulting proteins can aggregate and cause disease, leading to selection against further repeat expansion (<a href="#B29" rid="B29" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133072">29</a>). 
Dedicated databases and resources have been developed to list and characterize amino acid homorepeats of all types (<a href="#B30" rid="B30" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133093">30</a>,<a href="#B31" rid="B31" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133088">31</a>).</p><p id="__p14">Approximately half of the TR regions in proteins may be naturally unfolded (<a href="#B32" rid="B32" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133106">32&#x02013;34</a>), while the other half of these repetitive regions folds with a plethora of shapes and functions (<a href="#B35" rid="B35" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133051">35</a>,<a href="#B36" rid="B36" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133148">36</a>). Their protein structures can be subdivided into five major classes: (i) crystalline aggregates formed by regions with 1 or 2 residue long repeats, (ii) fibrous structures stabilized by interchain interactions with 3&#x02013;7 residue repeats, (iii) structures with the repeats of 5&#x02013;40 residues dominated by solenoid proteins, (iv) &#x02018;closed&#x02019; (not elongated) structures with 30&#x02013;60 residue long repeats and, finally, (v) &#x02018;beads on a string&#x02019; structures with typical size of repeats over 50 residues, which are already large enough to fold independently into stable domains (<a href="#B35" rid="B35" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133120">35</a>,<a href="#B36" rid="B36" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133031">36</a>). When studying repetitive protein structures, it is essential that the underlying sequence information is accurate, not only regarding the type of repeats, but also the exact repeat unit number, as the latter will for example influence the length of protein fibres or the curvature of solenoid proteins. 
Unexpectedly high conservation of TR repeat unit number and order has been reported for proteins from species separated by long evolutionary time (<a href="#B23" rid="B23" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133022">23</a>,<a href="#B37" rid="B37" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133021">37</a>). This implies that negative selective pressures act on TRs to preserve important protein functions. The same studies suggest that diversifying selective pressures may play an equally important role in the function of TR-containing proteins. For example, leucine-rich repeats can be both conserved and play a role in adaptation (<a href="#B37" rid="B37" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133086">37&#x02013;39</a>). Indeed, consistent with this premise, TRs are frequently found in virulence factors of pathogens, toxins, allergens, amyloidogenic proteins and other disease-related sequences. Fast-evolving repeat regions might confer variation to the surface proteins of pathogens allowing them to escape the host defense systems (<a href="#B40" rid="B40" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133058">40</a>,<a href="#B41" rid="B41" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133139">41</a>). Moreover, there is an increasing amount of evidence for a causal relationship between mutations in TR regions and human-inherited genetic disorders (<a href="#B42" rid="B42" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133040">42</a>). All these examples show that errors in databases are not only an academic problem but also pose risks in analyses of medically relevant data.</p><p id="__p15" class="p p-last">In the following sections, we discuss different problems that occur in today's sequence databases. 
All these problems originate directly or indirectly from the sequencing and assembly process, and all relate to repeats on the DNA level, leading to fundamental errors in the final database entries.</p></div></div><div id="SEC2" class="tsec sec"><h2 class="head no_bottom_margin" id="SEC2title" style="text-transform: uppercase;">SEQUENCING AND GENOME ASSEMBLY ARE AFFECTED BY TANDEM REPEATS</h2><div id="SEC2-1" class="sec sec-first"><h3 id="SEC2-1title">High-throughput sequencing technologies</h3><p id="__p16" class="p p-first">High-throughput sequencing technologies remain under fast development and several types of technology have been or are currently available. Each of these technologies has its own distinct features that influence their ability to characterize repeats. In the Sanger sequencing technology era, each read was accompanied by a fluorescent peak trace chromatogram. This enabled researchers to double-check whether or not the correct base was incorporated in a position, which could be helpful in troublesome regions such as repeats. While similar information is available for high-throughput sequencing technologies, usually encoded as quality scores, the massive amounts of data produced make it infeasible to manually check the quality of individual bases.</p><p id="__p17">The most widely-used technology is the Illumina sequencing platform (<a href="#B43" rid="B43" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133065">43</a>). This technology has a relatively low sequencing error rate (&#x0003c;0.1%) (<a href="#B44" rid="B44" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133027">44</a>), and errors are mainly due to substitution errors. Nonetheless, Illumina reads are relatively short (&#x0003c;250 bp), which is a limiting factor since many repeat regions are longer than the length of the read. 
This technology is therefore not able to fully resolve such longer repeats.</p><p id="__p18">Platforms with significantly longer read length comprise the Single Molecule Real Time Sequencing from Pacific Biosciences (&#x02018;PacBio&#x02019;) (<a href="#B45" rid="B45" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133039">45</a>) and Nanopore Sequencing from Oxford Nanopore Technologies (&#x02018;Nanopore&#x02019;) (<a href="#B46" rid="B46" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133141">46</a>). The longer read lengths (1&#x02013;100+ kbp, usually 10&#x02013;40 kbp) can successfully span longer stretches of repetitive DNA such as TRs and TEs. Both platforms, however, have high single-pass error-rates (11&#x02013;15% for PacBio (<a href="#B47" rid="B47" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133089">47</a>), similar for Nanopore (<a href="#B48" rid="B48" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133071">48</a>)). The majority of these errors consist of insertions and deletions (indels), leading to additional or fewer nucleotides compared to the actual genomic sequence. These error rates can be addressed by more sequencing data (to a higher coverage), which will allow for better error correction during assembly. This effort comes at considerable additional economic costs, which can be up to an order of magnitude more expensive than Illumina sequencing.</p><p id="__p19">A discontinued platform is the Roche/454 pyrosequencing technology. Producing reads up to 1000 bp, the 454 technology had difficulty with accurately sequencing homopolymers, leading to indel errors in such regions (<a href="#B49" rid="B49" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133113">49</a>). 
Albeit 454 finds nearly no use for whole-genome sequencing today, data obtained from this technology still constitutes a considerable part of the DNA and protein sequence databases, being the platform with the second most entries in SRA still today (see Supplementary Material). The Ion Torrent system is similar to the Roche/454, and also has similar issues with indels (<a href="#B50" rid="B50" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553612">50</a>). The relatively long read lengths of these technologies have benefits for crossing repeat regions, yet this advantage is somewhat negated by their inability to correctly assess longer (&#x0003e;4&#x02013;5 nucleotides) stretches of homopolymers (<a href="#B51" rid="B51" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133078">51</a>).</p><p id="__p20" class="p p-last">It is clear from descriptions above that in a perfect world, all sequence data generated would consist of high-coverage, long-range PacBio or Nanopore sequencing as a basis, with some Illumina data for error correction. Yet, the short Illumina reads are economical, accurate and can resolve most parts of any genome, which includes most coding regions and degraded TEs. The economy and utility of the Illumina platform is the main reason why so many genomes have been and are still sequenced by that technology, even though PacBio and Nanopore sequencing would technically yield more complete genome assemblies. Given the widespread use of Illumina technology, genome assemblies and databases are currently likely biased against longer TRs in that many of them do not get incorporated into assembled sequences. How this impacts or biases protein databases cannot be quantified, but individual examples show that especially data from short-read technologies must be taken with care when working with repeat proteins; we show some of these examples in detail further below. 
We do know that large fractions of proteins in protein databases do contain short TR regions (5% in UniProtKB/Swiss-Prot, Table <a href="/pmc/articles/PMC6868369/table/tbl2/" target="table" class="fig-table-link figpopup" rid-figpopup="tbl2" rid-ob="ob-tbl2" co-legend-rid=""><span style="position: relative;text-decoration:none;">&#x200B;<span class="figpopup-sensitive-area" style="left: -2.5em;">Table2)</span></span><span>2</span></a>) and that some of these have had changes in their TR region length from one &#x02018;version&#x02019; of the protein to another (Table <a href="/pmc/articles/PMC6868369/table/tbl1/" target="table" class="fig-table-link figpopup" rid-figpopup="tbl1" rid-ob="ob-tbl1" co-legend-rid=""><span style="position: relative;text-decoration:none;">&#x200B;<span class="figpopup-sensitive-area" style="left: -3em;">(Table1).</span></span><span>1</span></a>). Taken together, it is likely that protein databases underrepresent TRs and that many of the TRs that are in these databases are not correct.</p></div><div id="SEC2-2" class="sec"><h3 id="SEC2-2title">Genome assembly methods</h3><p id="__p21" class="p p-first">The process of genome assembly creates a tentative reconstruction of a complete genome based on information found in the sequencing reads and possibly other sources of information, such as linkage maps. 
There are two major approaches for genome assembly, the &#x02018;<em>de Bruijn graph</em>&#x02019; and &#x02018;<em>overlap/layout/consensus (OLC) methods</em>&#x02019; and these differ significantly in how repeats get resolved during the assembly process.</p><p id="__p22">The <em>de Bruijn graph</em> method uses subsequences (<em>k</em>-mers) found in the reads and creates a graph where each node represents a fixed-length sequence (<em>k</em>-mer), and the edges connect two <em>k</em>-mers with <em>k</em>&#x000a0;&#x02013; 1 bp sequence in common (which can be found in multiple reads) (<a href="#B52" rid="B52" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133143">52</a>). This graph is then parsed, and depending on implementation, contigs (contiguous sequence based on consensus sequence from the reads) and scaffolds (contigs ordered and oriented based on paired read information) are generated. For the <em>de Bruijn</em> approach, the length of an entire repeat region has to be shorter than the <em>k</em>-mer (which is usually between 21 and 96, with 31 often used as the default setting) to be properly resolved. For instance, the <em>de Bruijn graph</em>-based assembler ALLPATHS-LG collapses all repeats equal to or longer than 96 to 96, its <em>k</em>-mer size, in its first processing stages (<a href="#B53" rid="B53" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133097">53</a>), but the repeats can be expanded later in the assembly process. Newer implementations of the de Bruijn approach, such as SPAdes (<a href="#B54" rid="B54" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133111">54</a>) and SKESA (<a href="#B55" rid="B55" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553613">55</a>), use multiple <em>k</em>-mers to better assemble low sequence coverage regions and repeats. 
However, neither is designed to assemble larger (such as plant or vertebrate) genomes.</p><p id="__p23" class="p p-last">One implementation of the <em>OLC method</em> was Celera Assembler, which was used to assemble the <em>Drosophila</em> genome in 2000 (<a href="#B56" rid="B56" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133105">56</a>), the first whole genome shotgun sequencing project of a multicellular organism. This approach works by first detecting overlap between all sequencing reads, then creating a graph based on the overlaps, simplifying and traversing the graph, before outputting so-called unitigs (sequences that are either unique in the genome or are collapsed, repeated sequence where repeats occurring in multiple locations in a genome are all found on top of each other in one sequence), based on a multiple sequence alignment from the overlaps (<a href="#B57" rid="B57" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133135">57</a>). Because the overlap step compares each read to all other reads, computational demand can be high (certainly higher than the <em>de Bruijn</em> method), but it is reduced with fewer but longer reads because fewer overlaps need to be computed. The overlap step can also tolerate mismatches and indels between the reads, and therefore performs well with longer reads even if these are error-prone. The unitigs are further categorized into unique and repeat unitigs, before they are ordered and oriented into scaffolds based on information from paired reads (if included in the assembly). The <em>OLC method</em> can resolve those repeats that are shorter than the read length, and it is not limited by any <em>k</em>-mer size as the <em>de Bruijn</em> method is. Before the availability of long reads such as PacBio and Nanopore, the shorter Illumina reads were usually assembled with the <em>de Bruijn</em> method because <em>OLC</em> can be computationally demanding. 
Now, with long reads decreasing in cost, most genome sequencing projects utilize these and assemble them with an assembler implementing <em>OLC</em>. This will lead to more complete genomes being published, with more repeats resolved.</p></div><div id="SEC2-3" class="sec sec-last"><h3 id="SEC2-3title">Repeat content and fragmented assemblies</h3><p id="__p24" class="p p-first-last">While the choice of best-practice sequencing methods and assembly approaches can be used to minimize the effects of repeats, their amount, length, localization and sequence identity constitute key limitations to obtaining a complete and contiguous genome assembly (<a href="#B58" rid="B58" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553615">58</a>). TE content is likely the largest factor contributing to fragmented genome assemblies (<a href="#B59" rid="B59" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133077">59</a>). This holds for both assemblies based on Illumina and for PacBio reads, but the problem is larger for assemblies with shorter reads. TE content is part of the reason why larger genomes are harder to assemble, since it is highly correlated with genome size (<a href="#B6" rid="B6" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133154">6</a>,<a href="#B60" rid="B60" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133133">60</a>). While TEs might induce gaps in the genome assembly, the effects of TRs are harder to quantify. It is not completely clear how PacBio reads handle long STR regions. In one study (<a href="#B61" rid="B61" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133084">61</a>), the authors investigated how PacBio reads handled different STRs, and showed that &#x0003c;50% of reads called the correct length of an STR consisting of 30xAC, most likely due to polymerase slippage errors. 
This observation partly contradicts the notion that long reads might be the solution to resolving repetitive regions (see conclusions section). However, such slippage problems appear limited to extreme examples, and overall, PacBio-based assemblies using <em>OLC</em> should be more accurate than Illumina-based assemblies with regards to STRs (<a href="#B62" rid="B62" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133025">62</a>).</p></div></div><div id="SEC3" class="tsec sec"><h2 class="head no_bottom_margin" id="SEC3title" style="text-transform: uppercase;">EXAMPLES OF REPEAT-DRIVEN ERROR PROLIFERATION</h2><div id="SEC3-1" class="sec sec-first"><h3 id="SEC3-1title">Tandem repeats cause sequencing and genome assembly challenges</h3><p id="__p25" class="p p-first">Significant variation in the natural abundance of TRs exists in different organisms which complicates assembly procedures and the development of adequate algorithms that perform well in all cases. Atlantic cod (<em>Gadus morhua</em>) has been identified as a vertebrate species with an exceptionally high occurrence of STRs (<a href="#B63" rid="B63" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133142">63</a>,<a href="#B64" rid="B64" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133076">64</a>), in particular AC dinucleotide repeats (<a href="#B62" rid="B62" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133050">62</a>,<a href="#B65" rid="B65" class=" bibr popnode">65</a>). The high abundance of these repeats has caused several complications, both from a laboratory and bioinformatic perspective, and on the level of DNA and (translated) protein sequences. The first <em>de novo</em> assembly (gadMor1) of the Atlantic cod genome was based on 454 sequencing data (<a href="#B66" rid="B66" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133151">66</a>) and resulted in a fragmented assembly with many gaps. 
More than 30% of the contig edges contained an STR and nearly a quarter of the gaps in scaffolds were flanked by STRs (<a href="#sup1" rid="sup1" class=" supplementary-material">Supplementary Note 7</a> in (<a href="#B66" rid="B66" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133038">66</a>)), indicating that these STRs strongly affected the successful assembly into more contiguous genomic regions. By incorporating PacBio reads, an updated assembly (gadMor2; (<a href="#B62" rid="B62" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133116">62</a>)) yielded an improved continuity, allowing a more in-depth quantification of these repeats. For instance, the antifreeze glycoproteins were completely missing in the gadMor1 assembly (<a href="#B67" rid="B67" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133029">67</a>), while they are found in gadMor2 (see section &#x02018;<em>Tandem repeats can hinder gene annotation</em>&#x02019; below). While it is well established that repeats in general can hinder genome assembly, there is little discussion of TRs in particular in the literature besides the example above. For instance, in a discussion regarding fragmented genome assemblies of plants, the authors do discuss briefly the role of TEs in the fragmentation of the assemblies, but never mention TRs in the same setting (<a href="#B68" rid="B68" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133069">68</a>). When discussing repeat content, they only mention TEs. They further mention long reads as the main aid in generating more complete genome assemblies.</p><p id="__p26" class="p p-last">The prolific STR occurrence in Atlantic cod may also interfere with PCR amplification, often an essential step for creating sequencing libraries. 
Ancient DNA (aDNA) sequencing data from historic Atlantic cod specimens contained inflated STR abundances (up to 35%), which is far beyond the naturally observed levels (<a href="#B65" rid="B65" class=" bibr popnode">65</a>). This inflation can be suppressed by a reduced number of amplification cycles and by the inclusion of synthesized dinucleotide repeat oligonucleotides during amplification. These data indicate that a biased amplification reaction, whereby repeats &#x02018;<em>self-prime</em>&#x02019; during PCR, leads to artificially high levels of AC and AG repeats. Although this <em>self-priming</em> appears to be particularly problematic in cod&#x02014;likely due to its high content of repeats with relatively low sequence complexity (<a href="#B65" rid="B65" class=" bibr popnode">65</a>)&#x02014;this process also explains the typical PCR fragmentation patterns observed when using transcription activator-like effector (TALE) technology (<a href="#B69" rid="B69" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133018">69</a>). This highlights the propensity of repetitive DNA to interfere with amplification in a variety of protocols and conditions.</p></div><div id="SEC3-2" class="sec sec-last"><h3 id="SEC3-2title">Tandem-repeated gene families causing assembly collapse</h3><p id="__p27" class="p p-first">Gene family expansions often originate from a gene locus being replicated in tandem, giving rise to two or more (almost) identical copies of a gene that can be regarded in essence as a long tandem repeat (<a href="#B70" rid="B70" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133085">70</a>). Over time, these two copies can evolve independently, resulting in two genes with different function (neofunctionalization) or two genes with different expression patterns (subfunctionalization). 
One such example is the &#x003b1;- and&#x000a0;&#x003b2;-globin clusters in vertebrates, where multiple globin genes are found in tandem in each cluster, and where the different genes are expressed at different stages during the development (<a href="#B71" rid="B71" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133119">71</a>). In teleost fishes, the two chromosomal regions are inhabited by different numbers of &#x003b1;&#x02212;&#x000a0;and&#x000a0;&#x003b2;-genes, reflecting functional diversity (<a href="#B72" rid="B72" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133075">72</a>). For instance, the different numbers of hemoglobin genes in codfishes are suggested to reflect the depth the different species are found at (i.e. a temperature-variation proxy) (<a href="#B73" rid="B73" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133095">73</a>). Another gene family that greatly expanded in teleost fish are the nod-like receptor (NLR) genes (<a href="#B74" rid="B74" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133126">74</a>,<a href="#B75" rid="B75" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133136">75</a>), genes encoding proteins active in the innate immune system. It is not completely clear why this class of genes are expanded, but since they are involved in pathogen recognition the expansion might correspond to novel pathogen environments (<a href="#B75" rid="B75" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133107">75</a>). In most teleost species, there does not seem to be a clear pattern to the genomic distribution of these genes (<a href="#B74" rid="B74" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133125">74</a>), and although in many cases occurring as clustered (tandem) repeats they are also spread across the genome similar to transposable elements. Most notably, this multiplicity of similar sequences can cause local genome assembly collapse (i.e. 
the repeated genes are so similar that they collapse into one gene/region displaying much higher coverage than the rest of the genome) and annotation problems (i.e. annotated as a single gene while in reality multiple, or the genes might be hidden from annotation because the software registers them as repeats). This problem can be illustrated by different releases of the zebrafish genome. In previous versions of this genome assembly (i.e. Zv6) the <em>NLR</em> genes were more or less collapsed. However, zebrafish assembly GRCz10 was created with substantial efforts in BAC and fosmid clones to close gaps, which enabled researchers to show that 159 of the 368 identified <em>NLR</em> genes are present as TRs on the long arm of chromosome 4 (<a href="#B76" rid="B76" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133144">76</a>). As a further complicating repeat-issue they occur interspersed with Zn-finger genes and arranged irregularly. The specific organization of the <em>NLR</em> and <em>Zn-finger</em> genes is likely the result of multiple different local duplications. The repeated nature of this huge genomic architecture makes it difficult to be confident that all the genes have been properly assembled and annotated, even with manual annotation and curation (<a href="#B76" rid="B76" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133036">76</a>).</p><p id="__p28">Many immune genes such as NLRs contain leucine rich repeats (LRRs) (<a href="#B77" rid="B77" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553606">77</a>). These are tandem repeats at the amino acid level, but not necessarily at the nucleotide level. In jawless vertebrates the variable lymphocyte receptors (VLRs), another class of immune genes, also contain LRRs (<a href="#B78" rid="B78" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133132">78</a>). In lamprey there are three <em>VLR</em> genes that each have multiple LRR-encoding modules in their vicinity. 
Together they can encode several hundreds of different proteins (<a href="#B78" rid="B78" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133150">78</a>). During lymphocyte development, the <em>VLR</em> gene region is reorganised, ending up with the incorporation of several of the surrounding LRR modules. Different lymphocytes have different organisations of their <em>VLR</em> gene. In the sea lamprey assembly the <em>VLRC</em> gene is not complete and is found together with 182 different LRR donor genomic cassettes on 24 scaffolds (<a href="#B79" rid="B79" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133138">79</a>). It is likely that the nature of these LRR cassettes make them hard to assemble properly, but this is not fully clear from the literature (<a href="#B79" rid="B79" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133044">79</a>). An improved genome assembly of sea lamprey including PacBio reads has recently been published (<a href="#B80" rid="B80" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133053">80</a>), but it remains to be seen if that assembly would resolve these complicated regions better.</p><p id="__p29">Long tandem repeats (LTRs) are often associated with protein-coding regions, and can include duplicated genes as well as duplicated (or otherwise multiplied) domains within a protein-coding gene. They are affected by the filtering and masking operations during genome assembly. A problem occurs when the read length of the sequencing method is shorter than the LTR&#x02014;in this case, repeat numbers can be massively misjudged. In the case of protein-coding regions, this has direct effects on the interpretation of biological function. LTRs are not uncommon in structural proteins on cell surfaces, and in pathogenicity factors of bacteria, parasites, and viruses. 
As an example, Wrobel <em>et&#x000a0;al.</em> (<a href="#B81" rid="B81" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133152">81</a>) have shown that in the fish pathogen <em>Yersinia ruckeri</em>, a surface adhesin involved in biofilm formation called Ilm has &#x0003e;20 Ig-like domains repeated in tandem that are identical even on the DNA level (repeat length &#x0223c;300 bp). Repeat numbers vary slightly from strain to strain, but in this case only PacBio-based genomes show the correct number of repeats (Figure <a href="/pmc/articles/PMC6868369/figure/F1/" target="figure" class="fig-table-link figpopup" rid-figpopup="F1" rid-ob="ob-F1" co-legend-rid="lgnd_F1"><span style="position: relative;text-decoration:none;">&#x200B;<span class="figpopup-sensitive-area" style="left: -3.5em;">(Figure1).</span></span><span>1</span></a>). Deposited genomes based on short-read methods show underestimated repeat numbers (by a factor of 4 to 5). The fact that the underestimated repeat number is an approximation made during genome assembly is not visible in the deposited genome data. In a very similar example, Franz&#x000e9;n <em>et&#x000a0;al.</em> find that in the human and animal parasite Giardia, variable surface proteins (VSPs) are difficult to sequence using 454 sequencing. Using this technology, only a few genes could be assembled due to their highly repetitive nature (<a href="#B82" rid="B82" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133128">82</a>). From other experiments (including some re-sequencing using different technologies), the authors estimate that ca. 300 of these repetitive surface proteins should exist in the genome. In yeast, a large set of LTR proteins are included in flocculation (self-adhesion), a process important in biotechnology for removal of the yeast cells by sedimentation or filtration. 
These <em>flo</em> genes are often truncated in deposited genomes, but it is possible that in many cases, this is due to sequencing and assembly issues, and that in reality, these genes are intact in many of the sequenced strains (<a href="#B83" rid="B83" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133024">83</a>). In primates, filaggrin protein is a component of the skin, and the underlying genes have copy number variations between different species (<a href="#B84" rid="B84" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133118">84</a>). The gene contains multiple copies (<a href="#B10" rid="B10" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133092">10&#x02013;12</a>) of a repeat that is 972&#x02013;975 nucleotides long. Here, researchers found incomplete versions of the gene for chimpanzee, gorilla, orangutan and macaque in the NCBI database, but were able to reconstruct the complete genes by using a combination of PacBio and Illumina sequencing (<a href="#B84" rid="B84" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133057">84</a>), again showing the importance of the choice of sequencing technology. One extreme example of a LTR is <em>Pseudomonas koreensis</em> P19E3 where a 70 kbp repeat could not be resolved by PacBio sequencing reads (<a href="#B85" rid="B85" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133091">85</a>). However, by utilizing very long reads from Oxford Nanopore in addition to PacBio and Illumina sequences, the researchers were able to properly resolve this LTR (<a href="#B85" rid="B85" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133017">85</a>). Even in cases such as this, researchers may take different approaches to representing the sequence within the database. 
Guo <em>et&#x000a0;al.</em> (<a href="#B86" rid="B86" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133055">86</a>,<a href="#B87" rid="B87" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133114">87</a>) identified a 37 kbp repeat in the <em>Marinomonas primoryensis</em> ice binding protein (MpIBP) but were unable to sequence through the region with PacBio sequencing. Based on pulsed-field gel electrophoresis they estimated that it contained about 120 copies of a 104 amino acid repeat. When submitting the protein sequence, they deposited two sequences, one for the amino terminal side of the repeats and one for the carboxy terminal side of the repeats. In other cases such as the sequence determination of the R28 protein from <em>Streptococcus pyogenes</em> (<a href="#B88" rid="B88" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133087">88</a>) the authors determined the sequence of the terminal repeats as well as random internal repeats derived from PCR and based on the estimated size of the PCR product of the complete repeat region deposited a full length sequence with an assumption that every repeat was identical.</p><!--fig ft0--><!--fig mode=article f1--><div class="fig iconblock whole_rhythm clearfix" id="F1" co-legend-rid="lgnd_F1"><a href="/pmc/articles/PMC6868369/figure/F1/" target="figure" rid-figpopup="F1" rid-ob="ob-F1"><!--fig/graphic|fig/alternatives/graphic mode="anchored" m1--><div data-largeobj="" data-largeobj-link-rid="largeobj_idm140524595867808" class="figure"><a class="inline_block ts_canvas" href="/core/lw/2.0/html/tileshop_pmc/tileshop_pmc_inline.html?title=Click%20on%20image%20to%20zoom&amp;p=PMC3&amp;id=6868369_gkz841fig1.jpg" target="tileshopwindow"><div class="ts_bar small" title="Click on image to zoom"></div><img alt="An external file that holds a picture, illustration, etc.&#10;Object name is gkz841fig1.jpg" title="Click on image to zoom" class="tileshop" src="/pmc/articles/PMC6868369/bin/gkz841fig1.jpg" 
/></a></div><div id="largeobj_idm140524595867808" class="largeobj-link align_right" style="display: none"><a target="object" href="/pmc/articles/PMC6868369/figure/F1/?report=objectonly">Open in a separate window</a></div></a><div class="icnblk_cntnt" id="lgnd_F1"><div><a class="figpopup" href="/pmc/articles/PMC6868369/figure/F1/" target="figure" rid-figpopup="F1" rid-ob="ob-F1">Figure 1.</a></div><!--caption a7--><div class="caption"><p id="__p30">DNA alignment of a &#x0223c;39 kb-long DNA region containing the <em>yrIlm</em> gene and flanking CDS in <em>Y. ruckeri</em> genomes deposited in GenBank. Each CDS is indicated by a yellow arrow, with the percentage of sequence identity to CSF007-82 reported inside the arrow. <em>yrIlm</em> consists of an array of tandemly repeated, identical Ig-like domains (in red) and in addition of Ig-like domains of lower pairwise sequence similarity (in orange). It is usually capped by a C-type lectin domain (CTLD, in green). The dashed lines indicate gaps in the DNA alignment. In strain 150 the grey box indicates a contig break in the assembly. The asterisk (*) indicates assemblies generated through PacBio SMRT sequencing. Note that the other assemblies have significantly lower repeat numbers, suggesting that the repeats were not found using short-read sequencing technologies. Modified from Wrobel,A., Ottoni,C., Leo,J.C., Gulla,S. and Linke,D. (2018) The repeat structure of two paralogous genes, Yersinia ruckeri invasin (yrInv) and a &#x02018;Y. ruckeri invasin-like molecule&#x02019;, (yrIlm) sheds light on the evolution of adhesive capacities of a fish pathogen. Journal of Structural Biology, 201, 171&#x02013;183, with permission from Elsevier.</p></div></div></div><p id="__p31" class="p p-last">It is worth noting that repeat numbers within coding regions may vary within a single bacterial colony, potentially leading to another level of complication when estimating repeat numbers. 
This effect is called hypervariable copy number variation; an example is the SasG protein from <em>Staphylococcus aureus</em> strain NCTC 8325 which contains eight identical 128 amino acid B repeats. Roche and colleagues found that PCR of the full length SasG gene led to a ladder of products differing in size by the 400 bp repeat size (<a href="#B89" rid="B89" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133064">89</a>). Individual bands were gel purified and used as a new template for PCR and in each case only a single band was identified demonstrating that the different size products were not due to mis-priming of the repeat DNA during amplification.</p></div></div><div id="SEC4" class="tsec sec"><h2 class="head no_bottom_margin" id="SEC4title" style="text-transform: uppercase;">ANNOTATION OF FUNCTION CAN BE AFFECTED BY TANDEM REPEATS</h2><div id="SEC4-1" class="sec sec-first"><h3 id="SEC4-1title">Annotation of repeats</h3><p id="__p32" class="p p-first">The task of accurate characterization of TRs should not rely on just one method. This is because the statistical error rates and power of TR prediction vary extensively for different repeat types and different methods - due to fundamental differences in prediction methodology and method assumptions (<a href="#B24" rid="B24" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133145">24</a>). For example, the Tandem Repeats Finder program appears to be very conservative and has a very low power of predicting diverged repeats (Figure 3 in <a href="#B24" rid="B24" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133060">24</a>). As a result, the agreement of TR annotations by different methods is low, since different methods achieve optimal power for different subsets of TR space (in terms of TR unit length, repeat number and unit similarity). 
Indeed, testing four selected popular TR finders, Schaper and colleagues reported that 89% of TRs were found by only one program, &#x0003c;1% were found by three and only 0.2% by all four programs&#x000a0;(<a href="#B24" rid="B24" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133155">24</a>). To improve the accuracy and power of TR annotation, it is advisable to use a proper statistical framework combined with a meta-approach that employs several repeat prediction methods, followed by subsequent filtering of false positives using rigorous statistical tests (<a href="#B90" rid="B90" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133090">90</a>). Currently, such procedure can be implemented using the Tandem Repeat Annotation Library (TRAL) (<a href="#B91" rid="B91" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133129">91</a>). The TRAL library can be easily included in developing new pipelines for genome assembly and repeat annotation. Further, TRAL allows for evolutionary analyses of the annotated repeats, such as evaluating whether a TR region may be under selection.</p><p id="__p33" class="p p-last">A genome assembly is most useful when different features such as genes, TEs and other repeats are annotated with their precise location on a scaffold/chromosome and with a unique identifier. This can then provide essential background information for further experiments on gene expression or function, for example when investigating the difference in gene expression between two experimental set-ups with RNA-Seq (<a href="#B92" rid="B92" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133041">92</a>). We often distinguish between structural annotation, specifying all the genes with their intron and exon structure, and functional annotation of genes and their properties (including individual function (e.g. for enzymes) or function in more complex pathways (e.g. 
in signaling)) (<a href="#B93" rid="B93" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133020">93</a>,<a href="#B94" rid="B94" class=" bibr popnode">94</a>). A key issue is the typical workflow of annotation in semi-automated pipelines. The annotation process starts with identifying as many repetitive elements as possible, possibly by creating a custom-made repeat library using both homology-based and <em>de novo</em> tools (<a href="#B95" rid="B95" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133153">95</a>). Complete TEs often contain genes that are used to facilitate transposition and are often considered less important when investigating a particular species compared to the specific genes of that species. Repeat libraries are thus used to mask the repeats, making annotation of the genes of the species under investigation easier, but removing information related to genes found in transposable elements. TEs and TRs are usually masked. The reason for masking repeats is that <em>ab initio</em> gene prediction programs such as AUGUSTUS (<a href="#B96" rid="B96" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133028">96</a>) or GeneMark (<a href="#B97" rid="B97" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133149">97</a>) need to be trained, i.e., optimized for the specific species with regards to codon bias and splicing signals, and this training can be biased by repeats. Evidence for actively expressed genes can be added in the form of transcriptome data assembled by Trinity (<a href="#B98" rid="B98" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133104">98</a>) or StringTie (<a href="#B99" rid="B99" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133081">99</a>), or with the full-length transcripts generated by PacBio Iso-Seq (<a href="#B100" rid="B100" class=" bibr popnode">100</a>). 
The transcriptome data is often crucial, since it - of the methods mentioned here - alone provide concrete evidence for the presence of the particular genes of a species, and not just assumed via prediction or mapping of proteins. Non-redundant protein databases such as UniProtKB/Swiss-Prot (<a href="#B101" rid="B101" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133034">101</a>) can be included as the basis for annotation, ideally complemented by specific databases of well-annotated proteins from closely related species. All this information can then be integrated by using a program such as MAKER (<a href="#B102" rid="B102" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133063">102</a>,<a href="#B103" rid="B103" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133079">103</a>) or EVM (<a href="#B104" rid="B104" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133042">104</a>). This approach provides a set of predicted transcripts and proteins, together with a GFF (General Feature Format) track with positions of all the annotated features, describing their properties. The predicted proteins can be searched using InterProScan (<a href="#B105" rid="B105" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133117">105</a>) to classify proteins to different molecular functions, biological processes and pathways. Since such annotation is likely to be performed on assemblies where biologically relevant repetitive sequences have been removed from the data already, it may generate serious problems. The most important is the risk of removal of vital information about the genome from the final annotation. 
Consequently, if a TR makes up a large part of an exon or a whole gene, that exon or gene might not be properly annotated.</p></div><div id="SEC4-2" class="sec"><h3 id="SEC4-2title">Tandem repeats can hinder gene annotation</h3><p id="__p34" class="p p-first">While the process above can already accidentally filter out genes with repetitive regions, the more detailed annotation process can add another level of problems. Specifically, homology search methods such as BLAST usually have built-in filters that hinder alignment to low complexity regions (which often exist as part of repetitive regions or are repetitive regions) (<a href="#B106" rid="B106" class=" bibr popnode">106</a>), and are not adapted to accurately align homologous sequences with different numbers of TR units.</p><p id="__p35">Therefore, the annotation process is often just a rough overview of the different genes, repeats and other features in the species of interest, and may not be sufficient for investigations into gene families that are particularly interesting for a researcher. Manual inspection, re-annotation and re-alignment are often necessary for troublesome gene families. One such gene family is the anti-freeze proteins, in particular the anti-freeze glycoproteins (AFGPs) of notothenioid fishes and codfishes (<a href="#B107" rid="B107" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133122">107</a>,<a href="#B108" rid="B108" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133094">108</a>). In notothenioids the AFGPs consist of a repeated pattern of Thr-Ala(/Pro)-Ala, and in codfishes it sometimes is represented by Arg-Ala(/Pro)-Ala (<a href="#B108" rid="B108" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133140">108</a>). 
The repeated nature of these gene families requires manual annotation, and this was performed in a comparative survey of AFGPs in notothenioid fishes and codfishes (<a href="#B109" rid="B109" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553611">109</a>). Indeed, the automated annotation of the Atlantic cod genome masked these genes as repeats and they would not have been properly characterized without careful investigation using BLAST (<a href="#B109" rid="B109" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553605">109</a>). These genes were not properly assembled in the first version of the Atlantic cod genome (<a href="#B66" rid="B66" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133130">66</a>), but were in the second version created with PacBio reads (<a href="#B62" rid="B62" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133131">62</a>,<a href="#B109" rid="B109" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553609">109</a>).</p><p id="__p36" class="p p-last">Detection of genuine gene fusion events has been reported long before the first complete genomes became available (<a href="#B110" rid="B110" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133047">110</a>,<a href="#B111" rid="B111" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133035">111</a>), but beyond that point they have been proven instrumental in detecting gene/protein associations with high specificity (<a href="#B112" rid="B112" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133019">112</a>,<a href="#B113" rid="B113" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133080">113</a>). Repeats may artificially cause gene fusion events, in which genes/proteins that are encoded as distinct units in the genome under study (possibly in distant loci or even in different chromosomes) are erroneously joined. 
More specifically, in the case where the 5&#x02032; and 3&#x02032; termini of two gene loci share a similar repeat or low complexity pattern, there is an increased probability that genome assemblers can erroneously detect an overlap, thus artificially fusing these genes into a single entity. There are known cases where similar repeat regions in adjacent genes can lead to recombination-driven gene fusion (<a href="#B114" rid="B114" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133052">114</a>), but with short sequence reads, assembly errors can arguably lead to &#x02018;artificially&#x02019; fused genes (as detailed above). Such erroneous gene calls may (i) become the cause of downstream gene-prediction or annotation errors, (ii) generate false positive predictions for gene/protein associations and (iii) hinder large-scale genome evolution studies (<a href="#B115" rid="B115" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133033">115</a>,<a href="#B116" rid="B116" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553614">116</a>).</p></div><div id="SEC4-3" class="sec sec-last"><h3 id="SEC4-3title">Databases, submission and curation</h3><p id="__p37" class="p p-first">DNA and protein sequences are routinely submitted to online repositories that make these data available to the public. This is a largely unsupervised process and there is usually little or no post-submission curation of the data. For nucleotide sequences, submitters must only ensure that the submission adheres to various formatting and data standards, and the archival database will make various automated checks of the data and metadata. Problems such as misassembly and contamination are not investigated. At the protein level, the UniProt database takes predicted sequences from nucleotide entries and places them within the UniProtKB/TrEMBL portion of the database with no further quality control. 
The RefSeq database, at least for bacterial genomes, ignores the submitted protein sequences and runs their own bespoke PGAP pipeline - this leads to a more consistent set of protein sequences and annotations. Only the manually reviewed section of UniProt, UniProtKB/Swiss-Prot allows for corrections to be made to protein sequences and curators will merge multiple entries from UniProtKB/TrEMBL, thus improving the likelihood of identifying the fully correct protein sequence. But even when manually curated, it is difficult to assess whether or not a protein contains the correct number of a repeated pattern or amino acid, and whether errors have occurred in the underlying DNA sequencing process. The difficulty of identifying and classifying DNA tandem repeats, in addition to their extreme variation from species to species, as well as within populations, has promoted the development of specialized bioinformatic algorithms and databases dedicated to repeat detection and characterization.</p><p id="__p38" class="p p-last">The first database on human repetitive DNA elements, including TRs, was developed in 1992 (<a href="#B117" rid="B117" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133123">117</a>), eventually becoming RepBase (<a href="#B118" rid="B118" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133074">118</a>). Widespread genome sequencing further fueled the development of specialized resources (both methods for detecting repeats and repeat databases). The parallel development of general and specialized resources related to DNA tandem repeats, has been crucial to the increased awareness of their widespread distribution and has been instrumental for their use both in basic and applied science. With over 50 TR detectors available, equally numerous repeat sequence databases exist today whose data is constantly used in practical applications like agriculture, medicine and forensics. 
Examples include the Human Genome Browser at UCSC (<a href="#B119" rid="B119" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133061">119</a>), the STRBase (<a href="#B120" rid="B120" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133059">120</a>) maintained by the National Institute of Standards and Technology (NIST, Maryland, US) or the Tandem Repeats Database (TRDB; (<a href="#B121" rid="B121" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133068">121</a>)). Some of these databases have specific applications. For instance, the STRBase has a focus on human STRs whereas the TRDB was developed as a workbench for sequence analyses. Other specialized databases have been developed recently in this regard (e.g. (<a href="#B122" rid="B122" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553608">122&#x02013;126</a>)), starting off from human-centered research questions and expanding to examples of many other species, such as the tobacco plant (<a href="#B127" rid="B127" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133045">127</a>), <em>Trichophyton rubrum</em>, a fungus causing skin disease (<a href="#B128" rid="B128" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553607">128</a>), or the Cannabis plant to characterize the origin of hemp seeds (US Cannabis DNA database; (<a href="#B129" rid="B129" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133049">129</a>)). Despite this diversity, the majority of these databases rely on the results of well-established automated bioinformatic approaches such as the Tandem Repeats Finder (TRF) program (<a href="#B130" rid="B130" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133115">130</a>) or RepeatMasker (<a href="#B118" rid="B118" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133103">118</a>) to characterize repeat content. 
Especially the use of RepeatMasker as <em>the</em> preferred software to identify and mask repeats, (<a href="http://www.repeatmasker.org/" data-ga-action="click_feat_suppl" ref="reftype=extlink&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CBody&amp;TO=External%7CLink%7CURI" target="_blank">http://www.repeatmasker.org/</a>), has allowed the standardized treatment of raw genomic sequences and reproducibility of protocols for the establishment of these databases. However, using RepeatMasker and TRF on their own might not be enough to accurately characterize all TRs, and using a meta-approach such as TRAL (mentioned above) would likely lead to better annotation of TRs in both proteins and DNA.</p></div></div><div id="SEC5" class="tsec sec"><h2 class="head no_bottom_margin" id="SEC5title" style="text-transform: uppercase;">CONCLUSIONS</h2><p id="__p39" class="p p-first-last">Both short and long repeat regions in genomes convey important biological functions; but as they cause significant technical problems with DNA sequencing, genome assembly, and gene and genome annotation, they often include significant errors, or are even omitted from datasets in public databases. Researchers with an interest in the function of such repeats may not be fully aware of the multi-level complexities and use genome data without questioning its quality. It is possible but not well documented that numerous publications on repeat numbers, gene duplications or recombination events are based on erroneous data and thus might include wrong evolutionary or functional conclusions. 
There is no easy solution to this issue and the key purpose of this article is to raise awareness of the problem, especially amongst end-users of genome and protein databases, but likewise amongst the researchers working on sequencing, assembly and annotation projects that are often not fully aware of the biological importance of the repeat regions that they mis-sequence, mask, or remove. It would be beneficial if deposited data included qualitative and quantitative information on the type of sequencing methods used, the quality of the assembly and of the annotation. We strongly encourage the use of long-read sequencing technologies to better capture the tandem repeats at the sequencing and assembly stages. Specifically, we urge researchers to aim for a sequencing strategy similar to what has been decided for the Vertebrate Genomes Project (not published, but partly described in (<a href="#B131" rid="B131" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133046">131</a>) and on <a href="https://www.rockefeller.edu/research/vertebrate-genomes-project/technology-pipeline-and-policies/" data-ga-action="click_feat_suppl" ref="reftype=extlink&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CBody&amp;TO=External%7CLink%7CURI" target="_blank">https://www.rockefeller.edu/research/vertebrate-genomes-project/technology-pipeline-and-policies/</a>), and for the Earth BioGenome Project (<a href="#B132" rid="B132" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133032">132</a>). This sequencing strategy should in most cases lead to chromosome level genome assemblies for eukaryotes, where there are few gaps in the sequence and most repeats are resolved. 
For prokaryotes, substantial coverage in PacBio reads (60&#x000d7;), plus some Illumina reads (50&#x000d7;) and some coverage in very long Nanopore reads as described earlier would likely lead to complete prokaryote genome assemblies (<a href="#B85" rid="B85" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133101">85</a>). It is important that more than one round of polishing with Illumina reads is performed on the assemblies, as that reduces any issues that might stem from the long reads (<a href="#B133" rid="B133" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553618">133</a>,<a href="#B134" rid="B134" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816133023">134</a>). The combination of long and short reads has been shown to be beneficial for resolving tandem repeats in genomes (<a href="#B135" rid="B135" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553617">135</a>), and it should create a better foundation for characterizing large gene families that might be underreported. Recent technological advances by PacBio have enabled circular consensus sequencing of both RNA and DNA, resulting in long (&#x0003e;10 kb), highly accurate (99.8%) reads (<a href="#B136" rid="B136" class=" bibr popnode tag_hotlink tag_tooltip" id="__tag_816553616">136</a>). Wide-spread adoption of these technologies should address most of the issues raised here. While best-practice methods and quality control can improve new datasets that are made available to the research community, it is less clear how to manage the many problems found in existing, deposited data. More work should go into identifying such issues. It would be of great help if databases would allow user comments on deposited items, to alert other users of the problems and to avoid the reiteration of mistakes and misinterpretations. 
We expect that the wide-spread adoption of such recommendations is improved by an increased awareness of the challenges associated with TRs within the community of database creators and end-users.</p></div><div id="__sec1" class="tsec sec"><a id="supplementary-material-sec"></a><h2 class="head no_bottom_margin" id="__sec1title" style="text-transform: uppercase;">Supplementary Material</h2><!--/article/body/sec/--><div class="sec suppmat" id="sup1"><h4>gkz841_Supplemental_File</h4><div class="sup-box half_rhythm" id="idm140524553737504"><a href="/pmc/articles/PMC6868369/bin/gkz841_supplemental_file.pdf" data-ga-action="click_feat_suppl">Click here for additional data file.</a><sup>(314K, pdf)</sup></div></div></div><div id="app" class="tsec sec"><h2 class="head no_bottom_margin" id="apptitle" style="text-transform: uppercase;">APPENDIX</h2><!--/article/back/app-group/app/--><div id="app1-1" class="sec sec-first"><h3 id="app1-1title">Glossary</h3><p id="__p44" class="p p-first">
<strong>aDNA:</strong> Ancient DNA. DNA isolated from material that is up to several hundred thousand years old.</p><p id="__p45">
<strong>Contigs:</strong> Sequence assembled from shorter sequencing reads into a contiguous stretch of nucleotides.</p><p id="__p46">
<strong>de Bruijn graph:</strong> One of two main computational approaches (the other is <strong>OLC</strong>) for the assembly of sequencing reads into longer sequences such as contigs. Works by dividing reads into overlapping <em>k</em>-mers. A graph is created with nodes corresponding to <em>k</em>-mers and directional edges connecting overlapping nodes. A traversal of the graph can be output as contigs.</p><p id="__p47">
<strong>GenBank:</strong> One of several databases containing all publicly available DNA sequences.</p><p id="__p48">
<strong>Homorepeat:</strong> Also known as <strong>homopolymer tract</strong>, or <strong>polyX</strong> for amino acids, where X is the repeated residue. A perfect <strong>tandem repeat</strong> with unit size one where all the nucleotides or amino acids are the same.</p><p id="__p49">
<strong>Interspersed repeat:</strong> A motif or pattern that is found in multiple loci across a genome, such as <strong>transposable elements</strong>. In contrast, a <strong>tandem repeat</strong> has the motif or pattern repeated in tandem at one locus.</p><p id="__p50">
<strong><em>K</em>-mer:</strong> A sequence of nucleotides that is <em>k-</em>residues long, such as a 31-mer with 31 nucleotides.</p><p id="__p51">
<strong>LRR: Leucine rich repeats</strong> are amino acid motifs found in many different proteins, often repeated in tandem.</p><p id="__p52">
<strong>NLR: Nod-like receptors</strong> are proteins involved in innate immune response and contain LRRs among other domains.</p><p id="__p53">
<strong>OLC: Overlap-layout-consensus</strong>. One of two main computational approaches (the other is <strong>de Bruijn graph</strong>) for the assembly of sequencing reads into longer sequences such as contigs. Works by finding common sequences in reads (overlaps), and creates a graph where the overlaps are nodes. Traversal of the graph can be output as contigs.</p><p id="__p54">
<strong>Polishing:</strong> The act of mapping reads back to an assembly and re-calling the consensus sequence. This is a necessity for assemblies based on PacBio and/or Oxford Nanopore reads, and is often performed in multiple rounds where at least the last couple are done with Illumina reads.</p><p id="__p55">
<strong>Scaffolds:</strong> Contain multiple contigs that are placed into proper order and orientation based on paired reads or other positional information (linked reads, optical maps, linkage maps).</p><p id="__p56">
<strong>Short tandem repeat (STR):</strong> A <strong>tandem repeat</strong> with a unit size shorter than 10 nucleotides.</p><p id="__p57">
<strong>Sequence Read Archive (SRA):</strong> A database of sequencing data and alignment information from high-throughput sequencing platforms such as Illumina, 454 and PacBio among others.</p><p id="__p58">
<strong>Tandem repeat (TR):</strong> A region of DNA or protein where a motif or pattern is repeated in tandem at one locus. The motif or pattern has a size, which is usually called a unit size. For example, the tandem repeat ACACACAC has a unit size of 2. This is in contrast to an <strong>interspersed repeat</strong> where the motif or pattern is found in multiple loci across a genome.</p><p id="__p59">
<strong>Transposable elements (TE):</strong> A class of repetitive elements that often code for their own propagation. Found across the genome as <strong>interspersed repeats</strong>.</p><p id="__p60">
<strong>UniProtKB/Swiss-Prot:</strong> A database of protein sequences that have been manually curated.</p><p id="__p61" class="p p-last">
<strong>VLRs: Variable lymphocyte receptors:</strong> immune genes found in jawless vertebrates, also containing LRRs.</p></div></div><div id="SEC6" class="tsec bk-sec"><h2 class="head no_bottom_margin" id="SEC6title" style="text-transform: uppercase;">SUPPLEMENTARY DATA</h2><!--/article/back/sec/--><p id="__p41" class="p p-first-last">
<a href="https://academic.oup.com/nar/article-lookup/doi/10.1093/nar/gkz841#supplementary-data" data-ga-action="click_feat_suppl" ref="reftype=extlink&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CBody&amp;TO=External%7CLink%7CURI" target="_blank">Supplementary Data</a> are available at NAR Online.</p></div><div id="SEC7" class="tsec bk-sec"><h2 class="head no_bottom_margin" id="SEC7title" style="text-transform: uppercase;">FUNDING</h2><!--/article/back/sec/--><p id="__p42" class="p p-first">The idea for this article was developed during two consecutive meetings of the EU COST-Action BM1405 &#x02018;Non-globular proteins: from sequence to structure, function and application in molecular physiopathology&#x02019;; Research Council of Norway [251076 to K.S.J.]; institutional funds of the University of Oslo, Faculty of Mathematics and Natural Sciences (to D.L. and B.S.); Institute of Informatics [BK-204/RAU2/2019 to A.G.]; European Union through the European Social Fund [POWR.03.02.00-00-I029 to P.J.]. Funding for open access charge: Institutional Funds, University of Oslo.</p><p id="__p43" class="p p-last">
<em>Conflict of interest statement</em>. None declared.</p></div><div id="REF1" class="tsec sec"><h2 class="head no_bottom_margin" id="REF1title" style="text-transform: uppercase;">REFERENCES</h2><div class="ref-list-sec sec" id="reference-list"><div class="ref-cit-blk half_rhythm" id="B1">1. <span class="mixed-citation">
Benson D.A., Cavanaugh M., Clark K., Karsch-Mizrachi I., Ostell J., Pruitt K.D., Sayers E.W.
<span class="ref-title">GenBank</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2018; <span class="ref-vol">46</span>:D41&#x02013;D47. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5753231/">PMC free article</a>]</span> [<a href="/pubmed/29140468" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=GenBank&amp;author=D.A.+Benson&amp;author=M.+Cavanaugh&amp;author=K.+Clark&amp;author=I.+Karsch-Mizrachi&amp;author=J.+Ostell&amp;volume=46&amp;publication_year=2018&amp;pages=D41-D47&amp;pmid=29140468&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B2">2. <span class="mixed-citation">
Blackburn E.H., Gall J.G.
<span class="ref-title">A tandemly repeated sequence at the termini of the extrachromosomal ribosomal RNA genes in Tetrahymena</span>. <span class="ref-journal">J. Mol. Biol.</span> 1978; <span class="ref-vol">120</span>:33&#x02013;53. [<a href="/pubmed/642006" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=J.+Mol.+Biol.&amp;title=A+tandemly+repeated+sequence+at+the+termini+of+the+extrachromosomal+ribosomal+RNA+genes+in+Tetrahymena&amp;author=E.H.+Blackburn&amp;author=J.G.+Gall&amp;volume=120&amp;publication_year=1978&amp;pages=33-53&amp;pmid=642006&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B3">3. <span class="mixed-citation">
Riethman H., Ambrosini A., Paul S.
<span class="ref-title">Human subtelomere structure and variation</span>. <span class="ref-journal">Chromosome Res.</span> 2005; <span class="ref-vol">13</span>:505&#x02013;515. [<a href="/pubmed/16132815" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Chromosome+Res.&amp;title=Human+subtelomere+structure+and+variation&amp;author=H.+Riethman&amp;author=A.+Ambrosini&amp;author=S.+Paul&amp;volume=13&amp;publication_year=2005&amp;pages=505-515&amp;pmid=16132815&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B4">4. <span class="mixed-citation">
Mehta G.D., Agarwal M.P., Ghosh S.K.
<span class="ref-title">Centromere identity: a challenge to be faced</span>. <span class="ref-journal">Mol. Genet. Genomics</span>. 2010; <span class="ref-vol">284</span>:75&#x02013;94. [<a href="/pubmed/20585957" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Mol.+Genet.+Genomics&amp;title=Centromere+identity:+a+challenge+to+be+faced&amp;author=G.D.+Mehta&amp;author=M.P.+Agarwal&amp;author=S.K.+Ghosh&amp;volume=284&amp;publication_year=2010&amp;pages=75-94&amp;pmid=20585957&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B5">5. <span class="mixed-citation">
Kidwell M.G.
<span class="ref-title">Transposable elements and the evolution of genome size in eukaryotes</span>. <span class="ref-journal">Genetica</span>. 2002; <span class="ref-vol">115</span>:49&#x02013;63. [<a href="/pubmed/12188048" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genetica&amp;title=Transposable+elements+and+the+evolution+of+genome+size+in+eukaryotes&amp;author=M.G.+Kidwell&amp;volume=115&amp;publication_year=2002&amp;pages=49-63&amp;pmid=12188048&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B6">6. <span class="mixed-citation">
Chalopin D., Naville M., Plard F., Galiana D., Volff J.-N.
<span class="ref-title">Comparative analysis of transposable elements highlights mobilome diversity and evolution in vertebrates</span>. <span class="ref-journal">Genome Biol Evol</span>. 2015; <span class="ref-vol">7</span>:567&#x02013;580. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4350176/">PMC free article</a>]</span> [<a href="/pubmed/25577199" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol+Evol&amp;title=Comparative+analysis+of+transposable+elements+highlights+mobilome+diversity+and+evolution+in+vertebrates&amp;author=D.+Chalopin&amp;author=M.+Naville&amp;author=F.+Plard&amp;author=D.+Galiana&amp;author=J.-N.+Volff&amp;volume=7&amp;publication_year=2015&amp;pages=567-580&amp;pmid=25577199&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B7">7. <span class="mixed-citation">
Litt M., Luty J.A.
<span class="ref-title">A hypervariable microsatellite revealed by in vitro amplification of a dinucleotide repeat within the cardiac muscle actin gene</span>. <span class="ref-journal">Am. J. Hum. Genet.</span> 1989; <span class="ref-vol">44</span>:397&#x02013;401. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC1715430/">PMC free article</a>]</span> [<a href="/pubmed/2563634" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Am.+J.+Hum.+Genet.&amp;title=A+hypervariable+microsatellite+revealed+by+in+vitro+amplification+of+a+dinucleotide+repeat+within+the+cardiac+muscle+actin+gene&amp;author=M.+Litt&amp;author=J.A.+Luty&amp;volume=44&amp;publication_year=1989&amp;pages=397-401&amp;pmid=2563634&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B8">8. <span class="mixed-citation">
Jeffreys A.J., Wilson V., Thein S.L.
<span class="ref-title">Hypervariable &#x02018;minisatellite&#x02019; regions in human DNA</span>. <span class="ref-journal">Nature</span>. 1985; <span class="ref-vol">314</span>:67&#x02013;73. [<a href="/pubmed/3856104" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nature&amp;title=Hypervariable+&#x02018;minisatellite&#x02019;+regions+in+human+DNA&amp;author=A.J.+Jeffreys&amp;author=V.+Wilson&amp;author=S.L.+Thein&amp;volume=314&amp;publication_year=1985&amp;pages=67-73&amp;pmid=3856104&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B9">9. <span class="mixed-citation">
Vergnaud G., Denoeud F.
<span class="ref-title">Minisatellites: mutability and genome architecture</span>. <span class="ref-journal">Genome Res.</span> 2000; <span class="ref-vol">10</span>:899&#x02013;907. [<a href="/pubmed/10899139" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Res.&amp;title=Minisatellites:+mutability+and+genome+architecture&amp;author=G.+Vergnaud&amp;author=F.+Denoeud&amp;volume=10&amp;publication_year=2000&amp;pages=899-907&amp;pmid=10899139&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B10">10. <span class="mixed-citation">
Mayer C., Leese F., Tollrian R.
<span class="ref-title">Genome-wide analysis of tandem repeats in <em>Daphnia pulex</em> - a comparative approach</span>. <span class="ref-journal">BMC Genomics</span>. 2010; <span class="ref-vol">11</span>:277. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3152781/">PMC free article</a>]</span> [<a href="/pubmed/20433735" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=BMC+Genomics&amp;title=Genome-wide+analysis+of+tandem+repeats+in+Daphnia+pulex+-+a+comparative+approach&amp;author=C.+Mayer&amp;author=F.+Leese&amp;author=R.+Tollrian&amp;volume=11&amp;publication_year=2010&amp;pages=277&amp;pmid=20433735&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B11">11. <span class="mixed-citation">
Zhao Z., Guo C., Sutharzan S., Li P., Echt C.S., Zhang J., Liang C.
<span class="ref-title">Genome-wide analysis of tandem repeats in plants and green algae</span>. <span class="ref-journal">G3</span>. 2014; <span class="ref-vol">4</span>:67&#x02013;78. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3887541/">PMC free article</a>]</span> [<a href="/pubmed/24192840" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=G3&amp;title=Genome-wide+analysis+of+tandem+repeats+in+plants+and+green+algae&amp;author=Z.+Zhao&amp;author=C.+Guo&amp;author=S.+Sutharzan&amp;author=P.+Li&amp;author=C.S.+Echt&amp;volume=4&amp;publication_year=2014&amp;pages=67-78&amp;pmid=24192840&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B12">12. <span class="mixed-citation">
Gymrek M.
<span class="ref-title">A genomic view of short tandem repeats</span>. <span class="ref-journal">Curr. Opin. Genet. Dev.</span> 2017; <span class="ref-vol">44</span>:9&#x02013;16. [<a href="/pubmed/28213161" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Curr.+Opin.+Genet.+Dev.&amp;title=A+genomic+view+of+short+tandem+repeats&amp;author=M.+Gymrek&amp;volume=44&amp;publication_year=2017&amp;pages=9-16&amp;pmid=28213161&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B13">13. <span class="mixed-citation">
DeBolt S.
<span class="ref-title">Copy number variation shapes genome diversity in Arabidopsis over immediate family generational scales</span>. <span class="ref-journal">Genome Biol. Evol.</span> 2010; <span class="ref-vol">2</span>:441&#x02013;453. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2997553/">PMC free article</a>]</span> [<a href="/pubmed/20624746" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol.+Evol.&amp;title=Copy+number+variation+shapes+genome+diversity+in+Arabidopsis+over+immediate+family+generational+scales&amp;author=S.+DeBolt&amp;volume=2&amp;publication_year=2010&amp;pages=441-453&amp;pmid=20624746&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B14">14. <span class="mixed-citation">
Press M.O., McCoy R.C., Hall A.N., Akey J.M., Queitsch C.
<span class="ref-title">Massive variation of short tandem repeats with functional consequences across strains of <em>Arabidopsis thaliana</em></span>. <span class="ref-journal">Genome Res.</span> 2018; <span class="ref-vol">28</span>:1169&#x02013;1178. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC6071631/">PMC free article</a>]</span> [<a href="/pubmed/29970452" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Res.&amp;title=Massive+variation+of+short+tandem+repeats+with+functional+consequences+across+strains+of+Arabidopsis+thaliana&amp;author=M.O.+Press&amp;author=R.C.+McCoy&amp;author=A.N.+Hall&amp;author=J.M.+Akey&amp;author=C.+Queitsch&amp;volume=28&amp;publication_year=2018&amp;pages=1169-1178&amp;pmid=29970452&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B15">15. <span class="mixed-citation">
Chakraborty M., VanKuren N.W., Zhao R., Zhang X., Kalsow S., Emerson J.J.
<span class="ref-title">Hidden genetic variation shapes the structure of functional elements in Drosophila</span>. <span class="ref-journal">Nat. Genet.</span> 2018; <span class="ref-vol">50</span>:20&#x02013;25. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5742068/">PMC free article</a>]</span> [<a href="/pubmed/29255259" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Genet.&amp;title=Hidden+genetic+variation+shapes+the+structure+of+functional+elements+in+Drosophila&amp;author=M.+Chakraborty&amp;author=N.W.+VanKuren&amp;author=R.+Zhao&amp;author=X.+Zhang&amp;author=S.+Kalsow&amp;volume=50&amp;publication_year=2018&amp;pages=20-25&amp;pmid=29255259&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B16">16. <span class="mixed-citation">
1000 Genomes Project Consortium
<span class="ref-title">A global reference for human genetic variation</span>. <span class="ref-journal">Nature</span>. 2015; <span class="ref-vol">526</span>:68&#x02013;74. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4750478/">PMC free article</a>]</span> [<a href="/pubmed/26432245" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nature&amp;title=A+global+reference+for+human+genetic+variation&amp;volume=526&amp;publication_year=2015&amp;pages=68-74&amp;pmid=26432245&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B17">17. <span class="mixed-citation">
Futschik A., Schl&#x000f6;tterer C.
<span class="ref-title">The next generation of molecular markers from massively parallel sequencing of pooled DNA samples</span>. <span class="ref-journal">Genetics</span>. 2010; <span class="ref-vol">186</span>:207&#x02013;218. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2940288/">PMC free article</a>]</span> [<a href="/pubmed/20457880" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genetics&amp;title=The+next+generation+of+molecular+markers+from+massively+parallel+sequencing+of+pooled+DNA+samples&amp;author=A.+Futschik&amp;author=C.+Schl&#x000f6;tterer&amp;volume=186&amp;publication_year=2010&amp;pages=207-218&amp;pmid=20457880&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B18">18. <span class="mixed-citation">
Zhou K., Aertsen A., Michiels C.W.
<span class="ref-title">The role of variable DNA tandem repeats in bacterial adaptation</span>. <span class="ref-journal">FEMS Microbiol. Rev.</span> 2014; <span class="ref-vol">38</span>:119&#x02013;141. [<a href="/pubmed/23927439" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=FEMS+Microbiol.+Rev.&amp;title=The+role+of+variable+DNA+tandem+repeats+in+bacterial+adaptation&amp;author=K.+Zhou&amp;author=A.+Aertsen&amp;author=C.W.+Michiels&amp;volume=38&amp;publication_year=2014&amp;pages=119-141&amp;pmid=23927439&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B19">19. <span class="mixed-citation">
Marcotte E.M., Pellegrini M., Yeates T.O., Eisenberg D.
<span class="ref-title">A census of protein repeats</span>. <span class="ref-journal">J. Mol. Biol.</span> 1999; <span class="ref-vol">293</span>:151&#x02013;160. [<a href="/pubmed/10512723" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=J.+Mol.+Biol.&amp;title=A+census+of+protein+repeats&amp;author=E.M.+Marcotte&amp;author=M.+Pellegrini&amp;author=T.O.+Yeates&amp;author=D.+Eisenberg&amp;volume=293&amp;publication_year=1999&amp;pages=151-160&amp;pmid=10512723&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B20">20. <span class="mixed-citation">
Pellegrini M.
<span class="ref-title">Tandem repeats in proteins: prediction algorithms and biological role</span>. <span class="ref-journal">Front. Bioeng. Biotechnol.</span> 2015; <span class="ref-vol">3</span>:1536. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4585158/">PMC free article</a>]</span> [<a href="/pubmed/26442257" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Front.+Bioeng.+Biotechnol.&amp;title=Tandem+repeats+in+proteins:+prediction+algorithms+and+biological+role&amp;author=M.+Pellegrini&amp;volume=3&amp;publication_year=2015&amp;pages=1536&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B21">21. <span class="mixed-citation">
Heringa J.
<span class="ref-title">Detection of internal repeats: how common are they?</span> <span class="ref-journal">Curr. Opin. Struct. Biol.</span> 1998; <span class="ref-vol">8</span>:338&#x02013;345. [<a href="/pubmed/9666330" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Curr.+Opin.+Struct.+Biol.&amp;title=Detection+of+internal+repeats:+how+common+are+they&amp;author=J.+Heringa&amp;volume=8&amp;publication_year=1998&amp;pages=338-345&amp;pmid=9666330&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B22">22. <span class="mixed-citation">
Andrade M.A., Ponting C.P., Gibson T.J., Bork P.
<span class="ref-title">Homology-based method for identification of protein repeats using statistical significance estimates</span>. <span class="ref-journal">J. Mol. Biol.</span> 2000; <span class="ref-vol">298</span>:521&#x02013;537. [<a href="/pubmed/10772867" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=J.+Mol.+Biol.&amp;title=Homology-based+method+for+identification+of+protein+repeats+using+statistical+significance+estimates&amp;author=M.A.+Andrade&amp;author=C.P.+Ponting&amp;author=T.J.+Gibson&amp;author=P.+Bork&amp;volume=298&amp;publication_year=2000&amp;pages=521-537&amp;pmid=10772867&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B23">23. <span class="mixed-citation">
Schaper E., Gascuel O., Anisimova M.
<span class="ref-title">Deep conservation of human protein tandem repeats within the eukaryotes</span>. <span class="ref-journal">Mol. Biol. Evol.</span> 2014; <span class="ref-vol">31</span>:1132&#x02013;1148. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3995336/">PMC free article</a>]</span> [<a href="/pubmed/24497029" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Mol.+Biol.+Evol.&amp;title=Deep+conservation+of+human+protein+tandem+repeats+within+the+eukaryotes&amp;author=E.+Schaper&amp;author=O.+Gascuel&amp;author=M.+Anisimova&amp;volume=31&amp;publication_year=2014&amp;pages=1132-1148&amp;pmid=24497029&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B24">24. <span class="mixed-citation">
Schaper E., Kajava A.V., Hauser A., Anisimova M.
<span class="ref-title">Repeat or not repeat?&#x02013;Statistical validation of tandem repeat prediction in genomic sequences</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2012; <span class="ref-vol">40</span>:10005&#x02013;10017. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3488214/">PMC free article</a>]</span> [<a href="/pubmed/22923522" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=Repeat+or+not+repeat?&#x02013;Statistical+validation+of+tandem+repeat+prediction+in+genomic+sequences&amp;author=E.+Schaper&amp;author=A.V.+Kajava&amp;author=A.+Hauser&amp;author=M.+Anisimova&amp;volume=40&amp;publication_year=2012&amp;pages=10005-10017&amp;pmid=22923522&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B25">25. <span class="mixed-citation">
Kushwaha A.K., Grove A.
<span class="ref-title">C-terminal low-complexity sequence repeats of <em>Mycobacterium smegmatis</em> Ku modulate DNA binding</span>. <span class="ref-journal">Biosci. Rep.</span> 2013; <span class="ref-vol">33</span>:175&#x02013;184. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3553676/">PMC free article</a>]</span> [<a href="/pubmed/23167261" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Biosci.+Rep.&amp;title=C-terminal+low-complexity+sequence+repeats+of+Mycobacterium+smegmatis+Ku+modulate+DNA+binding&amp;author=A.K.+Kushwaha&amp;author=A.+Grove&amp;volume=33&amp;publication_year=2013&amp;pages=175-184&amp;pmid=23167261&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B26">26. <span class="mixed-citation">
Rad&#x000f3;-Trilla N., Alb&#x000e0; M.
<span class="ref-title">Dissecting the role of low-complexity regions in the evolution of vertebrate proteins</span>. <span class="ref-journal">BMC Evol. Biol.</span> 2012; <span class="ref-vol">12</span>:155. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3523016/">PMC free article</a>]</span> [<a href="/pubmed/22920595" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=BMC+Evol.+Biol.&amp;title=Dissecting+the+role+of+low-complexity+regions+in+the+evolution+of+vertebrate+proteins&amp;author=N.+Rad&#x000f3;-Trilla&amp;author=M.+Alb&#x000e0;&amp;volume=12&amp;publication_year=2012&amp;pages=155-110&amp;pmid=22920595&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B27">27. <span class="mixed-citation">
Jorda J., Kajava A.V.
<span class="ref-title">Protein homorepeats: sequences, structures, evolution, and functions</span>. <span class="ref-journal">Adv. Protein Chem. Struct. Biol.</span> 2010; <span class="ref-vol">79</span>:59&#x02013;88. [<a href="/pubmed/20621281" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Adv.+Protein+Chem.+Struct.+Biol.&amp;title=Protein+homorepeats:+sequences,+structures,+evolution,+and+functions&amp;author=J.+Jorda&amp;author=A.V.+Kajava&amp;volume=79&amp;publication_year=2010&amp;pages=59-88&amp;pmid=20621281&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B28">28. <span class="mixed-citation">
Mularoni L., Ledda A., Toll-Riera M., Alb&#x000e0; M.M.
<span class="ref-title">Natural selection drives the accumulation of amino acid tandem repeats in human proteins</span>. <span class="ref-journal">Genome Res.</span> 2010; <span class="ref-vol">20</span>:745&#x02013;754. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2877571/">PMC free article</a>]</span> [<a href="/pubmed/20335526" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Res.&amp;title=Natural+selection+drives+the+accumulation+of+amino+acid+tandem+repeats+in+human+proteins&amp;author=L.+Mularoni&amp;author=A.+Ledda&amp;author=M.+Toll-Riera&amp;author=M.M.+Alb&#x000e0;&amp;volume=20&amp;publication_year=2010&amp;pages=745-754&amp;pmid=20335526&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B29">29. <span class="mixed-citation">
Mier P., Andrade-Navarro M.A.
<span class="ref-title">Glutamine codon usage and polyQ evolution in primates depend on the Q stretch length</span>. <span class="ref-journal">Genome Biol Evol</span>. 2018; <span class="ref-vol">10</span>:816&#x02013;825. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5841385/">PMC free article</a>]</span> [<a href="/pubmed/29608721" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol+Evol&amp;title=Glutamine+codon+usage+and+polyQ+evolution+in+primates+depend+on+the+Q+stretch+length&amp;author=P.+Mier&amp;author=M.A.+Andrade-Navarro&amp;volume=10&amp;publication_year=2018&amp;pages=816-825&amp;pmid=29608721&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B30">30. <span class="mixed-citation">
Mier P., Andrade-Navarro M.A.
<span class="ref-title">dAPE: a web server to detect homorepeats and follow their evolution</span>. <span class="ref-journal">Bioinformatics</span>. 2017; <span class="ref-vol">33</span>:1221&#x02013;1223. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5408840/">PMC free article</a>]</span> [<a href="/pubmed/28031183" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Bioinformatics&amp;title=dAPE:+a+web+server+to+detect+homorepeats+and+follow+their+evolution&amp;author=P.+Mier&amp;author=M.A.+Andrade-Navarro&amp;volume=33&amp;publication_year=2017&amp;pages=1221-1223&amp;pmid=28031183&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B31">31. <span class="mixed-citation">
Lobanov M.Y., Sokolovskiy I.V., Galzitskaya O.V.
<span class="ref-title">HRaP: database of occurrence of HomoRepeats and patterns in proteomes</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2014; <span class="ref-vol">42</span>:D273&#x02013;D278. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3965023/">PMC free article</a>]</span> [<a href="/pubmed/24150944" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=HRaP:+database+of+occurrence+of+HomoRepeats+and+patterns+in+proteomes&amp;author=M.Y.+Lobanov&amp;author=I.V.+Sokolovskiy&amp;author=O.V.+Galzitskaya&amp;volume=42&amp;publication_year=2014&amp;pages=D273-D278&amp;pmid=24150944&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B32">32. <span class="mixed-citation">
Tompa P.
<span class="ref-title">Intrinsically unstructured proteins evolve by repeat expansion</span>. <span class="ref-journal">Bioessays</span>. 2003; <span class="ref-vol">25</span>:847&#x02013;855. [<a href="/pubmed/12938174" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Bioessays&amp;title=Intrinsically+unstructured+proteins+evolve+by+repeat+expansion&amp;author=P.+Tompa&amp;volume=25&amp;publication_year=2003&amp;pages=847-855&amp;pmid=12938174&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B33">33. <span class="mixed-citation">
Simon M., Hancock J.M.
<span class="ref-title">Tandem and cryptic amino acid repeats accumulate in disordered regions of proteins</span>. <span class="ref-journal">Genome Biol.</span> 2009; <span class="ref-vol">10</span>:R59. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2718493/">PMC free article</a>]</span> [<a href="/pubmed/19486509" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol.&amp;title=Tandem+and+cryptic+amino+acid+repeats+accumulate+in+disordered+regions+of+proteins&amp;author=M.+Simon&amp;author=J.M.+Hancock&amp;volume=10&amp;publication_year=2009&amp;pages=R59&amp;pmid=19486509&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B34">34. <span class="mixed-citation">
Jorda J., Xue B., Uversky V.N., Kajava A.V.
<span class="ref-title">Protein tandem repeats&#x02014;the more perfect, the less structured</span>. <span class="ref-journal">FEBS J.</span> 2010; <span class="ref-vol">277</span>:2673&#x02013;2682. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2928880/">PMC free article</a>]</span> [<a href="/pubmed/20553501" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=FEBS+J.&amp;title=Protein+tandem+repeats&#x02014;the+more+perfect,+the+less+structured&amp;author=J.+Jorda&amp;author=B.+Xue&amp;author=V.N.+Uversky&amp;author=A.V.+Kajava&amp;volume=277&amp;publication_year=2010&amp;pages=2673-2682&amp;pmid=20553501&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B35">35. <span class="mixed-citation">
Kajava A.V.
<span class="ref-title">Tandem repeats in proteins: From sequence to structure</span>. <span class="ref-journal">J. Struct. Biol.</span> 2012; <span class="ref-vol">179</span>:279&#x02013;288. [<a href="/pubmed/21884799" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=J.+Struct.+Biol.&amp;title=Tandem+repeats+in+proteins:+From+sequence+to+structure&amp;author=A.V.+Kajava&amp;volume=179&amp;publication_year=2012&amp;pages=279-288&amp;pmid=21884799&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B36">36. <span class="mixed-citation">
Paladin L., Hirsh L., Piovesan D., Andrade-Navarro M.A., Kajava A.V., Tosatto S.C.E.
<span class="ref-title">RepeatsDB 2.0: improved annotation, classification, search and visualization of repeat protein structures</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2017; <span class="ref-vol">45</span>:D308&#x02013;D312. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5210593/">PMC free article</a>]</span> [<a href="/pubmed/27899671" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=RepeatsDB+2.0:+improved+annotation,+classification,+search+and+visualization+of+repeat+protein+structures&amp;author=L.+Paladin&amp;author=L.+Hirsh&amp;author=D.+Piovesan&amp;author=M.A.+Andrade-Navarro&amp;author=A.V.+Kajava&amp;volume=45&amp;publication_year=2017&amp;pages=D308-D312&amp;pmid=27899671&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B37">37. <span class="mixed-citation">
Schaper E., Anisimova M.
<span class="ref-title">The evolution and function of protein tandem repeats in plants</span>. <span class="ref-journal">New Phytol.</span> 2015; <span class="ref-vol">206</span>:397&#x02013;410. [<a href="/pubmed/25420631" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=New+Phytol.&amp;title=The+evolution+and+function+of+protein+tandem+repeats+in+plants&amp;author=E.+Schaper&amp;author=M.+Anisimova&amp;volume=206&amp;publication_year=2015&amp;pages=397-410&amp;pmid=25420631&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B38">38. <span class="mixed-citation">
Kajava A.V., Anisimova M., Peeters N.
<span class="ref-title">Origin and evolution of GALA-LRR, a new member of the CC-LRR subfamily: from plants to bacteria</span>. <span class="ref-journal">PLoS One</span>. 2008; <span class="ref-vol">3</span>:e1694. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2244805/">PMC free article</a>]</span> [<a href="/pubmed/18301771" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=PLoS+One&amp;title=Origin+and+evolution+of+GALA-LRR,+a+new+member+of+the+CC-LRR+subfamily:+from+plants+to+bacteria&amp;author=A.V.+Kajava&amp;author=M.+Anisimova&amp;author=N.+Peeters&amp;volume=3&amp;publication_year=2008&amp;pages=e1694&amp;pmid=18301771&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B39">39. <span class="mixed-citation">
Szalkowski A.M., Anisimova M.
<span class="ref-title">Graph-based modeling of tandem repeats improves global multiple sequence alignment</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2013; <span class="ref-vol">41</span>:e162. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3783189/">PMC free article</a>]</span> [<a href="/pubmed/23877246" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=Graph-based+modeling+of+tandem+repeats+improves+global+multiple+sequence+alignment&amp;author=A.M.+Szalkowski&amp;author=M.+Anisimova&amp;volume=41&amp;publication_year=2013&amp;pages=e162&amp;pmid=23877246&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B40">40. <span class="mixed-citation">
Verstrepen K.J., Jansen A., Lewitter F., Fink G.R.
<span class="ref-title">Intragenic tandem repeats generate functional variability</span>. <span class="ref-journal">Nat. Genet.</span> 2005; <span class="ref-vol">37</span>:986&#x02013;990. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC1462868/">PMC free article</a>]</span> [<a href="/pubmed/16086015" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Genet.&amp;title=Intragenic+tandem+repeats+generate+functional+variability&amp;author=K.J.+Verstrepen&amp;author=A.+Jansen&amp;author=F.+Lewitter&amp;author=G.R.+Fink&amp;volume=37&amp;publication_year=2005&amp;pages=986-990&amp;pmid=16086015&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B41">41. <span class="mixed-citation">
Kashi Y., King D.G.
<span class="ref-title">Simple sequence repeats as advantageous mutators in evolution</span>. <span class="ref-journal">Trends Genet.</span> 2006; <span class="ref-vol">22</span>:253&#x02013;259. [<a href="/pubmed/16567018" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Trends+Genet.&amp;title=Simple+sequence+repeats+as+advantageous+mutators+in+evolution&amp;author=Y.+Kashi&amp;author=D.G.+King&amp;volume=22&amp;publication_year=2006&amp;pages=253-259&amp;pmid=16567018&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B42">42. <span class="mixed-citation">
Sutherland G.R., Richards R.I.
<span class="ref-title">Simple tandem DNA repeats and human genetic disease</span>. <span class="ref-journal">Proc. Natl Acad. Sci. U.S.A.</span> 1995; <span class="ref-vol">92</span>:3636&#x02013;3641. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC42017/">PMC free article</a>]</span> [<a href="/pubmed/7731957" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Proc.+Natl+Acad.+Sci.+U.S.A.&amp;title=Simple+tandem+DNA+repeats+and+human+genetic+disease&amp;author=G.R.+Sutherland&amp;author=R.I.+Richards&amp;volume=92&amp;publication_year=1995&amp;pages=3636-3641&amp;pmid=7731957&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B43">43. <span class="mixed-citation">
Bentley D.R., Balasubramanian S., Swerdlow H.P., Smith G.P., Milton J., Brown C.G., Hall K.P., Evers D.J., Barnes C.L., Bignell H.R. et al.
<span class="ref-title">Accurate whole human genome sequencing using reversible terminator chemistry</span>. <span class="ref-journal">Nature</span>. 2008; <span class="ref-vol">456</span>:53&#x02013;59. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2581791/">PMC free article</a>]</span> [<a href="/pubmed/18987734" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nature&amp;title=Accurate+whole+human+genome+sequencing+using+reversible+terminator+chemistry&amp;author=D.R.+Bentley&amp;author=S.+Balasubramanian&amp;author=H.P.+Swerdlow&amp;author=G.P.+Smith&amp;author=J.+Milton&amp;volume=456&amp;publication_year=2008&amp;pages=53-59&amp;pmid=18987734&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B44">44. <span class="mixed-citation">
Glenn T.C.
<span class="ref-title">Field guide to next&#x02010;generation DNA sequencers</span>. <span class="ref-journal">Mol. Ecol. Resour.</span> 2011; <span class="ref-vol">11</span>:759&#x02013;769. [<a href="/pubmed/21592312" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Mol.+Ecol.+Resour.&amp;title=Field+guide+to+next&#x02010;generation+DNA+sequencers&amp;author=T.C.+Glenn&amp;volume=11&amp;publication_year=2011&amp;pages=759-769&amp;pmid=21592312&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B45">45. <span class="mixed-citation">
Eid J., Fehr A., Gray J., Luong K., Lyle J., Otto G., Peluso P., Rank D., Baybayan P., Bettman B. et al.
<span class="ref-title">Real-time DNA sequencing from single polymerase molecules</span>. <span class="ref-journal">Science</span>. 2009; <span class="ref-vol">323</span>:133&#x02013;138. [<a href="/pubmed/19023044" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Science&amp;title=Real-time+DNA+sequencing+from+single+polymerase+molecules&amp;author=J.+Eid&amp;author=A.+Fehr&amp;author=J.+Gray&amp;author=K.+Luong&amp;author=J.+Lyle&amp;volume=323&amp;publication_year=2009&amp;pages=133-138&amp;pmid=19023044&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B46">46. <span class="mixed-citation">
Olasagasti F., Lieberman K.R., Benner S., Cherf G.M., Dahl J.M., Deamer D.W., Akeson M.
<span class="ref-title">Replication of individual DNA molecules under electronic control using a protein nanopore</span>. <span class="ref-journal">Nat. Nanotechnol.</span> 2010; <span class="ref-vol">5</span>:798&#x02013;806. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3711841/">PMC free article</a>]</span> [<a href="/pubmed/20871614" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Nanotechnol.&amp;title=Replication+of+individual+DNA+molecules+under+electronic+control+using+a+protein+nanopore&amp;author=F.+Olasagasti&amp;author=K.R.+Lieberman&amp;author=S.+Benner&amp;author=G.M.+Cherf&amp;author=J.M.+Dahl&amp;volume=5&amp;publication_year=2010&amp;pages=798-806&amp;pmid=20871614&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B47">47. <span class="mixed-citation">
Rhoads A., Au K.F.
<span class="ref-title">PacBio sequencing and its applications</span>. <span class="ref-journal">Genomics Proteomics Bioinformatics</span>. 2015; <span class="ref-vol">13</span>:278&#x02013;289. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4678779/">PMC free article</a>]</span> [<a href="/pubmed/26542840" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genomics+Proteomics+Bioinformatics&amp;title=PacBio+sequencing+and+its+applications&amp;author=A.+Rhoads&amp;author=K.F.+Au&amp;volume=13&amp;publication_year=2015&amp;pages=278-289&amp;pmid=26542840&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B48">48. <span class="mixed-citation">
Weirather J.L., de&#x000a0;Cesare M., Wang Y., Piazza P., Sebastiano V., Wang X.-J., Buck D., Au K.F.
<span class="ref-title">Comprehensive comparison of Pacific Biosciences and Oxford Nanopore Technologies and their applications to transcriptome analysis [version 2; peer review: 2 approved]</span>. <span class="ref-journal">F1000Research</span>. 2017; <span class="ref-vol">6</span>:100. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5553090/">PMC free article</a>]</span> [<a href="/pubmed/28868132" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=F1000Research&amp;title=Comprehensive+comparison+of+Pacific+Biosciences+and+Oxford+Nanopore+Technologies+and+their+applications+to+transcriptome+analysis+[version+2;+peer+review:+2+approved]&amp;author=J.L.+Weirather&amp;author=M.+de&#x000a0;Cesare&amp;author=Y.+Wang&amp;author=P.+Piazza&amp;author=V.+Sebastiano&amp;volume=6&amp;publication_year=2017&amp;pages=100&amp;pmid=28868132&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B49">49. <span class="mixed-citation">
Balzer S., Malde K., Lanz&#x000e9;n A., Sharma A., Jonassen I.
<span class="ref-title">Characteristics of 454 pyrosequencing data&#x02013;enabling realistic simulation with flowsim</span>. <span class="ref-journal">Bioinformatics</span>. 2010; <span class="ref-vol">26</span>:i420&#x02013;i425. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2935434/">PMC free article</a>]</span> [<a href="/pubmed/20823302" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Bioinformatics&amp;title=Characteristics+of+454+pyrosequencing+data&#x02013;enabling+realistic+simulation+with+flowsim&amp;author=S.+Balzer&amp;author=K.+Malde&amp;author=A.+Lanz&#x000e9;n&amp;author=A.+Sharma&amp;author=I.+Jonassen&amp;volume=26&amp;publication_year=2010&amp;pages=i420-i425&amp;pmid=20823302&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B50">50. <span class="mixed-citation">
Bragg L.M., Stone G., Butler M.K., Hugenholtz P., Tyson G.W.
<span class="ref-title">Shining a light on dark sequencing: characterising errors in ion torrent PGM data</span>. <span class="ref-journal">PLoS Comp. Biol.</span> 2013; <span class="ref-vol">9</span>:e1003031. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3623719/">PMC free article</a>]</span> [<a href="/pubmed/23592973" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=PLoS+Comp.+Biol.&amp;title=Shining+a+light+on+dark+sequencing:+characterising+errors+in+ion+torrent+PGM+data&amp;author=L.M.+Bragg&amp;author=G.+Stone&amp;author=M.K.+Butler&amp;author=P.+Hugenholtz&amp;author=G.W.+Tyson&amp;volume=9&amp;publication_year=2013&amp;pages=e1003031&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B51">51. <span class="mixed-citation">
Luo C., Tsementzi D., Kyrpides N., Read T., Konstantinidis K.T.
<span class="ref-title">Direct comparisons of Illumina vs. Roche 454 sequencing technologies on the same microbial community DNA sample</span>. <span class="ref-journal">PLoS One</span>. 2012; <span class="ref-vol">7</span>:e30087. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3277595/">PMC free article</a>]</span> [<a href="/pubmed/22347999" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=PLoS+One&amp;title=Direct+comparisons+of+Illumina+vs.+Roche+454+sequencing+technologies+on+the+same+microbial+community+DNA+sample&amp;author=C.+Luo&amp;author=D.+Tsementzi&amp;author=N.+Kyrpides&amp;author=T.+Read&amp;author=K.T.+Konstantinidis&amp;volume=7&amp;publication_year=2012&amp;pages=e30087&amp;pmid=22347999&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B52">52. <span class="mixed-citation">
Zerbino D.R., Birney E.
<span class="ref-title">Velvet: Algorithms for de novo short read assembly using de Bruijn graphs</span>. <span class="ref-journal">Genome Res.</span> 2008; <span class="ref-vol">18</span>:821&#x02013;829. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2336801/">PMC free article</a>]</span> [<a href="/pubmed/18349386" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Res.&amp;title=Velvet:+Algorithms+for+de+novo+short+read+assembly+using+de+Bruijn+graphs&amp;author=D.R.+Zerbino&amp;author=E.+Birney&amp;volume=18&amp;publication_year=2008&amp;pages=821-829&amp;pmid=18349386&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B53">53. <span class="mixed-citation">
Gnerre S., Maccallum I., Przybylski D., Ribeiro F.J., Burton J.N., Walker B.J., Sharpe T., Hall G., Shea T.P., Sykes S. et al.
<span class="ref-title">High-quality draft assemblies of mammalian genomes from massively parallel sequence data</span>. <span class="ref-journal">Proc. Natl Acad. Sci. U.S.A.</span> 2011; <span class="ref-vol">108</span>:1513&#x02013;1518. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3029755/">PMC free article</a>]</span> [<a href="/pubmed/21187386" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Proc.+Natl+Acad.+Sci.+U.S.A.&amp;title=High-quality+draft+assemblies+of+mammalian+genomes+from+massively+parallel+sequence+data&amp;author=S.+Gnerre&amp;author=I.+Maccallum&amp;author=D.+Przybylski&amp;author=F.J.+Ribeiro&amp;author=J.N.+Burton&amp;volume=108&amp;publication_year=2011&amp;pages=1513-1518&amp;pmid=21187386&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B54">54. <span class="mixed-citation">
Bankevich A., Nurk S., Antipov D., Gurevich A.A., Dvorkin M., Kulikov A.S., Lesin V.M., Nikolenko S.I., Pham S., Prjibelski A.D. et al.
<span class="ref-title">SPAdes: a new genome assembly algorithm and its applications to single-cell sequencing</span>. <span class="ref-journal">J. Comput. Biol.</span> 2012; <span class="ref-vol">19</span>:455&#x02013;477. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3342519/">PMC free article</a>]</span> [<a href="/pubmed/22506599" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=J.+Comput.+Biol.&amp;title=SPAdes:+a+new+genome+assembly+algorithm+and+its+applications+to+single-cell+sequencing&amp;author=A.+Bankevich&amp;author=S.+Nurk&amp;author=D.+Antipov&amp;author=A.A.+Gurevich&amp;author=M.+Dvorkin&amp;volume=19&amp;publication_year=2012&amp;pages=455-477&amp;pmid=22506599&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B55">55. <span class="mixed-citation">
Souvorov A., Agarwala R., Lipman D.J.
<span class="ref-title">SKESA: strategic k-mer extension for scrupulous assemblies</span>. <span class="ref-journal">Genome Biol.</span> 2018; <span class="ref-vol">19</span>:1&#x02013;13. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC6172800/">PMC free article</a>]</span> [<a href="/pubmed/30286803" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol.&amp;title=SKESA:+strategic+k-mer+extension+for+scrupulous+assemblies&amp;author=A.+Souvorov&amp;author=R.+Agarwala&amp;author=D.J.+Lipman&amp;volume=19&amp;publication_year=2018&amp;pages=1-13&amp;pmid=30286803&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B56">56. <span class="mixed-citation">
Myers E.W., Sutton G.G., Delcher A.L., Dew I.M., Fasulo D.P., Flanigan M.J., Kravitz S.A., Mobarry C.M., Reinert K.H., Remington K.A. et al.
<span class="ref-title">A whole-genome assembly of <em>Drosophila</em></span>. <span class="ref-journal">Science</span>. 2000; <span class="ref-vol">287</span>:2196&#x02013;2204. [<a href="/pubmed/10731133" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Science&amp;title=A+whole-genome+assembly+of+Drosophila&amp;author=E.W.+Myers&amp;author=G.G.+Sutton&amp;author=A.L.+Delcher&amp;author=I.M.+Dew&amp;author=D.P.+Fasulo&amp;volume=287&amp;publication_year=2000&amp;pages=2196-2204&amp;pmid=10731133&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B57">57. <span class="mixed-citation">
Miller J.R., Koren S., Sutton G.G.
<span class="ref-title">Assembly algorithms for next-generation sequencing data</span>. <span class="ref-journal">Genomics</span>. 2010; <span class="ref-vol">95</span>:315&#x02013;327. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2874646/">PMC free article</a>]</span> [<a href="/pubmed/20211242" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genomics&amp;title=Assembly+algorithms+for+next-generation+sequencing+data&amp;author=J.R.+Miller&amp;author=S.+Koren&amp;author=G.G.+Sutton&amp;volume=95&amp;publication_year=2010&amp;pages=315-327&amp;pmid=20211242&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B58">58. <span class="mixed-citation">
Treangen T.J., Salzberg S.L.
<span class="ref-title">Repetitive DNA and next-generation sequencing: computational challenges and solutions</span>. <span class="ref-journal">Nat. Rev. Genet</span>. 2012; <span class="ref-vol">13</span>:36&#x02013;46. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3324860/">PMC free article</a>]</span> [<a href="/pubmed/22124482" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Rev.+Genet&amp;title=Repetitive+DNA+and+next-generation+sequencing:+computational+challenges+and+solutions&amp;author=T.J.+Treangen&amp;author=S.L.+Salzberg&amp;volume=13&amp;publication_year=2012&amp;pages=36-46&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B59">59. <span class="mixed-citation">
Sotero-Caio C.G., Platt R.N., Suh A., Ray D.A.
<span class="ref-title">Evolution and diversity of transposable elements in vertebrate genomes</span>. <span class="ref-journal">Genome Biol. Evol.</span> 2017; <span class="ref-vol">9</span>:161&#x02013;177. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5381603/">PMC free article</a>]</span> [<a href="/pubmed/28158585" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol.+Evol.&amp;title=Evolution+and+diversity+of+transposable+elements+in+vertebrate+genomes&amp;author=C.G.+Sotero-Caio&amp;author=R.N.+Platt&amp;author=A.+Suh&amp;author=D.A.+Ray&amp;volume=9&amp;publication_year=2017&amp;pages=161-177&amp;pmid=28158585&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B60">60. <span class="mixed-citation">
Elliott T.A., Gregory T.R.
<span class="ref-title">What's in a genome? The C-value enigma and the evolution of eukaryotic genome content</span>. <span class="ref-journal">Philos. Trans. R Soc. Lond, B, Biol Sci</span>. 2015; <span class="ref-vol">370</span>:20140331. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4571570/">PMC free article</a>]</span> [<a href="/pubmed/26323762" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Philos.+Trans.+R+Soc.+Lond,+B,+Biol+Sci&amp;title=What's+in+a+genome?+The+C-value+enigma+and+the+evolution+of+eukaryotic+genome+content&amp;author=T.A.+Elliott&amp;author=T.R.+Gregory&amp;volume=370&amp;publication_year=2015&amp;pages=20140331&amp;pmid=26323762&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B61">61. <span class="mixed-citation">
Liljegren M.M., de&#x000a0;Muinck E.J., Trosvik P.
<span class="ref-title">Microsatellite length scoring by single molecule real time sequencing - effects of sequence structure and PCR regime</span>. <span class="ref-journal">PLoS One</span>. 2016; <span class="ref-vol">11</span>:e0159232. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4945053/">PMC free article</a>]</span> [<a href="/pubmed/27414800" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=PLoS+One&amp;title=Microsatellite+length+scoring+by+single+molecule+real+time+sequencing+-+effects+of+sequence+structure+and+PCR+regime&amp;author=M.M.+Liljegren&amp;author=E.J.+de&#x000a0;Muinck&amp;author=P.+Trosvik&amp;volume=11&amp;publication_year=2016&amp;pages=e0159232&amp;pmid=27414800&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B62">62. <span class="mixed-citation">
T&#x000f8;rresen O.K., Star B., Jentoft S., Reinar W.B., Grove H., Miller J.R., Walenz B.P., Knight J., Ekholm J.M., Peluso P. et al.
<span class="ref-title">An improved genome assembly uncovers prolific tandem repeats in Atlantic cod</span>. <span class="ref-journal">BMC Genomics</span>. 2017; <span class="ref-vol">18</span>:95. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5241972/">PMC free article</a>]</span> [<a href="/pubmed/28100185" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=BMC+Genomics&amp;title=An+improved+genome+assembly+uncovers+prolific+tandem+repeats+in+Atlantic+cod&amp;author=O.K.+T&#x000f8;rresen&amp;author=B.+Star&amp;author=S.+Jentoft&amp;author=W.B.+Reinar&amp;author=H.+Grove&amp;volume=18&amp;publication_year=2017&amp;pages=95&amp;pmid=28100185&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B63">63. <span class="mixed-citation">
Adams R.H., Blackmon H., Reyes-Velasco J., Schield D.R., Card D.C., Andrew A.L., Waynewood N., Castoe T.A.
<span class="ref-title">Microsatellite landscape evolutionary dynamics across 450 million years of vertebrate genome evolution</span>. <span class="ref-journal">Genome</span>. 2016; <span class="ref-vol">59</span>:295&#x02013;310. [<a href="/pubmed/27064176" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome&amp;title=Microsatellite+landscape+evolutionary+dynamics+across+450+million+years+of+vertebrate+genome+evolution&amp;author=R.H.+Adams&amp;author=H.+Blackmon&amp;author=J.+Reyes-Velasco&amp;author=D.R.+Schield&amp;author=D.C.+Card&amp;volume=59&amp;publication_year=2016&amp;pages=295-310&amp;pmid=27064176&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B64">64. <span class="mixed-citation">
Jiang Q., Li Q., Yu H., Kong L.
<span class="ref-title">Genome-wide analysis of simple sequence repeats in marine animals-a comparative approach</span>. <span class="ref-journal">Mar. Biotechnol.</span> 2014; <span class="ref-vol">16</span>:604&#x02013;619. [<a href="/pubmed/24939717" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Mar.+Biotechnol.&amp;title=Genome-wide+analysis+of+simple+sequence+repeats+in+marine+animals-a+comparative+approach&amp;author=Q.+Jiang&amp;author=Q.+Li&amp;author=H.+Yu&amp;author=L.+Kong&amp;volume=16&amp;publication_year=2014&amp;pages=604-619&amp;pmid=24939717&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B65">65. <span class="mixed-citation">
Star B., Hansen M.H., Skage M., Bradbury I.R., Godiksen J.A., Kjesbu O.S., Jentoft S.
<span class="ref-title">Preferential amplification of repetitive DNA during whole genome sequencing library creation from historic samples</span>. <span class="ref-journal">Sci. Technol. Archaeol. Res.</span> 2016; <span class="ref-vol">2</span>:36&#x02013;45. <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Sci.+Technol.+Archaeol.+Res.&amp;title=Preferential+amplification+of+repetitive+DNA+during+whole+genome+sequencing+library+creation+from+historic+samples&amp;author=B.+Star&amp;author=M.H.+Hansen&amp;author=M.+Skage&amp;author=I.R.+Bradbury&amp;author=J.A.+Godiksen&amp;volume=2&amp;publication_year=2016&amp;pages=36-45&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B66">66. <span class="mixed-citation">
Star B., Nederbragt A.J., Jentoft S., Grimholt U., Malmstr&#x000f8;m M., Gregers T.F., Rounge T.B., Paulsen J., Solbakken M.H., Sharma A. et al.
<span class="ref-title">The genome sequence of Atlantic cod reveals a unique immune system</span>. <span class="ref-journal">Nature</span>. 2011; <span class="ref-vol">477</span>:207&#x02013;210. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3537168/">PMC free article</a>]</span> [<a href="/pubmed/21832995" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nature&amp;title=The+genome+sequence+of+Atlantic+cod+reveals+a+unique+immune+system&amp;author=B.+Star&amp;author=A.J.+Nederbragt&amp;author=S.+Jentoft&amp;author=U.+Grimholt&amp;author=M.+Malmstr&#x000f8;m&amp;volume=477&amp;publication_year=2011&amp;pages=207-210&amp;pmid=21832995&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B67">67. <span class="mixed-citation">
Zhuang X., Yang C., Fevolden S.-E., Cheng C.-H.
<span class="ref-title">Protein genes in repetitive sequence&#x02014;antifreeze glycoproteins in Atlantic cod genome</span>. <span class="ref-journal">BMC Genomics</span>. 2012; <span class="ref-vol">13</span>:293. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3441883/">PMC free article</a>]</span> [<a href="/pubmed/22747999" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=BMC+Genomics&amp;title=Protein+genes+in+repetitive+sequence&#x02014;antifreeze+glycoproteins+in+Atlantic+cod+genome&amp;author=X.+Zhuang&amp;author=C.+Yang&amp;author=S.-E.+Fevolden&amp;author=C.-H.+Cheng&amp;volume=13&amp;publication_year=2012&amp;pages=293&amp;pmid=22747999&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B68">68. <span class="mixed-citation">
Belser C., Istace B., Denis E., Dubarry M., Baurens F.-C., Falentin C., Genete M., Berrabah W., Ch&#x000e8;vre A.-M., Delourme R. et al.
<span class="ref-title">Chromosome-scale assemblies of plant genomes using nanopore long reads and optical maps</span>. <span class="ref-journal">Nat. Plants</span>. 2018; <span class="ref-vol">4</span>:879&#x02013;887. [<a href="/pubmed/30390080" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Plants&amp;title=Chromosome-scale+assemblies+of+plant+genomes+using+nanopore+long+reads+and+optical+maps&amp;author=C.+Belser&amp;author=B.+Istace&amp;author=E.+Denis&amp;author=M.+Dubarry&amp;author=F.-C.+Baurens&amp;volume=4&amp;publication_year=2018&amp;pages=879-887&amp;pmid=30390080&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B69">69. <span class="mixed-citation">
Hommelsheim C.M., Frantzeskakis L., Huang M., &#x000dc;lker B.
<span class="ref-title">PCR amplification of repetitive DNA: a limitation to genome editing technologies and many other applications</span>. <span class="ref-journal">Sci. Rep.</span> 2014; <span class="ref-vol">4</span>:5052. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4031481/">PMC free article</a>]</span> [<a href="/pubmed/24852006" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Sci.+Rep.&amp;title=PCR+amplification+of+repetitive+DNA:+a+limitation+to+genome+editing+technologies+and+many+other+applications&amp;author=C.M.+Hommelsheim&amp;author=L.+Frantzeskakis&amp;author=M.+Huang&amp;author=B.+&#x000dc;lker&amp;volume=4&amp;publication_year=2014&amp;pages=5052&amp;pmid=24852006&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B70">70. <span class="mixed-citation">
Hurles M.
<span class="ref-title">Gene duplication: the genomic trade in spare parts</span>. <span class="ref-journal">PLoS Biol.</span> 2004; <span class="ref-vol">2</span>:e206. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC449868/">PMC free article</a>]</span> [<a href="/pubmed/15252449" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=PLoS+Biol.&amp;title=Gene+duplication:+the+genomic+trade+in+spare+parts&amp;author=M.+Hurles&amp;volume=2&amp;publication_year=2004&amp;pages=e206&amp;pmid=15252449&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B71">71. <span class="mixed-citation">
Hardison R.C.
<span class="ref-title">Evolution of hemoglobin and its genes</span>. <span class="ref-journal">Cold Spring Harb. Perspect. Med.</span> 2012; <span class="ref-vol">2</span>:a011627. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3543078/">PMC free article</a>]</span> [<a href="/pubmed/23209182" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Cold+Spring+Harb.+Perspect.+Med.&amp;title=Evolution+of+hemoglobin+and+its+genes&amp;author=R.C.+Hardison&amp;volume=2&amp;publication_year=2012&amp;pages=a011627&amp;pmid=23209182&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B72">72. <span class="mixed-citation">
Opazo J.C., Butts G.T., Nery M.F., Storz J.F., Hoffmann F.G.
<span class="ref-title">Whole-genome duplication and the functional diversification of teleost fish hemoglobins</span>. <span class="ref-journal">Mol. Biol. Evol.</span> 2013; <span class="ref-vol">30</span>:140&#x02013;153. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3525417/">PMC free article</a>]</span> [<a href="/pubmed/22949522" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Mol.+Biol.+Evol.&amp;title=Whole-genome+duplication+and+the+functional+diversification+of+teleost+fish+hemoglobins&amp;author=J.C.+Opazo&amp;author=G.T.+Butts&amp;author=M.F.+Nery&amp;author=J.F.+Storz&amp;author=F.G.+Hoffmann&amp;volume=30&amp;publication_year=2013&amp;pages=140-153&amp;pmid=22949522&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B73">73. <span class="mixed-citation">
Baalsrud H.T., Voje K.L., T&#x000f8;rresen O.K., Solbakken M.H., Matschiner M., Malmstr&#x000f8;m M., Hanel R., Salzburger W., Jakobsen K.S., Jentoft S.
<span class="ref-title">Evolution of hemoglobin genes in codfishes influenced by ocean depth</span>. <span class="ref-journal">Sci. Rep.</span> 2017; <span class="ref-vol">7</span>:7956. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5554263/">PMC free article</a>]</span> [<a href="/pubmed/28801564" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Sci.+Rep.&amp;title=Evolution+of+hemoglobin+genes+in+codfishes+influenced+by+ocean+depth&amp;author=H.T.+Baalsrud&amp;author=K.L.+Voje&amp;author=O.K.+T&#x000f8;rresen&amp;author=M.H.+Solbakken&amp;author=M.+Matschiner&amp;volume=7&amp;publication_year=2017&amp;pages=7956&amp;pmid=28801564&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B74">74. <span class="mixed-citation">
T&#x000f8;rresen O.K., Brieuc M.S.O., Solbakken M.H., S&#x000f8;rhus E., Nederbragt A.J., Jakobsen K.S., Meier S., Edvardsen R.B., Jentoft S.
<span class="ref-title">Genomic architecture of haddock (<em>Melanogrammus aeglefinus</em>) shows expansions of innate immune genes and short tandem repeats</span>. <span class="ref-journal">BMC Genomics</span>. 2018; <span class="ref-vol">19</span>:240. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5894186/">PMC free article</a>]</span> [<a href="/pubmed/29636006" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=BMC+Genomics&amp;title=Genomic+architecture+of+haddock+(Melanogrammus+aeglefinus)+shows+expansions+of+innate+immune+genes+and+short+tandem+repeats&amp;author=O.K.+T&#x000f8;rresen&amp;author=M.S.O.+Brieuc&amp;author=M.H.+Solbakken&amp;author=E.+S&#x000f8;rhus&amp;author=A.J.+Nederbragt&amp;volume=19&amp;publication_year=2018&amp;pages=240&amp;pmid=29636006&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B75">75. <span class="mixed-citation">
Stein C., Caccamo M., Laird G., Leptin M.
<span class="ref-title">Conservation and divergence of gene families encoding components of innate immune response systems in zebrafish</span>. <span class="ref-journal">Genome Biol.</span> 2007; <span class="ref-vol">8</span>:R251. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2258186/">PMC free article</a>]</span> [<a href="/pubmed/18039395" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol.&amp;title=Conservation+and+divergence+of+gene+families+encoding+components+of+innate+immune+response+systems+in+zebrafish&amp;author=C.+Stein&amp;author=M.+Caccamo&amp;author=G.+Laird&amp;author=M.+Leptin&amp;volume=8&amp;publication_year=2007&amp;pages=R251&amp;pmid=18039395&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B76">76. <span class="mixed-citation">
Howe K., Schiffer P.H., Zielinski J., Wiehe T., Laird G.K., Marioni J.C., Soylemez O., Kondrashov F., Leptin M.
<span class="ref-title">Structure and evolutionary history of a large family of NLR proteins in the zebrafish</span>. <span class="ref-journal">Open Biol.</span> 2016; <span class="ref-vol">6</span>:160009. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4852459/">PMC free article</a>]</span> [<a href="/pubmed/27248802" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Open+Biol.&amp;title=Structure+and+evolutionary+history+of+a+large+family+of+NLR+proteins+in+the+zebrafish&amp;author=K.+Howe&amp;author=P.H.+Schiffer&amp;author=J.+Zielinski&amp;author=T.+Wiehe&amp;author=G.K.+Laird&amp;volume=6&amp;publication_year=2016&amp;pages=160009&amp;pmid=27248802&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B77">77. <span class="mixed-citation">
Matsushima N., Takatsuka S., Miyashita H., Kretsinger R.H.
<span class="ref-title">Leucine rich repeat proteins: sequences, mutations, structures and diseases</span>. <span class="ref-journal">PPL</span>. 2019; <span class="ref-vol">26</span>:108&#x02013;131. [<a href="/pubmed/30526451" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=PPL&amp;title=Leucine+rich+repeat+proteins:+sequences,+mutations,+structures+and+diseases&amp;author=N.+Matsushima&amp;author=S.+Takatsuka&amp;author=H.+Miyashita&amp;author=R.H.+Kretsinger&amp;volume=26&amp;publication_year=2019&amp;pages=108-131&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B78">78. <span class="mixed-citation">
Boehm T., McCurley N., Sutoh Y., Schorpp M., Kasahara M., Cooper M.D.
<span class="ref-title">VLR-based adaptive immunity</span>. <span class="ref-journal">Annu. Rev. Immunol.</span> 2012; <span class="ref-vol">30</span>:203&#x02013;220. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3526378/">PMC free article</a>]</span> [<a href="/pubmed/22224775" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Annu.+Rev.+Immunol.&amp;title=VLR-based+adaptive+immunity&amp;author=T.+Boehm&amp;author=N.+McCurley&amp;author=Y.+Sutoh&amp;author=M.+Schorpp&amp;author=M.+Kasahara&amp;volume=30&amp;publication_year=2012&amp;pages=203-220&amp;pmid=22224775&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B79">79. <span class="mixed-citation">
Das S., Hirano M., Aghaallaei N., Bajoghli B., Boehm T., Cooper M.D.
<span class="ref-title">Organization of lamprey variable lymphocyte receptor C locus and repertoire development</span>. <span class="ref-journal">Proc. Natl Acad. Sci. U.S.A.</span> 2013; <span class="ref-vol">110</span>:6043&#x02013;6048. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3625321/">PMC free article</a>]</span> [<a href="/pubmed/23487799" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Proc.+Natl+Acad.+Sci.+U.S.A.&amp;title=Organization+of+lamprey+variable+lymphocyte+receptor+C+locus+and+repertoire+development&amp;author=S.+Das&amp;author=M.+Hirano&amp;author=N.+Aghaallaei&amp;author=B.+Bajoghli&amp;author=T.+Boehm&amp;volume=110&amp;publication_year=2013&amp;pages=6043-6048&amp;pmid=23487799&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B80">80. <span class="mixed-citation">
Smith J.J., Timoshevskaya N., Ye C., Holt C., Keinath M.C., Parker H.J., Cook M.E., Hess J.E., Narum S.R., Lamanna F. et al.
<span class="ref-title">The sea lamprey germline genome provides insights into programmed genome rearrangement and vertebrate evolution</span>. <span class="ref-journal">Nat. Genet.</span> 2018; <span class="ref-vol">50</span>:270&#x02013;277. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5805609/">PMC free article</a>]</span> [<a href="/pubmed/29358652" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Genet.&amp;title=The+sea+lamprey+germline+genome+provides+insights+into+programmed+genome+rearrangement+and+vertebrate+evolution&amp;author=J.J.+Smith&amp;author=N.+Timoshevskaya&amp;author=C.+Ye&amp;author=C.+Holt&amp;author=M.C.+Keinath&amp;volume=50&amp;publication_year=2018&amp;pages=270-277&amp;pmid=29358652&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B81">81. <span class="mixed-citation">
Wrobel A., Ottoni C., Leo J.C., Gulla S., Linke D.
<span class="ref-title">The repeat structure of two paralogous genes, <em>Yersinia ruckeri</em> invasin (<em>yrInv</em>) and a &#x02018;<em>Y. ruckeri</em> invasin-like molecule&#x02019;, (<em>yrIlm</em>) sheds light on the evolution of adhesive capacities of a fish pathogen</span>. <span class="ref-journal">J. Struct. Biol.</span> 2018; <span class="ref-vol">201</span>:171&#x02013;183. [<a href="/pubmed/28888816" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=J.+Struct.+Biol.&amp;title=The+repeat+structure+of+two+paralogous+genes,+Yersinia+ruckeri+invasin+(yrInv)+and+a+&#x02018;Y.+ruckeri+invasin-like+molecule&#x02019;,+(yrIlm)+sheds+light+on+the+evolution+of+adhesive+capacities+of+a+fish+pathogen&amp;author=A.+Wrobel&amp;author=C.+Ottoni&amp;author=J.C.+Leo&amp;author=S.+Gulla&amp;author=D.+Linke&amp;volume=201&amp;publication_year=2018&amp;pages=171-183&amp;pmid=28888816&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B82">82. <span class="mixed-citation">
Franzen O., Jerlstr&#x000f6;m-Hultqvist J., Castro E., Sherwood E., Ankarklev J., Reiner D.S., Palm D., Andersson J.O., Andersson B., Sv&#x000e4;rd S.G.
<span class="ref-title">Draft genome sequencing of Giardia intestinalis assemblage B isolate GS: is human giardiasis caused by two different species</span>. <span class="ref-journal">PLoS Pathog.</span> 2009; <span class="ref-vol">5</span>:e1000560. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2723961/">PMC free article</a>]</span> [<a href="/pubmed/19696920" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=PLoS+Pathog.&amp;title=Draft+genome+sequencing+of+giardia+intestinalis+assemblage+B+isolate+GS:+is+human+giardiasis+caused+by+two+different+species&amp;author=O.+Franzen&amp;author=J.+Jerlstr&#x000f6;m-Hultqvist&amp;author=E.+Castro&amp;author=E.+Sherwood&amp;author=J.+Ankarklev&amp;volume=5&amp;publication_year=2009&amp;pages=e1000560&amp;pmid=19696920&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B83">83. <span class="mixed-citation">
Khatri I., Tomar R., Ganesan K., Prasad G.S., Subramanian S.
<span class="ref-title">Complete genome sequence and comparative genomics of the probiotic yeast <em>Saccharomyces boulardii</em></span>. <span class="ref-journal">Sci. Rep.</span> 2017; <span class="ref-vol">7</span>:371. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5428479/">PMC free article</a>]</span> [<a href="/pubmed/28336969" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Sci.+Rep.&amp;title=Complete+genome+sequence+and+comparative+genomics+of+the+probiotic+yeast+Saccharomyces+boulardii&amp;author=I.+Khatri&amp;author=R.+Tomar&amp;author=K.+Ganesan&amp;author=G.S.+Prasad&amp;author=S.+Subramanian&amp;volume=7&amp;publication_year=2017&amp;pages=371&amp;pmid=28336969&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B84">84. <span class="mixed-citation">
Romero V., Hosomichi K., Nakaoka H., Shibata H., Inoue I.
<span class="ref-title">Structure and evolution of the filaggrin gene repeated region in primates</span>. <span class="ref-journal">BMC Evol. Biol.</span> 2017; <span class="ref-vol">17</span>:10. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5225520/">PMC free article</a>]</span> [<a href="/pubmed/28077068" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=BMC+Evol.+Biol.&amp;title=Structure+and+evolution+of+the+filaggrin+gene+repeated+region+in+primates&amp;author=V.+Romero&amp;author=K.+Hosomichi&amp;author=H.+Nakaoka&amp;author=H.+Shibata&amp;author=I.+Inoue&amp;volume=17&amp;publication_year=2017&amp;pages=10&amp;pmid=28077068&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B85">85. <span class="mixed-citation">
Schmid M., Frei D., Patrignani A., Schlapbach R., Frey J.E., Remus-Emsermann M.N.P., Ahrens C.H.
<span class="ref-title">Pushing the limits of de novo genome assembly for complex prokaryotic genomes harboring very long, near identical repeats</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2018; <span class="ref-vol">46</span>:8953&#x02013;8965. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC6158609/">PMC free article</a>]</span> [<a href="/pubmed/30137508" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=Pushing+the+limits+of+de+novo+genome+assembly+for+complex+prokaryotic+genomes+harboring+very+long,+near+identical+repeats&amp;author=M.+Schmid&amp;author=D.+Frei&amp;author=A.+Patrignani&amp;author=R.+Schlapbach&amp;author=J.E.+Frey&amp;volume=46&amp;publication_year=2018&amp;pages=8953-8965&amp;pmid=30137508&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B86">86. <span class="mixed-citation">
Guo S., Stevens C.A., Vance T.D.R., Olijve L.L.C., Graham L.A., Campbell R.L., Yazdi S.R., Escobedo C., Bar-Dolev M., Yashunsky V. et al.
<span class="ref-title">Structure of a 1.5-MDa adhesin that binds its Antarctic bacterium to diatoms and ice</span>. <span class="ref-journal">Sci. Adv.</span> 2017; <span class="ref-vol">3</span>:e1701440. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5550230/">PMC free article</a>]</span> [<a href="/pubmed/28808685" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Sci.+Adv.&amp;title=Structure+of+a+1.5-MDa+adhesin+that+binds+its+Antarctic+bacterium+to+diatoms+and+ice&amp;author=S.+Guo&amp;author=C.A.+Stevens&amp;author=T.D.R.+Vance&amp;author=L.L.C.+Olijve&amp;author=L.A.+Graham&amp;volume=3&amp;publication_year=2017&amp;pages=e1701440&amp;pmid=28808685&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B87">87. <span class="mixed-citation">
Guo S., Garnham C.P., Whitney J.C., Graham L.A., Davies P.L.
<span class="ref-title">Re-evaluation of a bacterial antifreeze protein as an adhesin with ice-binding activity</span>. <span class="ref-journal">PLoS One</span>. 2012; <span class="ref-vol">7</span>:e48805. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3492233/">PMC free article</a>]</span> [<a href="/pubmed/23144980" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=PLoS+One&amp;title=Re-evaluation+of+a+bacterial+antifreeze+protein+as+an+adhesin+with+ice-binding+activity&amp;author=S.+Guo&amp;author=C.P.+Garnham&amp;author=J.C.+Whitney&amp;author=L.A.+Graham&amp;author=P.L.+Davies&amp;volume=7&amp;publication_year=2012&amp;pages=e48805&amp;pmid=23144980&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B88">88. <span class="mixed-citation">
St&#x000e5;lhammar-Carlemalm M., Areschoug T., Larsson C., Lindahl G.
<span class="ref-title">The R28 protein of Streptococcus pyogenes is related to several group B streptococcal surface proteins, confers protective immunity and promotes binding to human epithelial cells</span>. <span class="ref-journal">Mol. Microbiol.</span> 1999; <span class="ref-vol">33</span>:208&#x02013;219. [<a href="/pubmed/10411737" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Mol.+Microbiol.&amp;title=The+R28+protein+of+Streptococcus+pyogenes+is+related+to+several+group+B+streptococcal+surface+proteins,+confers+protective+immunity+and+promotes+binding+to+human+epithelial+cells&amp;author=M.+St&#x000e5;lhammar-Carlemalm&amp;author=T.+Areschoug&amp;author=C.+Larsson&amp;author=G.+Lindahl&amp;volume=33&amp;publication_year=1999&amp;pages=208-219&amp;pmid=10411737&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B89">89. <span class="mixed-citation">
Roche F.M., Massey R., Peacock S.J., Day N.P.J., Visai L., Speziale P., Lam A., Pallen M., Foster T.J.
<span class="ref-title">Characterization of novel LPXTG-containing proteins of Staphylococcus aureus identified from genome sequences</span>. <span class="ref-journal">Microbiology</span>. 2003; <span class="ref-vol">149</span>:643&#x02013;654. [<a href="/pubmed/12634333" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Microbiology&amp;title=Characterization+of+novel+LPXTG-containing+proteins+of+Staphylococcus+aureus+identified+from+genome+sequences&amp;author=F.M.+Roche&amp;author=R.+Massey&amp;author=S.J.+Peacock&amp;author=N.P.J.+Day&amp;author=L.+Visai&amp;volume=149&amp;publication_year=2003&amp;pages=643-654&amp;pmid=12634333&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B90">90. <span class="mixed-citation">
Anisimova M., Pe&#x0010d;erska J., Schaper E.
<span class="ref-title">Statistical approaches to detecting and analyzing tandem repeats in genomic sequences</span>. <span class="ref-journal">Front. Bioeng. Biotechnol.</span> 2015; <span class="ref-vol">3</span>:31. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4362331/">PMC free article</a>]</span> [<a href="/pubmed/25853125" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Front.+Bioeng.+Biotechnol.&amp;title=Statistical+approaches+to+detecting+and+analyzing+tandem+repeats+in+genomic+sequences&amp;author=M.+Anisimova&amp;author=J.+Pe&#x0010d;erska&amp;author=E.+Schaper&amp;volume=3&amp;publication_year=2015&amp;pages=31&amp;pmid=25853125&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B91">91. <span class="mixed-citation">
Schaper E., Korsunsky A., Pe&#x0010d;erska J., Messina A., Murri R., Stockinger H., Zoller S., Xenarios I., Anisimova M.
<span class="ref-title">TRAL: tandem repeat annotation library</span>. <span class="ref-journal">Bioinformatics</span>. 2015; <span class="ref-vol">31</span>:3051&#x02013;3053. [<a href="/pubmed/25987568" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Bioinformatics&amp;title=TRAL:+tandem+repeat+annotation+library&amp;author=E.+Schaper&amp;author=A.+Korsunsky&amp;author=J.+Pe&#x0010d;erska&amp;author=A.+Messina&amp;author=R.+Murri&amp;volume=31&amp;publication_year=2015&amp;pages=3051-3053&amp;pmid=25987568&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B92">92. <span class="mixed-citation">
Conesa A., Madrigal P., Tarazona S., Gomez-Cabrero D., Cervera A., McPherson A., Szcze&#x0015b;niak M.W., Gaffney D.J., Elo L.L., Zhang X. et al.
<span class="ref-title">A survey of best practices for RNA-seq data analysis</span>. <span class="ref-journal">Genome Biol.</span> 2016; <span class="ref-vol">17</span>:13. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4728800/">PMC free article</a>]</span> [<a href="/pubmed/26813401" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol.&amp;title=A+survey+of+best+practices+for+RNA-seq+data+analysis&amp;author=A.+Conesa&amp;author=P.+Madrigal&amp;author=S.+Tarazona&amp;author=D.+Gomez-Cabrero&amp;author=A.+Cervera&amp;volume=17&amp;publication_year=2016&amp;pages=13&amp;pmid=26813401&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B93">93. <span class="mixed-citation">
Yandell M., Ence D.
<span class="ref-title">A beginner's guide to eukaryotic genome annotation</span>. <span class="ref-journal">Nat. Rev. Genet</span>. 2012; <span class="ref-vol">13</span>:329&#x02013;342. [<a href="/pubmed/22510764" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Rev.+Genet&amp;title=A+beginner's+guide+to+eukaryotic+genome+annotation&amp;author=M.+Yandell&amp;author=D.+Ence&amp;volume=13&amp;publication_year=2012&amp;pages=329-342&amp;pmid=22510764&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B94">94. <span class="mixed-citation">
Hoff K.J., Stanke M.
<span class="ref-title">Current methods for automated annotation of protein-coding genes</span>. <span class="ref-journal">Curr. Opin. Insect Sci.</span> 2015; <span class="ref-vol">7</span>:8&#x02013;14. <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Curr.Opin.+Insect.+Sci.&amp;title=Current+methods+for+automated+annotation+of+protein-coding+genes&amp;author=K.J.+Hoff&amp;author=M.+Stanke&amp;volume=7&amp;publication_year=2015&amp;pages=8-14&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B95">95. <span class="mixed-citation">
Bergman C.M., Quesneville H.
<span class="ref-title">Discovering and detecting transposable elements in genome sequences</span>. <span class="ref-journal">Brief. Bioinform.</span> 2007; <span class="ref-vol">8</span>:382&#x02013;392. [<a href="/pubmed/17932080" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Brief.+Bioinform.&amp;title=Discovering+and+detecting+transposable+elements+in+genome+sequences&amp;author=C.M.+Bergman&amp;author=H.+Quesneville&amp;volume=8&amp;publication_year=2007&amp;pages=382-392&amp;pmid=17932080&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B96">96. <span class="mixed-citation">
Stanke M., Diekhans M., Baertsch R., Haussler D.
<span class="ref-title">Using native and syntenically mapped cDNA alignments to improve de novo gene finding</span>. <span class="ref-journal">Bioinformatics</span>. 2008; <span class="ref-vol">24</span>:637&#x02013;644. [<a href="/pubmed/18218656" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Bioinformatics&amp;title=Using+native+and+syntenically+mapped+cDNA+alignments+to+improve+de+novo+gene+finding&amp;author=M.+Stanke&amp;author=M.+Diekhans&amp;author=R.+Baertsch&amp;author=D.+Haussler&amp;volume=24&amp;publication_year=2008&amp;pages=637-644&amp;pmid=18218656&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B97">97. <span class="mixed-citation">
Lomsadze A., Ter-Hovhannisyan V., Chernoff Y.O., Borodovsky M.
<span class="ref-title">Gene identification in novel eukaryotic genomes by self-training algorithm</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2005; <span class="ref-vol">33</span>:6494&#x02013;6506. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC1298918/">PMC free article</a>]</span> [<a href="/pubmed/16314312" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=Gene+identification+in+novel+eukaryotic+genomes+by+self-training+algorithm&amp;author=A.+Lomsadze&amp;author=V.+Ter-Hovhannisyan&amp;author=Y.O.+Chernoff&amp;author=M.+Borodovsky&amp;volume=33&amp;publication_year=2005&amp;pages=6494-6506&amp;pmid=16314312&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B98">98. <span class="mixed-citation">
Grabherr M.G., Haas B.J., Yassour M., Levin J.Z., Thompson D.A., Amit I., Adiconis X., Fan L., Raychowdhury R., Zeng Q. et al.
<span class="ref-title">Full-length transcriptome assembly from RNA-Seq data without a reference genome</span>. <span class="ref-journal">Nat. Biotechnol.</span> 2011; <span class="ref-vol">29</span>:644&#x02013;652. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3571712/">PMC free article</a>]</span> [<a href="/pubmed/21572440" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Biotechnol.&amp;title=Full-length+transcriptome+assembly+from+RNA-Seq+data+without+a+reference+genome&amp;author=M.G.+Grabherr&amp;author=B.J.+Haas&amp;author=M.+Yassour&amp;author=J.Z.+Levin&amp;author=D.A.+Thompson&amp;volume=29&amp;publication_year=2011&amp;pages=644-652&amp;pmid=21572440&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B99">99. <span class="mixed-citation">
Pertea M., Kim D., Pertea G.M., Leek J.T., Salzberg S.L.
<span class="ref-title">Transcript-level expression analysis of RNA-seq experiments with HISAT, StringTie and Ballgown</span>. <span class="ref-journal">Nat. Protoc.</span> 2016; <span class="ref-vol">11</span>:1650&#x02013;1667. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5032908/">PMC free article</a>]</span> [<a href="/pubmed/27560171" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Protoc.&amp;title=Transcript-level+expression+analysis+of+RNA-seq+experiments+with+HISAT,+StringTie+and+Ballgown&amp;author=M.+Pertea&amp;author=D.+Kim&amp;author=G.M.+Pertea&amp;author=J.T.+Leek&amp;author=S.L.+Salzberg&amp;volume=11&amp;publication_year=2016&amp;pages=1650-1667&amp;pmid=27560171&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B100">100. <span class="mixed-citation">
Gonzalez-Garay M.L.
<span class="ref-title">Introduction to isoform sequencing using Pacific Biosciences technology (Iso-Seq)</span>. <span class="ref-journal">Transcriptomics and Gene Regulation, Translational Bioinformatics</span>. 2016; <span class="ref-vol">9</span>. Dordrecht: Springer; 141&#x02013;160. <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?title=Transcriptomics+and+Gene+Regulation,+Translational+Bioinformatics&amp;author=M.L.+Gonzalez-Garay&amp;publication_year=2016&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B101">101. <span class="mixed-citation">
UniProt Consortium
<span class="ref-title">UniProt: a hub for protein information</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2015; <span class="ref-vol">43</span>:D204&#x02013;D212. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4384041/">PMC free article</a>]</span> [<a href="/pubmed/25348405" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=UniProt:+a+hub+for+protein+information&amp;volume=43&amp;publication_year=2015&amp;pages=D204-D212&amp;pmid=25348405&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B102">102. <span class="mixed-citation">
Campbell M.S., Law M., Holt C., Stein J.C., Moghe G.D., Hufnagel D.E., Lei J., Achawanantakun R., Jiao D., Lawrence C.J. et al.
<span class="ref-title">MAKER-P: a tool kit for the rapid creation, management, and quality control of plant genome annotations</span>. <span class="ref-journal">Plant Physiol.</span> 2014; <span class="ref-vol">164</span>:513&#x02013;524. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3912085/">PMC free article</a>]</span> [<a href="/pubmed/24306534" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Plant+Physiol.&amp;title=MAKER-P:+a+tool+kit+for+the+rapid+creation,+management,+and+quality+control+of+plant+genome+annotations&amp;author=M.S.+Campbell&amp;author=M.+Law&amp;author=C.+Holt&amp;author=J.C.+Stein&amp;author=G.D.+Moghe&amp;volume=164&amp;publication_year=2014&amp;pages=513-524&amp;pmid=24306534&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B103">103. <span class="mixed-citation">
Holt C., Yandell M.
<span class="ref-title">MAKER2: an annotation pipeline and genome-database management tool for second-generation genome projects</span>. <span class="ref-journal">BMC Bioinformatics</span>. 2011; <span class="ref-vol">12</span>:491. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3280279/">PMC free article</a>]</span> [<a href="/pubmed/22192575" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=BMC+Bioinformatics&amp;title=MAKER2:+an+annotation+pipeline+and+genome-database+management+tool+for+second-generation+genome+projects&amp;author=C.+Holt&amp;author=M.+Yandell&amp;volume=12&amp;publication_year=2011&amp;pages=491&amp;pmid=22192575&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B104">104. <span class="mixed-citation">
Haas B.J., Salzberg S.L., Zhu W., Pertea M., Allen J.E., Orvis J., White O., Buell C.R., Wortman J.R.
<span class="ref-title">Automated eukaryotic gene structure annotation using EVidenceModeler and the program to assemble spliced alignments</span>. <span class="ref-journal">Genome Biol.</span> 2008; <span class="ref-vol">9</span>:R7. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC2395244/">PMC free article</a>]</span> [<a href="/pubmed/18190707" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Biol.&amp;title=Automated+eukaryotic+gene+structure+annotation+using+EVidenceModeler+and+the+program+to+assemble+spliced+alignments&amp;author=B.J.+Haas&amp;author=S.L.+Salzberg&amp;author=W.+Zhu&amp;author=M.+Pertea&amp;author=J.E.+Allen&amp;volume=9&amp;publication_year=2008&amp;pages=R7&amp;pmid=18190707&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B105">105. <span class="mixed-citation">
Jones P., Binns D., Chang H.-Y., Fraser M., Li W., McAnulla C., McWilliam H., Maslen J., Mitchell A., Nuka G. et al.
<span class="ref-title">InterProScan 5: genome-scale protein function classification</span>. <span class="ref-journal">Bioinformatics</span>. 2014; <span class="ref-vol">30</span>:1236&#x02013;1240. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3998142/">PMC free article</a>]</span> [<a href="/pubmed/24451626" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Bioinformatics&amp;title=InterProScan+5:+genome-scale+protein+function+classification&amp;author=P.+Jones&amp;author=D.+Binns&amp;author=H.-Y.+Chang&amp;author=M.+Fraser&amp;author=W.+Li&amp;volume=30&amp;publication_year=2014&amp;pages=1236-1240&amp;pmid=24451626&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B106">106. <span class="mixed-citation">
Mier P., Paladin L., Tamana S., Petrosian S., Hajdu-Solt&#x000e9;sz B., Urbanek A., Gruca A., Plewczynski D., Grynberg M., Bernad&#x000f3; P. et al.
<span class="ref-title">Disentangling the complexity of low complexity proteins</span>. <span class="ref-journal">Brief. Bioinform.</span> 2019; <span class="ref-vol">27</span>:331. <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Brief.+Bioinform.&amp;title=Disentangling+the+complexity+of+low+complexity+proteins&amp;author=P.+Mier&amp;author=L.+Paladin&amp;author=S.+Tamana&amp;author=S.+Petrosian&amp;author=B.+Hajdu-Solt&#x000e9;sz&amp;volume=27&amp;publication_year=2019&amp;pages=331&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B107">107. <span class="mixed-citation">
Chen L., DeVries A.L., Cheng C.-H.C.
<span class="ref-title">Evolution of antifreeze glycoprotein gene from a trypsinogen gene in Antarctic notothenioid fish</span>. <span class="ref-journal">Proc. Natl Acad. Sci. U.S.A.</span> 1997; <span class="ref-vol">94</span>:3811&#x02013;3816. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC20523/">PMC free article</a>]</span> [<a href="/pubmed/9108060" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Proc.+Natl+Acad.+Sci.+U.S.A.&amp;title=Evolution+of+antifreeze+glycoprotein+gene+from+a+trypsinogen+gene+in+Antarctic+notothenioid+fish&amp;author=L.+Chen&amp;author=A.L.+DeVries&amp;author=C.-H.C.+Cheng&amp;volume=94&amp;publication_year=1997&amp;pages=3811-3816&amp;pmid=9108060&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B108">108. <span class="mixed-citation">
Chen L., DeVries A.L., Cheng C.-H.C.
<span class="ref-title">Convergent evolution of antifreeze glycoproteins in Antarctic notothenioid fish and Arctic cod</span>. <span class="ref-journal">Proc. Natl Acad. Sci. U.S.A.</span> 1997; <span class="ref-vol">94</span>:3817&#x02013;3822. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC20524/">PMC free article</a>]</span> [<a href="/pubmed/9108061" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Proc.+Natl+Acad.+Sci.+U.S.A.&amp;title=Convergent+evolution+of+antifreeze+glycoproteins+in+Antarctic+notothenioid+fish+and+Arctic+cod&amp;author=L.+Chen&amp;author=A.L.+DeVries&amp;author=C.-H.C.+Cheng&amp;volume=94&amp;publication_year=1997&amp;pages=3817-3822&amp;pmid=9108061&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B109">109. <span class="mixed-citation">
Baalsrud H.T., T&#x000f8;rresen O.K., Hongr&#x000f8; Solbakken M., Salzburger W., Hanel R., Jakobsen K.S., Jentoft S.
<span class="ref-title">De novo gene evolution of antifreeze glycoproteins in codfishes revealed by whole genome sequence data</span>. <span class="ref-journal">Mol. Biol. Evol.</span> 2017; <span class="ref-vol">35</span>:593&#x02013;606. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5850335/">PMC free article</a>]</span> [<a href="/pubmed/29216381" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Mol.+Biol.+Evol.&amp;title=De+novo+gene+evolution+of+antifreeze+glycoproteins+in+codfishes+revealed+by+whole+genome+sequence+data&amp;author=H.T.+Baalsrud&amp;author=O.K.+T&#x000f8;rresen&amp;author=M.+Hongr&#x000f8;Solbakken&amp;author=W.+Salzburger&amp;author=R.+Hanel&amp;volume=35&amp;publication_year=2017&amp;pages=593-606&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B110">110. <span class="mixed-citation">
Zakin M.M., Duchange N., Ferrara P., Cohen G.N.
<span class="ref-title">Nucleotide sequence of the metL gene of Escherichia coli. Its product, the bifunctional aspartokinase II-homoserine dehydrogenase II, and the bifunctional product of the thrA gene, aspartokinase I-homoserine dehydrogenase I, derive from a common ancestor</span>. <span class="ref-journal">J. Biol. Chem.</span> 1983; <span class="ref-vol">258</span>:3028&#x02013;3031. [<a href="/pubmed/6298218" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=J.+Biol.+Chem.&amp;title=Nucleotide+sequence+of+the+metL+gene+of+Escherichia+coli.+Its+product,+the+bifunctional+aspartokinase+ii-homoserine+dehydrogenase+II,+and+the+bifunctional+product+of+the+thrA+gene,+aspartokinase+I-homoserine+dehydrogenase+I,+derive+from+a+common+ancestor&amp;author=M.M.+Zakin&amp;author=N.+Duchange&amp;author=P.+Ferrara&amp;author=G.N.+Cohen&amp;volume=258&amp;publication_year=1983&amp;pages=3028-3031&amp;pmid=6298218&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B111">111. <span class="mixed-citation">
Ferone R., Roland S.
<span class="ref-title">Dihydrofolate reductase: thymidylate synthase, a bifunctional polypeptide from Crithidia fasciculata</span>. <span class="ref-journal">Proc. Natl Acad. Sci. U.S.A.</span> 1980; <span class="ref-vol">77</span>:5802&#x02013;5806. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC350159/">PMC free article</a>]</span> [<a href="/pubmed/6934511" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Proc.+Natl+Acad.+Sci.+U.S.A.&amp;title=Dihydrofolate+reductase:+thymidylate+synthase,+a+bifunctional+polypeptide+from+Crithidia+fasciculata&amp;author=R.+Ferone&amp;author=S.+Roland&amp;volume=77&amp;publication_year=1980&amp;pages=5802-5806&amp;pmid=6934511&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B112">112. <span class="mixed-citation">
Marcotte E.M., Pellegrini M., Thompson M.J., Yeates T.O., Eisenberg D.
<span class="ref-title">A combined algorithm for genome-wide prediction of protein function</span>. <span class="ref-journal">Nature</span>. 1999; <span class="ref-vol">402</span>:83&#x02013;86. [<a href="/pubmed/10573421" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nature&amp;title=A+combined+algorithm+for+genome-wide+prediction+of+protein+function&amp;author=E.M.+Marcotte&amp;author=M.+Pellegrini&amp;author=M.J.+Thompson&amp;author=T.O.+Yeates&amp;author=D.+Eisenberg&amp;volume=402&amp;publication_year=1999&amp;pages=83-86&amp;pmid=10573421&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B113">113. <span class="mixed-citation">
Enright A.J., Iliopoulos I., Kyrpides N.C., Ouzounis C.A.
<span class="ref-title">Protein interaction maps for complete genomes based on gene fusion events</span>. <span class="ref-journal">Nature</span>. 1999; <span class="ref-vol">402</span>:86&#x02013;90. [<a href="/pubmed/10573422" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nature&amp;title=Protein+interaction+maps+for+complete+genomes+based+on+gene+fusion+events&amp;author=A.J.+Enright&amp;author=I.+Iliopoulos&amp;author=N.C.+Kyrpides&amp;author=C.A.+Ouzounis&amp;volume=402&amp;publication_year=1999&amp;pages=86-90&amp;pmid=10573422&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B114">114. <span class="mixed-citation">
Zhao X., Oh S.-H., Coleman D.A., Hoyer L.L.
<span class="ref-title">ALS51, a newly discovered gene in the Candida albicans ALS family, created by intergenic recombination: analysis of the gene and protein, and implications for evolution of microbial gene families</span>. <span class="ref-journal">FEMS Immunol. Med. Microbiol.</span> 2011; <span class="ref-vol">61</span>:245&#x02013;257. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3842030/">PMC free article</a>]</span> [<a href="/pubmed/21208290" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=FEMS+Immunol.+Med.+Microbiol.&amp;title=ALS51,+a+newly+discovered+gene+in+the+Candida+albicans+ALS+family,+created+by+intergenic+recombination:+analysis+of+the+gene+and+protein,+and+implications+for+evolution+of+microbial+gene+families&amp;author=X.+Zhao&amp;author=S.-H.+Oh&amp;author=D.A.+Coleman&amp;author=L.L.+Hoyer&amp;volume=61&amp;publication_year=2011&amp;pages=245-257&amp;pmid=21208290&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B115">115. <span class="mixed-citation">
Nagy A., Szl&#x000e1;ma G., Szarka E., Trexler M., B&#x000e1;nyai L., Patthy L.
<span class="ref-title">Reassessing domain architecture evolution of metazoan proteins: major impact of gene prediction errors</span>. <span class="ref-journal">Genes (Basel)</span>. 2011; <span class="ref-vol">2</span>:449&#x02013;501. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC3927609/">PMC free article</a>]</span> [<a href="/pubmed/24710207" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genes+(Basel)&amp;title=Reassessing+domain+architecture+evolution+of+metazoan+proteins:+major+impact+of+gene+prediction+errors&amp;author=A.+Nagy&amp;author=G.+Szl&#x000e1;ma&amp;author=E.+Szarka&amp;author=M.+Trexler&amp;author=L.+B&#x000e1;nyai&amp;volume=2&amp;publication_year=2011&amp;pages=449-501&amp;pmid=24710207&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B116">116. <span class="mixed-citation">
Promponas V.J., Iliopoulos I., Ouzounis C.A.
<span class="ref-title">Annotation inconsistencies beyond sequence similarity-based function prediction&#x02014;phylogeny and genome structure</span>. <span class="ref-journal">Standards Genomic Sci.</span> 2015; <span class="ref-vol">10</span>:108. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC4653902/">PMC free article</a>]</span> [<a href="/pubmed/26594309" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Standards+Genomic+Sci.&amp;title=Annotation+inconsistencies+beyond+sequence+similarity-based+function+prediction&#x02014;phylogeny+and+genome+structure&amp;author=V.J.+Promponas&amp;author=I.+Iliopoulos&amp;author=C.A.+Ouzounis&amp;volume=10&amp;publication_year=2015&amp;pages=108&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B117">117. <span class="mixed-citation">
Jurka J., Walichiewicz J., Milosavljevic A.
<span class="ref-title">Prototypic sequences for human repetitive DNA</span>. <span class="ref-journal">J. Mol. Evol.</span> 1992; <span class="ref-vol">35</span>:286&#x02013;291. [<a href="/pubmed/1404414" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=J.+Mol.+Evol.&amp;title=Prototypic+sequences+for+human+repetitive+DNA&amp;author=J.+Jurka&amp;author=J.+Walichiewicz&amp;author=A.+Milosavljevic&amp;volume=35&amp;publication_year=1992&amp;pages=286-291&amp;pmid=1404414&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B118">118. <span class="mixed-citation">
Jurka J.
<span class="ref-title">Repbase update: a database and an electronic journal of repetitive elements</span>. <span class="ref-journal">Trends Genet.</span> 2000; <span class="ref-vol">16</span>:418&#x02013;420. [<a href="/pubmed/10973072" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Trends+Genet.&amp;title=Repbase+update:+a+database+and+an+electronic+journal+of+repetitive+elements&amp;author=J.+Jurka&amp;volume=16&amp;publication_year=2000&amp;pages=418-420&amp;pmid=10973072&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B119">119. <span class="mixed-citation">
Kent W.J., Sugnet C.W., Furey T.S., Roskin K.M., Pringle T.H., Zahler A.M., Haussler D.
<span class="ref-title">The human genome browser at UCSC</span>. <span class="ref-journal">Genome Res.</span> 2002; <span class="ref-vol">12</span>:996&#x02013;1006. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC186604/">PMC free article</a>]</span> [<a href="/pubmed/12045153" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Res.&amp;title=The+human+genome+browser+at+UCSC&amp;author=W.J.+Kent&amp;author=C.W.+Sugnet&amp;author=T.S.+Furey&amp;author=K.M.+Roskin&amp;author=T.H.+Pringle&amp;volume=12&amp;publication_year=2002&amp;pages=996-1006&amp;pmid=12045153&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B120">120. <span class="mixed-citation">
Ruitberg C.M., Reeder D.J., Butler J.M.
<span class="ref-title">STRBase: a short tandem repeat DNA database for the human identity testing community</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2001; <span class="ref-vol">29</span>:320&#x02013;322. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC29767/">PMC free article</a>]</span> [<a href="/pubmed/11125125" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=STRBase:+a+short+tandem+repeat+DNA+database+for+the+human+identity+testing+community&amp;author=C.M.+Ruitberg&amp;author=D.J.+Reeder&amp;author=J.M.+Butler&amp;volume=29&amp;publication_year=2001&amp;pages=320-322&amp;pmid=11125125&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B121">121. <span class="mixed-citation">
Gelfand Y., Rodriguez A., Benson G.
<span class="ref-title">TRDB&#x02013;the tandem repeats database</span>. <span class="ref-journal">Nucleic Acids Res.</span> 2007; <span class="ref-vol">35</span>:D80&#x02013;D87. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC1781109/">PMC free article</a>]</span> [<a href="/pubmed/17175540" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=TRDB&#x02013;the+tandem+repeats+database&amp;author=Y.+Gelfand&amp;author=A.+Rodriguez&amp;author=G.+Benson&amp;volume=35&amp;publication_year=2007&amp;pages=D80-D87&amp;pmid=17175540&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B122">122. <span class="mixed-citation">
Hussing C., Bytyci R., Huber C., Morling N., B&#x000f8;rsting C.
<span class="ref-title">The Danish STR sequence database: duplicate typing of 363 Danes with the ForenSeq&#x02122; DNA Signature Prep Kit</span>. <span class="ref-journal">Int. J. Legal Med.</span> 2018; <span class="ref-vol">18</span>:100. [<a href="/pubmed/29797283" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Int.+J.+Legal+Med.&amp;title=The+Danish+STR+sequence+database:+duplicate+typing+of+363+Danes+with+the+ForenSeq&#x02122;+DNA+Signature+Prep+Kit&amp;author=C.+Hussing&amp;author=R.+Bytyci&amp;author=C.+Huber&amp;author=N.+Morling&amp;author=C.+B&#x000f8;rsting&amp;volume=18&amp;publication_year=2018&amp;pages=100&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B123">123. <span class="mixed-citation">
Adnan A., Zhan X., Kasim K., Rakha A., Xin X.J.
<span class="ref-title">Population data and phylogenetic structure of Han population from Jiangsu province of China on GlobalFiler STR loci</span>. <span class="ref-journal">Int. J. Legal Med.</span> 2018; <span class="ref-vol">132</span>:1301&#x02013;1304. [<a href="/pubmed/29500610" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Int.+J.+Legal+Med.&amp;title=Population+data+and+phylogenetic+structure+of+Han+population+from+Jiangsu+province+of+China+on+GlobalFiler+STR+loci&amp;author=A.+Adnan&amp;author=X.+Zhan&amp;author=K.+Kasim&amp;author=A.+Rakha&amp;author=X.J.+Xin&amp;volume=132&amp;publication_year=2018&amp;pages=1301-1304&amp;pmid=29500610&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B124">124. <span class="mixed-citation">
Ossowski A., Piatek J., Parafiniuk M., Pudlo A., Pepinski W., Skawronska M., Szeremeta M., Niemcunowicz-Janica A., Soltyszewski I.
<span class="ref-title">Genetic variation of 15 autosomal STRs in a population sample of Bedouins residing in the area of the Fourth Nile Cataract, Sudan</span>. <span class="ref-journal">Anthropol. Anz.</span> 2017; <span class="ref-vol">74</span>:263&#x02013;268. [<a href="/pubmed/28817155" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Anthropol.+Anz.&amp;title=Genetic+variation+of+15+autosomal+STRs+in+a+population+sample+of+Bedouins+residing+in+the+area+of+the+Fourth+Nile+Cataract,+Sudan&amp;author=A.+Ossowski&amp;author=J.+Piatek&amp;author=M.+Parafiniuk&amp;author=A.+Pudlo&amp;author=W.+Pepinski&amp;volume=74&amp;publication_year=2017&amp;pages=263-268&amp;pmid=28817155&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B125">125. <span class="mixed-citation">
Kim E.H., Lee H.Y., Kwon S.Y., Lee E.Y., Yang W.I., Shin K.-J.
<span class="ref-title">Sequence-based diversity of 23 autosomal STR loci in Koreans investigated using an in-house massively parallel sequencing panel</span>. <span class="ref-journal">Forensic Sci. Int. Genet.</span> 2017; <span class="ref-vol">30</span>:134&#x02013;140. [<a href="/pubmed/28728056" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Forensic+Sci.+Int.+Genet.&amp;title=Sequence-based+diversity+of+23+autosomal+STR+loci+in+Koreans+investigated+using+an+in-house+massively+parallel+sequencing+panel&amp;author=E.H.+Kim&amp;author=H.Y.+Lee&amp;author=S.Y.+Kwon&amp;author=E.Y.+Lee&amp;author=W.I.+Yang&amp;volume=30&amp;publication_year=2017&amp;pages=134-140&amp;pmid=28728056&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B126">126. <span class="mixed-citation">
Pamjav H., F&#x000f3;thi &#x000c1;., Feh&#x000e9;r T., F&#x000f3;thi E.
<span class="ref-title">A study of the Bodrogk&#x000f6;z population in north-eastern Hungary by Y chromosomal haplotypes and haplogroups</span>. <span class="ref-journal">Mol. Genet. Genomics</span>. 2017; <span class="ref-vol">292</span>:883&#x02013;894. [<a href="/pubmed/28409264" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Mol.+Genet.+Genomics&amp;title=A+study+of+the+Bodrogk&#x000f6;z+population+in+north-eastern+Hungary+by+Y+chromosomal+haplotypes+and+haplogroups&amp;author=H.+Pamjav&amp;author=&#x000c1;.+F&#x000f3;thi&amp;author=T.+Feh&#x000e9;r&amp;author=E.+F&#x000f3;thi&amp;volume=292&amp;publication_year=2017&amp;pages=883-894&amp;pmid=28409264&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B127">127. <span class="mixed-citation">
Wang X., Yang S., Chen Y., Zhang S., Zhao Q., Li M., Gao Y., Yang L., Bennetzen J.L.
<span class="ref-title">Comparative genome-wide characterization leading to simple sequence repeat marker development for Nicotiana</span>. <span class="ref-journal">BMC Genomics</span>. 2018; <span class="ref-vol">19</span>:500. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC6020451/">PMC free article</a>]</span> [<a href="/pubmed/29945549" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=BMC+Genomics&amp;title=Comparative+genome-wide+characterization+leading+to+simple+sequence+repeat+marker+development+for+Nicotiana&amp;author=X.+Wang&amp;author=S.+Yang&amp;author=Y.+Chen&amp;author=S.+Zhang&amp;author=Q.+Zhao&amp;volume=19&amp;publication_year=2018&amp;pages=500&amp;pmid=29945549&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B128">128. <span class="mixed-citation">
Franco M.E., Bitencourt T.A., Marins M., Fachin A.L.
<span class="ref-title">In silico characterization of tandem repeats in Trichophyton rubrum and related dermatophytes provides new insights into their role in pathogenesis</span>. <span class="ref-journal">Database (Oxford)</span>. 2017; <span class="ref-vol">2017</span>:1. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5502367/">PMC free article</a>]</span> [<a href="/pubmed/29220431" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Database+(Oxford)&amp;title=In+silico+characterization+of+tandem+repeats+in+Trichophyton+rubrum+and+related+dermatophytes+provides+new+insights+into+their+role+in+pathogenesis&amp;author=M.E.+Franco&amp;author=T.A.+Bitencourt&amp;author=M.+Marins&amp;author=A.L.+Fachin&amp;volume=2017&amp;publication_year=2017&amp;pages=1&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B129">129. <span class="mixed-citation">
Houston R., Birck M., LaRue B., Hughes-Stamm S., Gangitano D.
<span class="ref-title">Nuclear, chloroplast, and mitochondrial data of a US cannabis DNA database</span>. <span class="ref-journal">Int. J. Legal Med.</span> 2018; <span class="ref-vol">132</span>:713&#x02013;725. [<a href="/pubmed/29464313" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Int.+J.+Legal+Med.&amp;title=Nuclear,+chloroplast,+and+mitochondrial+data+of+a+US+cannabis+DNA+database&amp;author=R.+Houston&amp;author=M.+Birck&amp;author=B.+LaRue&amp;author=S.+Hughes-Stamm&amp;author=D.+Gangitano&amp;volume=132&amp;publication_year=2018&amp;pages=713-725&amp;pmid=29464313&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B130">130. <span class="mixed-citation">
Benson G.
<span class="ref-title">Tandem repeats finder: a program to analyze DNA sequences</span>. <span class="ref-journal">Nucleic Acids Res.</span> 1999; <span class="ref-vol">27</span>:573&#x02013;580. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC148217/">PMC free article</a>]</span> [<a href="/pubmed/9862982" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nucleic+Acids+Res.&amp;title=Tandem+repeats+finder:+a+program+to+analyze+DNA+sequences&amp;author=G.+Benson&amp;volume=27&amp;publication_year=1999&amp;pages=573-580&amp;pmid=9862982&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B131">131. <span class="mixed-citation">
Teeling E.C., Vernes S.C., D&#x000e1;valos L.M., Ray D.A., Gilbert M.T.P., Myers E., Bat1K Consortium.
<span class="ref-title">Bat biology, genomes, and the Bat1K Project: to generate chromosome-level genomes for all living bat species</span>. <span class="ref-journal">Annu. Rev. Anim. Biosci.</span> 2018; <span class="ref-vol">6</span>:23&#x02013;46. [<a href="/pubmed/29166127" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Annu.+Rev.+Anim.+Biosci.&amp;title=Bat+biology,+genomes,+and+the+Bat1K+Project:+to+generate+chromosome-level+genomes+for+all+living+bat+species&amp;author=E.C.+Teeling&amp;author=S.C.+Vernes&amp;author=L.M.+D&#x000e1;valos&amp;author=D.A.+Ray&amp;author=M.T.P.+Gilbert&amp;volume=6&amp;publication_year=2018&amp;pages=23-46&amp;pmid=29166127&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B132">132. <span class="mixed-citation">
Lewin H.A., Robinson G.E., Kress W.J., Baker W.J., Coddington J., Crandall K.A., Durbin R., Edwards S.V., Forest F., Gilbert M.T.P. et al.
<span class="ref-title">Earth BioGenome Project: sequencing life for the future of life</span>. <span class="ref-journal">Proc. Natl Acad. Sci. U.S.A.</span> 2018; <span class="ref-vol">115</span>:4325&#x02013;4333. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5924910/">PMC free article</a>]</span> [<a href="/pubmed/29686065" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Proc.+Natl+Acad.+Sci.+U.S.A.&amp;title=Earth+BioGenome+Project:+sequencing+life+for+the+future+of+life&amp;author=H.A.+Lewin&amp;author=G.E.+Robinson&amp;author=W.J.+Kress&amp;author=W.J.+Baker&amp;author=J.+Coddington&amp;volume=115&amp;publication_year=2018&amp;pages=4325-4333&amp;pmid=29686065&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B133">133. <span class="mixed-citation">
Koren S., Phillippy A.M., Simpson J.T., Loman N.J., Loose M.
<span class="ref-title">Reply to &#x02018;Errors in long-read assemblies can critically affect protein prediction&#x02019;</span>. <span class="ref-journal">Nat. Biotechnol.</span> 2019; <span class="ref-vol">30</span>:1. [<a href="/pubmed/30670797" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Biotechnol.&amp;title=Reply+to+&#x02018;Errors+in+long-read+assemblies+can+critically+affect+protein+prediction&#x02019;&amp;author=S.+Koren&amp;author=A.M.+Phillippy&amp;author=J.T.+Simpson&amp;author=N.J.+Loman&amp;author=M.+Loose&amp;volume=30&amp;publication_year=2019&amp;pages=1&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B134">134. <span class="mixed-citation">
Watson M., Warr A.
<span class="ref-title">Errors in long-read assemblies can critically affect protein prediction</span>. <span class="ref-journal">Nat. Biotechnol.</span> 2019; <span class="ref-vol">37</span>:124. [<a href="/pubmed/30670796" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Biotechnol.&amp;title=Errors+in+long-read+assemblies+can+critically+affect+protein+prediction&amp;author=M.+Watson&amp;author=A.+Warr&amp;volume=37&amp;publication_year=2019&amp;pages=124&amp;pmid=30670796&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B135">135. <span class="mixed-citation">
Weissensteiner M.H., Pang A.W.C., Bunikis I., H&#x000f6;ijer I., Vinnere-Pettersson O., Suh A., Wolf J.B.W.
<span class="ref-title">Combination of short-read, long-read and optical mapping assemblies reveals large-scale tandem repeat arrays with population genetic implications</span>. <span class="ref-journal">Genome Res.</span> 2017; <span class="ref-vol">27</span>:116&#x02013;708. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC5411765/">PMC free article</a>]</span> [<a href="/pubmed/28360231" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Genome+Res.&amp;title=Combination+of+short-read,+long-read+and+optical+mapping+assemblies+reveals+large-scale+tandem+repeat+arrays+with+population+genetic+implications&amp;author=M.H.+Weissensteiner&amp;author=A.W.C.+Pang&amp;author=I.+Bunikis&amp;author=I.+H&#x000f6;ijer&amp;author=O.+Vinnere-Pettersson&amp;volume=27&amp;publication_year=2017&amp;pages=116-708&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div><div class="ref-cit-blk half_rhythm" id="B136">136. <span class="mixed-citation">
Wenger A.M., Peluso P., Rowell W.J., Chang P.-C., Hall R.J., Concepcion G.T., Ebler J., Fungtammasan A., Kolesnikov A., Olson N.D. et al.
<span class="ref-title">Accurate circular consensus long-read sequencing improves variant detection and assembly of a human genome</span>. <span class="ref-journal">Nat. Biotechnol.</span> 2019; <span class="ref-vol">74</span>:1&#x02013;8. <span class="nowrap">[<a class="int-reflink" href="/pmc/articles/PMC6776680/">PMC free article</a>]</span> [<a href="/pubmed/31406327" target="pmc_ext" ref="reftype=pubmed&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Entrez%7CPubMed%7CRecord">PubMed</a>] <span class="nowrap">[<a href="https://scholar.google.com/scholar_lookup?journal=Nat.+Biotechnol.&amp;title=Accurate+circular+consensus+long-read+sequencing+improves+variant+detection+and+assembly+of+a+human+genome&amp;author=A.M.+Wenger&amp;author=P.+Peluso&amp;author=W.J.+Rowell&amp;author=P.-C.+Chang&amp;author=R.J.+Hall&amp;volume=74&amp;publication_year=2019&amp;pages=1-8&amp;" target="pmc_ext" ref="reftype=other&amp;article-id=6868369&amp;issue-id=346115&amp;journal-id=4&amp;FROM=Article%7CCitationRef&amp;TO=Content%20Provider%7CLink%7CGoogle%20Scholar">Google Scholar</a>]</span></span></div></div></div></div><!--post-content--><div class="courtesy-note whole_rhythm small"><hr />Articles from <span class="acknowledgment-journal-title">Nucleic Acids Research</span> are provided here courtesy of <strong>Oxford University Press</strong></div></div>
            
            
        
            
        </div>
        <!-- Book content -->
    </div>
    
    <div id="rightcolumn" class="four_col col last">
        <!-- Custom content above discovery portlets -->
        <div class="col6">
            
        </div>
        
        <div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><div class="format-menu"><h2>Formats:</h2><ul><li class="selected">Article</li> | <li><a href="/pmc/articles/PMC6868369/?report=reader">PubReader</a></li> | <li class="epub-link"><a href="/pmc/articles/PMC6868369/epub/">ePub (beta)</a></li> | <li><a href="/pmc/articles/PMC6868369/pdf/gkz841.pdf">PDF (880K)</a></li> | <li><a href="#" data-citationid="PMC6868369" class="citationexporter ctxp">Citation</a></li></ul></div></div><div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="share-buttons"><h2>Share</h2><ul><li class="facebook"><a href="https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC6868369%2F"><img src="//static.pubmed.gov/portal/portal3rc.fcgi/4160049/img/4047626" alt="Share on Facebook" />
                             Facebook
                        </a></li><li class="twitter"><a href="https://twitter.com/intent/tweet?url=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC6868369%2F&amp;text=Tandem%20repeats%20lead%20to%20sequence%20assembly%20errors%20and%20impose%20multi-level%20challenges%20for%20genome%20and%20protein%20databases"><img src="//static.pubmed.gov/portal/portal3rc.fcgi/4160049/img/4047627" alt="Share on Twitter" />
                             Twitter
                        </a></li><li class="gplus"><a href="https://plus.google.com/share?url=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC6868369%2F"><img src="//static.pubmed.gov/portal/portal3rc.fcgi/4160049/img/4047628" alt="Share on Google Plus" />
                             Google+
                        </a></li></ul></div>
        
        <div id="ajax-portlets" data-pmid="31584084" data-aiid="6868369" data-aid="6868369" data-iid="346115" data-domainid="4" data-domain="nar" data-accid="PMC6868369" data-md5="3574a1c920cee2c22b1a9a08a58f09d4"></div>
                
        <!-- Custom content below discovery portlets -->
        <div class="col7">
            
        </div>
    </div>
</div>

<!-- Custom content after all -->
<div class="col8">
    
</div>
<div class="col9">
    
</div>

<script src="/corehtml/pmc/js/jquery.scrollTo-1.4.2.js"></script>
<script>
    // Wire up every ".skiplink" anchor so that activating it scrolls to the
    // element named by its href and then moves keyboard focus there.
    (function($){
        $('.skiplink').each(function(i, item){
            var $link = $(item);
            var selector = $link.attr('href');

            // Only an in-page fragment ("#some-id") can be resolved to a target.
            // Anything else (missing href, bare "#", a full URL) is left alone so
            // the browser keeps its default behaviour for that link.
            if (typeof selector !== 'string' || selector.charAt(0) !== '#' || selector.length === 1) {
                return;
            }

            var $target = $(selector);
            if ($target.length === 0) {
                // No matching element: do not hijack the click, otherwise
                // preventDefault() would suppress the native jump while
                // scrollTo has nothing to scroll to.
                return;
            }

            // tabindex="-1" lets a normally non-focusable element receive
            // programmatic focus without adding it to the tab order.
            $target.attr('tabindex', '-1').addClass('skiptarget');

            $link.on('click', function(event){
                event.preventDefault();
                // Duration 0: jump immediately, then hand focus to the target
                // once the scroll has completed.
                $.scrollTo($target, 0, {
                    onAfter: function(){
                        $target.focus();
                    }
                });
            });
        });
    })(jQuery);
</script>



<div id="body-link-poppers"></div>
                        </div>
                        <div class="bottom">
                            
                            <div id="NCBIFooter_dynamic">
    <!-- NOTE(review): both links below carry id="help-desk-link", but an id must be unique within a document. The ids are left untouched here because scripts or styles outside this file may target them; confirm which link is the live one, then remove or re-id the other. -->
    <!-- rel="noopener" stops the page opened via target="_blank" from reaching back through window.opener. -->
    <a id="help-desk-link" class="help_desk" href="" target="_blank" rel="noopener">Support Center</a>
    <a id="help-desk-link" class="help_desk" href="https://support.ncbi.nlm.nih.gov/ics/support/KBList.asp?Time=2019-12-10T09:23:54-05:00&amp;Snapshot=%2Fprojects%2FPMC%2FPMCViewer@4.46&amp;Host=portal101&amp;ncbi_phid=CE88F247DEFA80310000000001E70130&amp;ncbi_session=CE8914FDDEFAA781_1315SID&amp;from=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC6868369%2F&amp;Db=pmc&amp;folderID=132&amp;Ncbi_App=pmc&amp;Page=literature&amp;style=classic&amp;deptID=28049" target="_blank" rel="noopener">Support Center</a>
    
</div>

                            <div class="footer" id="footer">
    
    <div class="subfooter"> </div><script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/preloaderWidget.js"> </script>
    <div id="external-disclaimer" class="offscreen_noflow">
        External link. Please review our <a href="https://www.nlm.nih.gov/privacy.html">privacy policy</a>.
    </div>    
    <div id="ncbifooter" class="contact_info">      
        <div id="footer-contents-right">
            <div id="nlm_thumb_logo">
                <a href="https://www.nlm.nih.gov" title="NLM">NLM</a>
            </div>
            <div id="nih_thumb_logo">
                <a href="https://www.nih.gov" title="NIH">NIH</a>
            </div>
            <div id="hhs_thumb_logo">
                <a href="https://www.hhs.gov" title="DHHS">DHHS</a>
            </div>
            <div id="usagov_thumb_logo">
                <a href="https://www.usa.gov" title="USA.gov">USA.gov</a>
            </div>         
        </div>
        
        <div id="footer-contents-left">
            <p class="address vcard">
                <span class="url">
                    <a class="fn url newdomain" href="https://www.ncbi.nlm.nih.gov">National Center for
                        Biotechnology Information</a>,
                </span> <span class="org url newdomain"><a href="https://www.nlm.nih.gov/">U.S. National Library of Medicine</a></span>
                <span class="adr">
                    <span class="street-address">8600 Rockville Pike</span>, <span class="locality">Bethesda</span>
                    <span class="region">MD</span>, <span class="postal-code">20894</span>
                    <span class="country-name">USA</span>
                </span>
            </p>
            
            <a href="/home/about/policies.shtml">Policies and Guidelines</a> | <a href="/home/about/contact.shtml">Contact</a>
        </div>
    </div>
    <script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>    
    <script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
                        </div>
                    </div>
                    <!--/.page-->
                </div>
                <!--/.wrap-->
            </div><!-- /.twelve_col -->
        </div>
        <!-- /.grid -->

        <span class="PAFAppResources"></span>
        
        <!-- BESelector tab -->
        
        
        
        <noscript><img alt="statistics" src="/stat?jsdisabled=true&amp;ncbi_db=pmc&amp;ncbi_pdid=article&amp;ncbi_acc=&amp;ncbi_domain=nar&amp;ncbi_report=record&amp;ncbi_type=fulltext&amp;ncbi_objectid=&amp;ncbi_pcid=/articles/PMC6868369/&amp;ncbi_app=pmc" /></noscript>
        
        
        <!-- usually for JS scripts at page bottom -->
        <!--<component id="PageFixtures" label="styles"></component>-->
    

<!-- CE8914FDDEFAA781_1315SID /projects/PMC/PMCViewer@4.46 portal101 v4.1.r585844 Mon, May 06 2019 02:53:16 -->

<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4160049/js/3879255/4121861/3818874/4168176/3821238/4117325/4087685/4072593/4076480/3921943/4105668/4065628.js" snapshot="pmc"></script></body>
</html>