import warnings

from unittest import TestCase, main

import numpy

from numpy import array, dot, empty, ones
from numpy.testing import assert_allclose

from cogent3 import DNA, make_aligned_seqs, make_tree
from cogent3.evolve.ns_substitution_model import (
    DiscreteSubstitutionModel,
    General,
    GeneralStationary,
    NonReversibleCodon,
    NonReversibleDinucleotide,
    NonReversibleNucleotide,
    NonReversibleProtein,
    NonReversibleTrinucleotide,
    StrandSymmetric,
)
from cogent3.evolve.predicate import MotifChange
from cogent3.evolve.substitution_model import TimeReversibleNucleotide


# Suppress two known, noisy warnings for the whole test module. The second
# argument is a regular expression that must match the start of the warning
# message, so any warning beginning with these texts is ignored.
# NOTE(review): presumably these are emitted by cogent3 when the
# non-reversible models under test are constructed -- confirm.
warnings.filterwarnings("ignore", "Motif probs overspecified")
warnings.filterwarnings("ignore", "Model not reversible")


# Module-level metadata: authorship, copyright, licence, version and
# maintainer contact details for this test module.
__author__ = "Peter Maxwell and  Gavin Huttley"
__copyright__ = "Copyright 2007-2020, The Cogent Project"
__credits__ = ["Gavin Huttley", "Ananias Iliadis"]
__license__ = "BSD-3"
__version__ = "2020.12.21a"
__maintainer__ = "Gavin Huttley"
__email__ = "gavin.huttley@anu.edu.au"
__status__ = "Production"


def _make_likelihood(model, tree, results, is_discrete=False):
    """Return a likelihood function for ``model`` on ``tree`` with data attached.

    Parameters
    ----------
    model
        object providing ``make_likelihood_function``
    tree
        passed straight through to ``model.make_likelihood_function``
    results
        mapping whose ``"aln"`` entry is given to ``set_alignment``
    is_discrete
        when True, no ``expm`` argument is supplied and no parameter
        rules are applied

    Returns
    -------
    The likelihood function produced by ``model`` after the alignment has
    been set on it.
    """
    # Note carried over from the original author: the discrete model fails
    # to make a likelihood function if the tree has lengths. Here the
    # discrete case is simply built without the ``expm`` keyword.
    extra = {} if is_discrete else {"expm": "pade"}
    like_fn = model.make_likelihood_function(
        tree, optimise_motif_probs=True, **extra
    )

    if not is_discrete:
        # every parameter except these two gets an independent rule with
        # an upper bound of 5
        untouched = ("length", "mprobs")
        for name in like_fn.get_param_names():
            if name not in untouched:
                like_fn.set_param_rule(name, is_independent=True, upper=5)

    like_fn.set_alignment(results["aln"])
    return like_fn


_aln = """>Human
ATGCGGCTCGCGGAGGAGCGGGCCGCGCTC------GCGGCGGAGAACGCGGATGGGGAACCCGGC---GCCGACCGACGACTGCGACTCCTGGGGACCTACGTGGCCATGAGCCTGCGGCCGGCTGCGGGCGCCTGGGAGCGTTGCGCGGGGAGTGCTGAGGCGGAGCAGCTGCTCCAGGCCTTCCTG---GGCCGCGATGCTGCCGAGGGGCCGCGGCCG------CTGCTGGTGGTGCGGCCCGGGCCCAGGGGCCTGGCAATACGCCCCGGGCTGGAGGTGGGACCTGAGTCGGGCCTGGCTGGCGCTAAGGCGCTTTTTTTCCTTCGCACCGGG---CCCGAGCCTCCAGGGCCCGACAGCTTCCGCGGCGCAGTGGTCTGCGGGGACCTGCCCGCGGCACCTCTGGAGCACCTAGCCGCGCTGTTCTCGGAGGTTGTTCTACCCGTCCTGGCCAATGAGAAGAATCGCCTAAACTGGCCCCACATGATATGTGAGGATGTCAGGCGGCACGCCCACAGCCTCCAATGTGACCTCTCAGTTATACTTGAGCAAGTGAAGGGAAAAACTTTGCTGCCTCTTCCAGCAGGCTCAGAAAAAATGGAGTTTGCGGATTCCAAAAGTGAGACAGTCTTGGATTCTATAGATAAGTCAGTCATCTATGCCATTGAGTCTGCAGTGATCAAATGGAGCTACCAAGTCCAGGTGGTACTCAAGAGAGAGTCTTCCCAGCCACTCTTACAAGGGGAGAATCCCACCCCTAAGGTGGAGTTGGAGTTCTGGAAGAGCAGGTATGAAGATCTGAAATACATCTATAATCAACTGAGAACAATAACGGTGAGGGGCATGGCCAAGCTCCTGGACAAGCTTCAGAGTAGCTACTTTCCAGCTTTCAAAGCCATGTACAGAGATGTTGTTGCAGCTCTAGCAGAGGCACAGGACATCCATGTGCACCTGATACCGCTCCAGCGCCACCTGGAAGCTCTGGAGAATGCAGAATTTCCGGAGGTGAAGCCCCAGCTGCGGCCCCTGCTCCACGTGGTCTGTCTGATTTGGGCCACATGCAAGTCCTACCGCTCCCCGGGAAGGCTGACTGTGCTGCTCCAGGAGATTTGCAACCTTCTCATCCAGCAGGCCTCTAATTATCTCAGCCCAGAAGACCTGCTGAGAAGTGAGGTAGAAGAAAGTCAGAGAAAACTGCAAGTGGTCTCAGACACTTTGAGCTTCTTCAAGCAAGAGTTTCAGGACAGAAGGGAGAATCTCCACACTTACTTCAAAGAGAACCAGGAAGTCAAGGAATGGGATTTCCAGTCTTCTTTGGTCTTTGTGCGATTGGATGGCTTCCTGGGACAACTGCACGTGGTGGAGGGTCTTCTGAAGACGGCCCTGGATTTCCACAAACTGGGAAAGGTGGAGTTCAGCGGCGTCAGAGGGAATGCTCTGAGTCAGCAGGTCCAGCAAATGCATGAAGAATTTCAAGAGATGTACAGGCTTCTCTCAGGATCCTCCTCCGACTGCCTGTACCTCCAAAGCACGGACTTTGAAAATGACGTCTCTGAATTTAACCAGAAAGTAGAAGATCTTGACCGAAGATTGGGGACTATCTTTATTCAAGCTTTTGATGATGCACCTGGCTTGGAGCATGCCTTTAAGCTGCTAGACATAGCAGGAAACCTCCTTGAAAGACCGCTGGTAGCGAGGGATACATCTGATAAATACCTGGTCCTCATCCAAATGTTCAACAAAGATCTGGATGCAGTGAGGATGATCTACAGTCAGCACGTCCAGGAGGAAGCAGAACTTGGGTTCTCCCCGGTGCACAAGAACATGCCCACCGTGGCTGGCGGCCTCCGCTGGGCACAGGAGCTGAGGCAGCGCATCCAGGGTCCTTTCAGCAACTTTGGACGCATCACACACCCTTGCATGGAATCTGCAGAAGGAAAGCGAATGCAACAAAAATATGAAGATATGCTGTCATTGCTAGAAAAGTATGAGAC
AAGACTTTATGAGGATTGGTGCCGGACAGTATCAGAGAAGTCACAGTACAATCTTTCCCAACCACTTCTAAAACGTGACCCAGAGACGAAGGAGATCACTATCAACTTTAACCCACAGCTGATTTCAGTGCTGAAAGAAATGAGCTATCTTGAACCCAGAGAGATGAAACACATGCCTGAGACAGCAGCAGCCATGTTCTCCTCCAGGGATTTCTATCGGCAGCTTGTGGCTAATTTAGAGTTGATGGCAAATTGGTACAACAAGGTTATGAAAACTCTGCTGGAGGTGGAATTTCCATTAGTGGAGGAAGAGCTGCAAAATATTGATCTCCGCCTCAGAGCAGCAGAGGAGACTTTGAACTGGAAAACAGAAGGCATTTGCGATTATGTCACTGAAATCACCAGTAGTATTCATGATCTTGAACAAAGAATTCAGAAAACTAAAGACAATGTGGAAGAGATCCAAAACATCATGAAAACATGGGTGACTCCAATATTTAAGACAAAAGATGGAAAAAGGGAATCCCTTCTTTCTCTGGATGATCGGCATGATCGAATGGAAAAATATTACAATCTCATCAAGGAATCTGGCCTTAAGATCCACGCCCTTGTTCAGGAAAACCTGGGTCTATTTTCAGCAGACCCAACCTCCAATATCTGGAAGACTTATGTTAACTCTATTGACAATTTGTTGCTGAATGGATTCTTTCTTGCCATTGAGTGCTCCCTCAAGTATCTTCTGGAAAATACTGAGTGTAAGGCAGGACTTACCCCAATATTTGAAGCACAACTGAGTCTAGCCATCCCAGAGCTAGTTTTCTATCCGTCTCTGGAGTCTGGAGTGAAGGGGGGTTTCTGTGACATTGTTGAGGGTCTCATCACCAGCATTTTTAGGATACCATCTCTGGTGCCACGGCTTTCCCCACAAAATGGCTCTCCTCACTATCAGGTCGACCTGGACGGTATACCAGATTTGGCAAACATGCGGCGCACACTCATGGAGAGAGTCCAGAGAATGATGGGCCTCTGCTGTGGCTATCAGAGCACCTTCAGCCAGTATTCGTACCTCTATGTGGAGGACCGGAAGGAGGTTCTGGGTCAGTTTCTGCTGTACGGGCACATCCTCACTCCGGAAGAAATTGAAGACCATGTGGAAGATGGCATCCCAGAGAACCCTCCCCTCCTTTCTCAGTTTAAAGTGCAAATCGACTCCTATGAAACGCTCTATGAAGAGGTGTGCAGGCTGGAACCCATCAAGGTGTTTGACGGCTGGATGAAAATTGATATTCGACCCTTTAAGGCATCTCTGCTGAATATTATTAAGAGGTGGAGCCTCCTGTTCAAACAGCATCTTGTGGACCACGTCACTCACAGCTTGGCCAACCTGGATGCGTTTATAAAGAAGAGTGAGAGCGGCTTACTCAAGAAAGTTGAAAAAGGAGATTTCCAAGGCTTGGTTGAGATCATGGGACACCTTATGGCTGTTAAAGAACGGCAGAGTAACACTGATGAGATGTTTGAGCCCTTAAAGCAGACTATTGAATTGCTGAAGACCTATGAACAAGAATTGCCAGAAACAGTGTTTAAGCAGCTGGAGGAGCTGCCTGAGAAATGGAACAACATAAAAAAGGTGGCCATTACTGTGAAGCAGCAGGTGGCCCCACTGCAGGCAAATGAAGTGACACTCCTCCGCCAGAGGTGCACAGCCTTCGATGCAGAACAGCAGCAATTCTGGGAGCAATTCCACAAAGAAGCCCCGTTCAGGTTTGATAGCATCCACCCTCATCAAATGCTGGATGCCAGGCACATCGAGATCCAGCAGATGGAATCCACTATGGCCTCCATTTCTGAGTCTGCCAGCTTATTTGAAGTCAATGTCCCTGACTATAAGCAGCTGAGGCAGTGCAGGAAGGAGGTCTGCCAGCTGAAGGAGCTCTGGGACACCATTGGAATGGTGACCTCCAGCATCCATGCCTGGGAGACCACACCCTGGAGGAATA
TCAACGTGGAAGCCATGGAGTTGGAGTGCAAACAGTTTGCCCGGCATATCCGAAACCTGGACAAGGAGGTCAGGGCCTGGGATGCATTCACAGGCCTGGAAAGCACTGTGTGGAACACGCTGAGCTCCCTGAGGGCAGTAGCTGAGCTGCAGAATCCAGCCATCCGGGAGCGGCACTGGAGGCAGCTGATGCAGGCCACCGGTGTGAGCTTCACTATGGACCAGGACACCACCCTAGCGCACCTGCTGCAGCTCCAGCTGCACCACTATGAGGATGAGGTCCGGGGCATTGTGGACAAAGCTGCAAAAGAGATGGGTATGGAGAAAACCTTAAAGGAGCTGCAGACTACCTGGGCTGGCATGGAATTCCAGTATGAGCCCCACCCACGGACCAATGTCCCCCTCCTGTGCTCTGATGAGGACCTCATAGAGGTTCTGGAGGATAATCAAGTTCAACTTCAGAACCTGGTGATGTCCAAGTATGTTGCTTTCTTCTTGGAGGAGGTGTCGGGCTGGCAGAAGAAGCTGTCCACAGTGGACGCTGTCATCTCTATCTGGTTTGAAGTGCAGCGAACATGGACTCACCTGGAAAGCATATTCACTGGATCTGAAGATATTCGGGCACAGCTACCCCAGGATTCTAAAAGGTTTGAAGGCATCGACATTGACTTTAAAGAGCTAGCTTATGATGCCCAGAAAATTCCAAATGTAGTGCAAACCACCAACAAGCCAGGCCTGTATGAAAAGCTGGAGGATATTCAGGGCAGATTGTGCCTGTGTGAGAAGGCCCTGGCAGAGTACCTCGACACCAAGAGGCTTGCCTTCCCGCGGTTTTACTTTCTCTCCTCCTCCGATCTGTTAGACATCCTTTCCAACGGCACAGCTCCACAACAGGTTCAACGTCACCTTTCCAAACTCTTTGACAACATGGCCAAGATGCGATTCCAGCTAGATGCCAGTGGGGAACCAACCAAGACAAGCCTCGGCATGTACAGCAAAGAAGAGGAGTATGTGGCTTTCAGTGAGCCCTGTGACTGCAGCGGGCAGGTAGAAATATGGCTGAACCATGTCCTTGGTCACATGAAGGCCACTGTGAGGCATGAGATGACAGAAGGTGTAACTGCCTATGAAGAAAAGCCGAGGGAGCAGTGGCTTTTTGACCACCCAGCTCAGGTGGCCCTGACCTGTACTCAGATCTGGTGGACAACAGAAGTGGGCATGGCATTTGCCAGGCTGGAGGAAGGCTATGAGAGTGCCATGAAGGACTATTATAAGAAGCAAGTGGCCCAGCTCAAAACCCTTATCACCATGCTGATTGGCCAGCTCTCCAAGGGAGACCGGCAGAAGATTATGACTATATGCACCATCGATGTGCATGCCCGGGATGTGGTAGCCAAGATGATTGCTCAGAAGGTAGACAATGCCCAGGCTTTCCTCTGGCTGTCTCAGCTGCGCCATCGTTGGGATGACGAGGTCAAACACTGCTTTGCCAACATCTGTGATGCCCAGTTTTTGTATTCCTATGAGTACCTGGGAAACACACCTCGCTTGGTGATCACACCTTTGACT------GACAGGTGCTACATCACCCTCACCCAGTCCCTGCACCTGACCATGAGTGGGGCTCCCGCAGGACCTGCAGGCACAGGCAAGACCGAGACCACCAAGGACCTGGGCCGCGCACTGGGCATCCTGGTCTATGTGTTCAACTGCTCGGAGCAGATGGATTACAAGTCTTGTGGCAACATCTACAAAGGCCTTGCTCAGACTGGTGCCTGGGGCTGCTTTGATGAGTTTAATCGAATCTCCGTGGAGGTCTTGTCAGTGGTGGCAGTGCAGGTAAAAAGCATTCAAGATGCGATTAGAGATAAGAAGCAGTGGTTCAGCTTCCTTGGGGAGGAGATCAGCCTGAATCCTTCTGTCGGTATCTTCATCACCATGAACCCAGGCTATGCTGGCCGCACAGAGCTGCCAGAGAATCTCAAGTCTCTCTTCAGG
CCTTGTGCAATGGTGGTTCCAGACTTTGAATTGATCTGTGAAATCATGCTGGTGGCAGAAGGATTCATTGAAGCCCAGTCATTAGCCAGAAAGTTCATCACTCTTTACCAGTTGTGCAAAGAGCTTCTCTCCAAACAGGATCACTACGACTGGGGCCTACGGGCCATCAAGTCCGTGCTGGTGGTGGCAGGATCCCTGAAGAGAGGAGACCCTGACCGGCCTGAGGACCAGGTCCTGATGCGCTCCTTGCGGGATTTCAACATCCCCAAGATTGTGACTGATGACATGCCCATCTTCATGGGCCTGATCGGGGACCTCTTTCCCGCCCTGGATGTCCCCCGGAGGAGAGACCCCAACTTCGAAGCTTTGGTTAGGAAGGCGATAGTGGATCTGAAGCTCCAGGCTGAGGACAACTTTGTGCTCAAGGTGGTCCAGCTGGAGGAGCTCCTGGCTGTGCGGCACTCTGTA---TTTGTGGTGGGTGGCGCTGGT------ACCGGCAAGTCACAGGTGCTGAGGTCCTTGCACAAGACCTATCAG------ATCATGAAACGGCGCCCCGTCTGGACTGACCTCAATCCCAAAGCAGTCACAAATGATGAGCTCTTT---------------------------------------GGCATCATCAATCCAGCCACAGGAGAATGGAAGGATGGA---TTGTTCTCTTCCATCATGCGGGAGCTTGCCAACATCACCCATGATGGGCCCAAGTGGATTTTACTGGATGGCGACATAGATCCAATGTGGATTGAATCCCTGAATACTGTCATGGATGATAACAAGGTGCTGACATTGGCCAGCAATGAGAGGATTCCTCTGAACCCCACCATGAAGCTCCTCTTTGAGATCAGCCACCTGCGCACAGCCACTCCAGCAACTGTCTCTAGAGCAGGGATCTTGTACATCAACCCGGCAGACTTGGGATGGAACCCTCCAGTGAGCAGCTGGATTGAGAAGAGGGAAATCCAGACAGAGAGAGCCAACTTAACCATTTTGTTCGACAAGTATCTTCCAACCTGCCTAGACACACTCAGAACCAGGTTTAAGAAGATCATTCCCATCCCAGAGCAGAGCATGGTTCAGATGGTGTGTCACCTTCTGGAATGTCTCCTGACCACGGAGGACATCCCTGCAGACTGCCCTAAGGAAATTTATGAGCATTATTTTGTGTTTGCTGCCATCTGGGCTTTCGGCGGAGCAATGGTCCAAGATCAGCTTGTGGACTACCGGGCAGAGTTCAGCAAATGGTGGCTGACTGAGTTCAAAACAGTCAAGTTTCCTTCCCAAGGAACCATCTTTGACTATTACATCGACCCAGAGACCAAGAAATTCGAGCCTTGGTCCAAGCTCGTCCCCCAGTTCGAATTTGACCCCGAGATGCCCTTGCAGGCGTGTTTGGTGCACACGAGTGAGACCATCCGTGTGTGCTACTTCATGGAGCGGTTGATGGCGCGGCAGCGGCCTGTCATGCTGGTGGGCACGGCTGGCACTGGCAAGTCGGTGCTGGTGGGAGCTAAGCTGGCCAGCCTTGACCCCGAGGCATACCTGGTGAAAAACGTGCCATTCAACTACTACACCACGTCAGCAATGCTGCAGGCTGTCCTGGAGAAGCCTCTGGAAAAGAAGGCTGGCAGAAACTATGGCCCTCCAGGGAACAAGAAACTCATCTATTTCATTGATGACATGAACATGCCTGAGGTGGATGCCTACGGGACGGTGCAGCCCCACACCATCATCCGGCAGCATCTGGACTATGGCCACTGGTATGATCGGAGCAAGCTGTCCCTAAAGGAGATCACAAATGTACAGTATGTTTCCTGTATGAACCCCACGGCAGGCAGCTTCACCATCAACCCCCGGCTTCAGCGTCACTTCAGCGTGTTTGTCCTCTCCTTCCCGGGGGCAGATGCCCTGTCCTCTATCTACAGCATCATCCTCACTCAGCATCTGAAGCTCGGAAA
CTTCCCGGCGTCCCTGCAGAAATCCATCCCCCCACTGATCGATCTGGCCCTCGCCTTCCACCAGAAAATTGCTACCACCTTCCTACCCACAGGAATCAAATTCCACTACATCTTCAACCTCAGAGATTTTGCCAACATTTTCCAGGGCATTCTCTTCTCCTCAGTGGAATGTGTGAAATCCACATGGGATCTTATAAGGCTCTATCTGCATGAATCAAATCGAGTTTATCGGGATAAGATGGTAGAAGAAAAGGACTTTGATCTTTTTGATAAAATCCAGACAGAAGTGCTCAAGAAAACTTTTGATGATATTGAAGACCCTGTGGAGCAGACCCAAAGCCCGAACCTGTATTGTCACTTTGCAAATGGTATTGGGGAGCCCAAATACATGCCTGTACAGTCTTGGGAACTTTTGACCCAGACTCTGGTGGAGGCCTTGGAGAACCACAATGAAGTCAACACAGTGATGGACCTAGTTCTCTTTGAGGATGCCATGCGCCATGTCTGCCATATCAATCGCATCTTGGAGTCCCCGCGGGGAAATGCTCTGCTGGTTGGTGTAGGTGGGAGCGGCAAGCAGAGCCTGACAAGGCTGGCAGCTTTCATCAGCTCCATGGATGTCTTCCAGATCACACTGCGCAAAGGCTACCAGATCCAGGACTTCAAGATGGACCTGGCCAGCCTGTGTCTGAAAGCTGGAGTGAAGAATCTCAACACAGTGTTTCTCATGACTGATGCCCAAGTGGCTGATGAGAGGTTCCTTGTGCTCATCAATGATCTTTTGGCATCTGGGGAGATCCCAGATCTCTACTCTGATGATGAAGTTGAAAACATCATAAGCAATGTGAGGAATGAAGTCAAGAGCCAGGGTCTGGTTGACAACAGAGAGAACTGTTGGAAGTTCTTTATAGATCGGATCCGGCGACAGCTGAAGGTGACTCTCTGTTTCTCCCCTGTGGGAAACAAGCTAAGAGTCCGCAGCAGGAAGTTCCCAGCCATTGTGAACTGCACAGCCATCCACTGGTTCCACGAGTGGCCTCAGCAAGCATTGGAGTCTGTCAGCCTCCGCTTCTTGCAGAACACAGAGGGCATTGAGCCCACAGTAAAGCAGTCGATTAGCAAATTCATGGCCTTTGTCCACACAAGTGTCAACCAAACATCCCAGTCTTATCTGAGCAATGAACAGCGCTACAACTATACAACTCCCAAGTCCTTTCTGGAGTTCATCAGACTCTACCAGAGCTTGTTGCACAGGCACAGAAAAGAGCTCAAGTGCAAGACAGAGCGGTTGGAGAACGGGCTGCTGAAGCTGCATAGCACCTCTGCCCAGGTGGATGATCTGAAAGCAAAGCTGGCTGCCCAGGAAGTAGAGCTGAAGCAGAAAAATGAAGATGCAGACAAACTGATTCAGGTCGTGGGTGTGGAGACTGACAAAGTGAGCAGAGAGAAAGCCATGGCAGATGAAGAGGAGCAGAAGGTGGCCGTCATCATGCTAGAGGTGAAACAGAAGCAGAAGGACTGTGAGGAGGACCTGGCAAAGGCTGAGCCAGCACTCACAGCAGCGCAGGCAGCTCTCAACACCCTGAACAAGACCAACCTGACAGAGCTGAAGTCATTTGGCTCTCCGCCTCTGGCCGTCAGCAATGTCAGCGCTGCGGTGATGGTACTGATGGCTCCCAGGGGTAGGGTGCCCAAGGACCGGAGCTGGAAGGCTGCTAAGGTCACCATGGCCAAAGTGGATGGCTTCCTGGACTCGCTAATAAACTTCAACAAAGAGAACATTCACGAGAACTGCCTCAAAGCCATCAGGCCGTATCTGCAAGACCCCGAGTTCAATCCTGAGTTTGTGGCCACCAAATCCTATGCGGCTGCAGGCCTCTGCTCCTGGGTCATCAATATTGTGAGATTTTATGAGGTGTTCTGTGATGTGGAACCCAAGCGCCAGGCACTGAACAAAGCCACCGCGGACCTCACAGCTGCCCAGGAGAAGCTGGCTGCCA
TCAAAGCCAAGATCGCTCACCTTAATGAAAACCTGGCAAAGCTCACAGCCAGGTTTGAGAAAGCAACAGCAGACAAACTCAAATGTCAGCAAGAAGCCGAAGTGACCGCAGTCACCATCTCCCTTGCCAACCGCCTGGTTGGAGGACTCGCTTCTGAAAACGTGAGGTGGGCAGATGCCGTGCAGAACTTCAAACAGCAGGAAAGGACGTTATGTGGAGACATTTTACTTATAACGGCTTTCATTTCCTACCTTGGCTTCTTCACAAAGAAATACCGGCAGAGCCTCCTGGACAGAACTTGGAGGCCCTACCTGAGCCAGCTGAAAACTCCCATTCCAGTCACCCCAGCCCTGGATCCCCTGAGGATGCTGATGGATGATGCTGACGTGGCTGCCTGGCAGAACGAGGGCCTCCCAGCCGACCGCATGTCCGTGGAGAATGCCACCATTCTCATCAACTGTGAGCGCTGGCCACTCATGGTTGACCCTCAGCTACAAGGCATCAAATGGATCAAGAATAAATATGGTGAAGATCTCCGGGTCACGCAGATTGGTCAGAAAGGCTACCTTCAAATCATAGAGCAGGCCCTGGAAGCTGGAGCTGTGGTGCTGATTGAAAATCTAGAGGAGTCCATTGATCCTGTTCTGGGACCCCTGCTTGGGAGAGAAGTCATTAAAAAAGGACGATTCATTAAAATTGGAGACAAAGAATGTGAATACAATCCCAAGTTCCGGCTCATCCTCCACACCAAGCTGGCTAATCCTCACTACCAGCCTGAGCTGCAGGCTCAGGCCACCCTGATCAACTTCACCGTGACCAGGGATGGCCTGGAGGACCAGTTGCTGGCCGCTGTGGTCAGCATGGAGAGGCCAGACTTGGAGCAGCTGAAGTCCGATCTCACAAAGCAGCAGAATGGATTCAAAATTACCCTGAAAACGTTGGAAGACAGTCTTCTCTCTCGCCTCTCCTCCGCCTCTGGGAACTTCCTGGGAGAAACAGTGCTGGTGGAAAACCTAGAGATCACCAAGCAGACTGCTGCCGAAGTTGAGAAAAAGGTCCAGGAGGCCAAGGTGACTGAAGTGAAAATCAACGAGGCCCGAGAGCACTACCGGCCAGCAGCTGCCAGGGCCTCACTGCTCTACTTCATCATGAACGACCTCAGCAAGATCCATCCAATGTACCAGTTTTCTCTCAAGGCCTTCAGTATCGTCTTCCAGAAGGCTGTGGAG------AGGGCTGCTCCTGACGAAAGCCTCAGGGAGCGGGTGGCCAACCTAATAGACAGCATAACCTTCTCTGTGTACCAGTACACCATCCGCGGGCTCTTTGAGTGTGATAAGCTGACCTACCTTGCCCAGCTCACCTTTCAGATTCTCCTCATGAACCGAGAAGTCAATGCAGTGGAGTTGGATTTCCTGCTTCGATCTCCAGTGCAGACGGGCACCGCCAGCCCCGTGGAGTTCCTCTCCCATCAGGCGTGGGGAGCTGTCAAGGTACTTTCATCAATGGAAGAATTCTCTAATCTGGATCGGGACATAGAGGGATCTGCTAAGAGCTGGAAAAAGTTTGTGGAGTCCGAATGTCCTGAGAAAGAGAAGCTCCCACAGGAGTGGAAGAACAAGACAGCCCTGCAGCGCCTCTGCATGCTGAGAGCCATGCGGCCCGACCGGATGACCTATGCTTTGCGAGATTTTGTTGAAGAGAAGTTAGGAAGCAAATACGTGGTGGGAAGAGCCCTAGATTTTGCAACCTCATTTGAAGAATCGGGACCAGCCACTCCTATGTTTTTCATCCTGTCTCCAGGGGTGGACCCACTGAAGGATGTAGAAAGTCAAGGAAGAAAACTTGGATACACCTTCAACAATCAGAACTTTCACAACGTGTCTTTGGGGCAAGGACAGGAAGTGGTGGCTGAGGCTGCGCTGGACCTCGCTGCCAAGAAAGGTCACTGGGTTATTTTGCAGAACATTCACCTGGTGGCCAAGTGGCTCAGCACC
CTGGAGAAGAAGCTGGAGGAGCACAGTGAGAACAGCCACCCAGAGTTCAGGGTCTTCATGAGTGCAGAGCCAGCACCCTCCCCTGAGGGCCACATCATCCCCCAGGGCATCCTGGAGAACTCCATTAAGATCACCAATGAGCCCCCCACGGGCATGCATGCCAACCTGCACAAGGCCCTGGACAACTTCACTCAGGACACTCTGGAGATGTGTTCTCGGGAGACGGAGTTTAAGAGCATCCTCTTTGCTCTTTGTTACTTCCATGCGGTGGTGGCAGAAAGACGAAAATTTGGGCCCCAGGGATGGAATCGCTCATACCCCTTTAACACTGGAGACCTCACTATCTCTGTGAATGTCCTCTACAACTTCCTGGAGGCCAACGCAAAGGTCCCCTATGATGATTTGCGCTACCTGTTTGGAGAGATCATGTATGGAGGCCATATCACAGATGACTGGGACAGAAGACTCTGCAGAACCTACCTGGGGGAATTCATTCGACCAGAAATGTTAGAAGGAGAACTGTCTTTGGCCCCAGGGTTCCCACTCCCAGGCAACATGGACTACAATGGTTATCATCAGTACATCGATGCTGAGCTGCCCCCAGAATCCCCCTACCTCTATGGCCTCCACCCGAACGCAGAGATTGGCTTCCTGACCCAAACCTCAGAAAAGCTCTTCCGCACTGTGCTGGAGCTGCAGCCTCGGGACAGCCAGGCCAGAGACGGAGCGGGCGCCACAAGAGAAGAAAAGGTCAAGGCACTTCTGGAAGAAATATTGGAGCGGGTGACAGACGAGTTTAACATCCCAGAACTGATGGCCAAAGTGGAGGAGCGCACCCCTTACATTGTAGTTGCCTTCCAGGAGTGTGGCCGGATGAATATCCTCACCAGAGAGATTCAGCGCTCACTGAGGGAGCTGGAGCTCGGCTTAAAGGGGGAGCTGACTATGACCAGCCACATGGAGAACTTACAGAATGCCCTGTACTTCGATATGGTGCCAGAGTCCTGGGCTAGACGAGCCTACCCTTCCACAGCAGGCCTGGCAGCCTGGTTTCCAGACCTCCTCAACAGAATCAAGGAGCTAGAGGCTTGGACGGGTGACTTTACAATGCCCTCCACTGTGTGGCTGACAGGCTTCTTCAACCCCCAGTCGTTCCTGACTGCCATCATGCAGTCCACGGCTCGCAAGAATGAGTGGCCACTGGACCAGATGGCCCTGCAATGTGACATGACGAAGAAGAACAGAGAAGAGTTTAGGAGTCCTCCTCGGGAAGGGGCCTACATCCATGGCCTCTTCATGGAAGGTGCCTGCTGGGACACACAGGCTGGGATCATTACAGAGGCAAAGCTGAAGGATCTGACACCCCCTATGCCTGTGATGTTCATCAAGGCCATTCCTGCAGATAAGCAGGACTGCCGCAGTGTCTATTCCTGTCCTGTGTACAAGACTAGTCAGCGGGGACCCACCTACGTGTGGACTTTCAACCTGAAGACTAAGGAAAACCCATCCAAGTGGGTTCTGGCTGGAGTAGCCTTGCTTCTCCAGATT
>Mouse
ATGCCCGGCGCCAAGGAGCAGGCAGCGCTG---------GCGGAGTCTGGGGACGAGGAGCCTGGA------GACCCGAGGCTGCGGCTTCTGGGGACTTTTGTGGCTCGGAGCCTGCGTCCGGCCGCGGGCACCTGGGAGCGCTGTGCAGGCACAGCCGAGGCGGGGAGGCTGCTGCAGGCCTTCCTG---GATCACAACGCTGCCTCGGATCCGCGGCCA------CTGCTGGTGGTTCAGTCCGGGCCCGGGGGCCTGGTGGTGACACCCGGTCTAGACGCAGGACCAGAGCCCAGCCGAGCTCGCGCCAAGGGGCTCTTTTTCTTGCGCACTAAG---TCCGAGCCTCCGGGAAATCACAGCCTCCGCGGCACGGTGCTCTGCGGGGACCTACCCGCGGTGCCACTGGAGCACCTGGCCCCGCTGCTCTCAGAGGTCATTATTCCTGTCCTGGCAAATGAAAAGAACCATTTAGAATGGCCCCACATGGTATGTCAAGACATCAGACATCATGCCCACACCCTGAAGTCTGACCTCCTAGTGATCTTTGAGCACATGAAGGGGAGAACCTTGCTGCCTCTTCCAGTTGGCTCAGAAAAACTGGAGTTTGTGGATGGCCACAGTGAGCCAGTCTCAGATGCCATAGACAAGTCAACTCTCTATGCTGTGGAGTCTGCAGTGATCAAATGGAGCCACCAAGTCCAGGTGGTACTCAAGAGGGAGTCTTCTCAGGCACTCATACAAGGACAAAATCCCACCCCCAAGGTGGAGCTGGAGTTCTGGAAGAGCAGGTGTGAGGACCTGGAACACATTTATAATCAACTAATGACAATCAAGGTGAAGGGAATGGCTGAACTCCTGGACAAACTTCAGAGCAGCTACTTGCCAGCTTTCAAAGCCATGTTCAGAGACGTTGAAGCAGCCCTGACCGAGGCCCAGGACATCCATGTGCACCTGTTACCTCTCCAGCAACACCTGGACATCCTGGAAAACGTGGAGTTTCCCAAGGTGAAGGGCAGGCTGCGGCCTCTGCTCCATGTGGTCTGTCTGATTTGGGCCACCTGCAAATGGTACCGTTCCCCTGGGAGGCTCACAGTGCTGCTCCAAGAAATCTGCAACCTCCTCATCCAGCAGGCCTCTAATTACCTCAGCCCAGAAGACCTCCTGAGAAGTGAGGTGGAAGAGAGTCAGAAAAAACTGCAAGTGGTCTCAGATACCTTAAGCTTCTTCAAACAGGCATTCCAGGACAGAAGGGAGCACCTCCACACTTACTTCAAGGAGGATTCTGAAGTCAGGGTGTGGGATTTCCAAGCATCTCTGGTGTTTGTGCGACTGGATGGCTTTCTGGGCCGAGTGCACATGGTGGAGGATCTTCTGAAGACAGCCTTGGATCTCAACAATCTGGAAAAGCTTGAGTTCAGTGGCCTCAGAGGAAACTCCCTGAGTCAGAAAGTCCAACGCATGCATGAGGAATTTGAGGAGATGTACAAGGTCTTCTTGGACTGCTCCTATGACTGTTTGGACCCCAAGGGCACGGAATTTGAAAATGATGTCTGTGAGTTTAACAAAAGAGTGGAAGATCTTGACCGGAGACTGGGGACTATCTTAATTCAAGCTTTTGATGATGCACCTGATGTGGAACATGCCTTTAAGCTACTGGACATCACAGGAACCCTCATCAAAAGACCCCTGGTAGCACAGGATGTATCACAAAAATACCTGGCCCTCATCCGAATGTTCAGCACAGAACTGGATGCTGTGAGGGTCATCTACAGTCAGCACATCCAGAAGGAGGCAGAGCATGGATTCTCCCCCGTGCACAAGAACATGCCCACTATGGCTGGCGGCATCTGCTGGGCACAGGAACTGAGGCAGCGCGTCAAGGGTCCCTTTGGCAACTTCAAAAACATACCACATCTGTACTTGCAATCTGCTGAAGGAAAGCGAATGATACAAAAATACGAAGACTTGCTCTCCCTGCTAGAAGAGTATGAGAG
AAGACTTTATGAGGACTGGTGTCAGACGGTATCTGAAAAGTCACAGTACAATCTTTCCCTACCTCTTTTGCATCGTGACCCCAACACAAAGCAGCTCTCTGTCAACTTTAACCCACAGCTGATTTCAGTGTTGAAAGAAATGAACTATCTTCAGCCCAGTGAGGTGAAAACCATCCCCGAGACCGCAGCAGCCATGTTCTCCTCCAGGGAATTCTATCGTCAGCTTGTGGCCAACTTGGAGTTGATGGCAAATTGGTACAACAAGGTTATAAAAATTCTGCTGGAGGTGGAATTTCCACTAGTGGAGGAAGAACTGCAAAATATTGATCTCCGCCTGAGAGCTGCAGAGGAGACTCTGAGCTGGAAAACAGAAGGCATTTGGGATTATGCTATGCAAATAACCAATAGCATTCATGACCTGGAACAAAGAATTCAGAAGACAAAAGACAATGTGGAAGAGATTCAAAACATCATGAAAACATGGGTGTCTCCAATATTCAAGAGAAAAGATGGGAAAAAAGAATGGCCCCTTTCTCTGGATGATCAGCAGGATCACATGGAAAAATACTACAGTCTCATCCAGGAATCTGGCCTTAAGATTCACGCTCTTGTTCAGGAAAACCTGGTTCTGTTTGCAGCAGACCCAGCATCCAGCATTTGGAAGTCTTACGTGAACTACATTGATTCCATGTTGTTGGATGGATTTTTTCTTGCCATTGAGTGTTCTCTCAAATATCTATTGGAAAACACTGAATGCAAGCCTGGACTCACCCCAGTATTTGAAGCACAGCTCAACCTTGTCACCCCAGAATTAGTTTTCCACCCCTCTCTGGACTCTGGGGTAAAGGGAGGCTTATATGACATTGTCCAGAGTCTTGTCACCAGAATTTTTGCTATGCCATCCCTCGTGCCACGGCTTTCCCCACACAGTGGCTCTCCTCACTATCAGGGTGACCTAGAGGACATGGCCGACTTAGCTGGTCTTCGGAGTGTGCTCATGGAGAGGGTACAGAATATGATGACCCTCTGCTGTGGCTATAGAAATACCCTCAGCCAGTATTCTTACCTGTATGTGGAGGATAGGAAGGAGATTCTTGGTCAGTTTCTGCTCTATGGGCATGTCCTCACACCTGAAGAGATAGAAGCCCATGCCGAAGACGGCATTCCAGAAAATCCACCCCTCCTCCATCACTTCAAAGACCAGATAGACTCCTATGAAAAGCTCTATGAGGAGGTGGTCAGCCTGGAACCCACCAAGGTGTTTGATGGCTGGATGCGAGTGGATGTGAGACCCTTCAAAGCATCTCTGCTGAACACAATAAAGAAGTGGAGCCTCATGTTCAAGCAACATCTTGTTGACTTTGTCACAAACAGCCTGTCTGACCTTGACTCATTCATAAGGAGCACCGAGAGTGGTTTGCTCAAGAGGGTGGAGAAAGGAGATTTCCAAGGATTGGTTGAGATCATGGGACATCTTGTCACCCTTAAAGAACGGCAGAGCAGCACCGATGACATGTTTGAGCCCCTGAAGCAAACGATTGAACTGCTGAAGTCCTACGAACAAGAGCTGCCAGAAACCGTGTTTAAGCAACTGGAGGAGCTTCCTGAGAAGTGGAAGAACATGAAGAAGATGGCCATCACTGTGAGGCAACAGGTGGCCCCTCTGCAGGCAAATGAAGTGGCCCTACTCCGCCAGCGGTGCTCAGCCTTCGATGATGAGCAGCAGCAATTCCAGGAGAGGTTCCGCAAAGAGGCCCCTTTCAGGTTTGATAGTATCAATCCACACCAAATGCTGGATGCCTGGCACGTGGAGATCCAGCACATGGAATCCACCATGGCAACCATCTCTAAGTCGGCTGATTTGTTTGAAGTCAATGTTCCTGACTACAAGCAGCTGAGGCAGTGCAGGAAGGAGGCCTGCCAGTTAAAGGAGCTCTGGGACACCATTGGAATGGTGACCTCCAGCATCCGTGCCTGGGAGGCCACCAGCTGGAGGAATA
TCAGTGTGGAAGCCATGGACTCAGAGTGCAAGCAGTTCGCCCGGCACATCCGCAACCTAGATAAGGAGTTCAGGTCCTGGGATGCATTCACGGGCCTGGAAAGCACAGTGTTGAACACCCTGACGTCCCTGAGGGCTGTGGCGGAGCTGCAGAATCCTGCCATCCGGGATCGGCACTGGAGGCAGCTGATGCAGGCCACCGGGGTGAATTTCACCATGAATCAGGATACCACCTTAGCTCATCTCCTGCAGCTTCAGCTCCACCACTTCGAGGATGAGGTCCGAGGCATTGTGGACAGAGCTGTCAAAGAGATGAGTATGGAGAAGACCTTAAAGGAATTGCAGACTACCTGGGCCAGCATGGAATTCCAGTACGAGTCCCACGCAAGAACCCGCGTACCCTTGCTGCAGTCAGATGAGGATCTCATTGAGGTCCTAGAGGACAATCAAGTGCAACTTCAGAACCTGATGATGTCCAAATATGTTGCTTTCTTCCTGGAAGAAGTGTCGAGCTGGCAGAAGAAGCTGTCCACGGCTGACTCGGTCATCTCTATCTGGTTTGAGGTGCAGCGCACCTGGTCTCACCTAGAGAGCATATTCATTGGCTCAGAAGATATCCGGGCTCAGCTACCCCAGGACTCTAAGAGATTTGAAGGCATTGACTCTGACTTCAGAGAGCTGGCGTATGATGCTCAGAAAACCCCAAATGTGGTGGAAGCCACAAATAAGTCAGGTCTCTATGAAAAGCTGGAGGATATACAAAGCAGATTATGCCTGTGTGAGAAAGCCCTAGCAGAGTATCTAGACACCAAGAGGCTCAGCTTCCCTCGCTTTTACTTCCTGTCCTCCTCTGACCTGCTGGACATCCTTTCCAATGGCACAGCTCCACAACAGGTTCAACGGCACCTCTCCAAGCTCTTTGACAACATGGCTAAGATGCAGTTCCAGTTAGATGCCAGTCAGAACCCAACCAAGACGAGCCTTGGCATGTACAGCAAAGAGGAGGAATATGTGGCCTTCAGTGAGGCCTGTGACTGCAGTGGGCAGGTTGAAATATGGCTGAACCGTGTTCTTCGTCACATGAAAGCCACTGTGAGGCATGAGATGACAGAGGGGGTCACTGCCTATGAGGAAAAGCCCAGGGATCAGTGGCTGTTTGATTACCCGGCTCAGGTGGCTCTGACCTGCACTCAGATCTGGTGGACGACAGAGGTGGGCATTGCATTTGCCAGGCTGGAGGAAGGCTATGAGAGTGCCATGAAGGACTACTATAAGAAGCAAGTGGCCCAACTCAAAACCCTTATCACCATGCTAATTGGGCCGCTCTCCAAGGGGGACAGGCAAAAGATCATGACCATATGCACCATCGATGTGCATGCCCGGGATGTGGTAGCCAAGATGATTGCTCAAAAGGTTGACAATGCCCAGGCTTTCCTCTGGCTGTCACAGCTACGACATCGTTGGGATGATGAGGCCAAGCACTGCTTTGCTAACATCTGTGACGCCCAGTTTCTATATTCCTATGAGTACTTGGGAAATACACCTCGCCTAGTGATCACGCCTCTGACT------GATAGGTGCTACATCACTCTCACCCAGTCTCTACACTTGACCATGAGTGGGGCTCCAGCAGGACCTGCAGGCACAGGCAAGACAGAGACCACCAAGGACCTGGGCAGAGCACTCGGCATCATGGTCTATGTGTTTAACTGTTCTGAGCAGATGGACTACAAGTCCTGTGGCAACATCTACAAAGGCCTGGCTCAGACTGGTGCCTGGGGCTGTTTTGATGAGTTTAACCGAATCTCTGTGGAGGTCTTGTCGGTGGTGGCTGTCCAGGTAAAAAGCATCCAGGATGCAATCAGAGACAAGAAGCAGAGGTTCAGCTTCCTTGGAGAGGAGATTAGCCTTGACCCTTCAGTGGGCATCTTCATTACCATGAACCCAGGCTATGCTGGCCGCACAGAACTGCCAGAGAACCTCAAGGCCCTTTTCAGG
CCCTGTGCAATGGTAGTTCCAGACTTTGAGCTGATCTCTGAGATTATGCTGGTAGCAGAAGGATTCATTGAAGCCCGGTTGTTGGCCAGGAAGTTCATTACCCTTTACCGGCTGTGTAAAGAACTTCTCTCCAAACAGGATCACTATGACTGGGGACTTCGTGCCATTAAGTCTGTCCTCGTGGTAGCAGGATCACTGAAACGGGGAGACCCTGACCGCCCAGAGGACCAAGTCCTGATGCGTTCTTTGAGAGACTTCAACATCCCAAAGATCGTGACAGATGACATGCCGGTGTTCATGGGTCTGATAGGTGACCTCTTTCCTGCTTTGGATGTCCCCAGGAAGAGAGATCTGGACTTTGAGGCTGTGGTTCGGAAAGCAATCGTGGACCTTAAGCTCCAGGCTGAGGACAACTTTGTGCTCAAGGTGGTCCAGCTGGAGGAGCTGCTAGCTGTAAGGCACTCTGTG---TTCGTGGTGGGCGGTGCTGGT------ACCGGGAAGTCACAGGTACTGAGGTCTTTACACAAGACCTATCAG------ATCATGAGACGTCGCCCTGTGTGGACTGACCTCAACCCCAAAGCTGTCACAAATGATGAACTCTTT---------------------------------------GGCATCATCAATCCAGCCACTCGAGAATGGAAGGATGGA---CTGTTCTCTTCCATCATGAGAGAGCTTGCCATCATCTCTCATGATGGGCCCAAGTGGATCTTACTGGATGGCGATATAGACCCGATGTGGATAGAGTCTCTGAACACAGTCATGGATGATAACAAGGTACTGACCCTGGCAAGCAACGAGAGAATCCCCCTTAACCCCACAATGCGTCTTCTTTTCGAGATCAGCCACCTGCGCACAGCCACACCAGCAACCGTCTCCAGAGCAGGGATCCTGTACATAAACCCTGCAGACCTGGGATGGAACCCTCCAGTAAGCAGCTGGATTGATCAGAGAGAAGTCCAGACTGAGAGAGCCAACTTGACCATCCTGTTTGACAAATATCTTCCTACCTGCTTGGACACCCTCAGAACCAGATTTAAGAAAATAATTCCAGTCCCAGAGCAGAGTATGATCCAGATGCTGTGCTACCTCCTTGAGTGCCTCCTGACAAAGGAGGATATCCCTGCAGACTGCCCCAAGGAAATATATGAACTCTATTTTGTGTTTGCTGCCATCTGGGCATTTGGCAGTGCTGTGATCCAAGATCAGCTTGTAGACTACCGGGCAGAGTTCAGCAAATGGTGGCTGACTGAGTTTAAAACAGTCAAGTTTCCTTCCCAAGGAACTGTCTTTGACTACTACATAGACCCAGAGACCAAGAAATTTGAGCCCTGGGCCAAGCTCATCCCCCAGTTTGAATTTGACCCAGAGATGCCTTTGCAGGCTTGTTTGGTACACACAAGTGAGACCATCCGGGTGTGCTACTTCATGGAGCGGCTCATGCAATGGAGGCGGCCGGTTATGCTGGTTGGCCCTGCAGGCTCAGGCAAGTCTGTGCTGGTGGGAGCAAAGCTGTCCAGCCTTAACCCTGAGGAATACATGGTGAAAAATGTGCCCTTCAACTACTATACTACGTCAGCAATGCTGCAAGCTGTCTTGGAGAAACCTCTAGAAAAGAAAGCTGGCAGGAATTATGGCCCTCCAGGCAACAGGAAACTCATCTATTTCATCGATGACATGAATATGCCCGAGGTGGATGCCTATGGCACAGTACAGCCCCACACTGTCATCAGGCAGCACCTAGACTATGGCCACTGGTATGATCGGAACAAGCTGTCTCTGAAGGAGATCATGAATGTACAATACATCTCCTGTATGAACCCCACTGCAGGCAGCTTTACCATCAACCCAAGGCTTCAGCGCCACTTCAGCGTGTTTGCCCTCTGCTTCCCAGGAGCTGATGCCCTCTCTTCCATCTATAGCACCATCTTGACCCATCATCTGAAGTTTGGAAA
CTTTCCCACCACCCTGCAGAAATCCATCCCTCCTCTGATAAACCTGGCTGTCACCTTCCATCAGAAAATTGCCACCACGTTTCTGCCCACAGCAATCAAATTTCACTACATCTTCAATCTCAGAGATTTTGCCAATATTTTCCAAGGCATTCTTTTCTCCTCCGTGGAATGTGTAAAGTCCACACAGGACCTAGTGAAACTCTATCTGCACGAGTCAAGTCGGGTTTATCGGGATAAGATGGTGGAAGAAAAGGATTTCAATCTTTTTGACAAAATCCAAACAGAATTCCTCAAGAAAAATTTTGATGATAGTGAAGAGGTGCTGAAGCAGACCCAGAACCTGAACATGTATTGTCACTTTGCAAATGGCATTGGTGAGCCCAAGTACATGCCTGTGCAATCATGGGACCTTCTGAATCAGACTCTGGTGGAAGCCCTGGAGAGCCACAATGAAGTGAATGCTGTGATGGACCTAGTTCTCTTTGAGGATGCCATACGTCACATCTGTCACATCAACCGAATCCTGGAGTCCCCTCGAGGAAATGCCCTGCTAGTTGGTGTAGGTGGGAGTGGTAAGCAGAGTCTGACAAAGCTGGCAGCTTTTATCAGTTCCATGGATGTATTCCAGATCACCCTTCGCAAAGGCTACCAAATCCCTGACTTCAAGGTGGACCTGGCCAGCCTCTGTCTGAAAGCTGGGGTAAAAAATCTCAGTACAGTGTTCCTTATGACTGATGCCCATGTGGCTGACGAGAGGTTCCTGGTGCTCATCAATGACCTCCTGGCATCTGGTGAGATCCCAGATCTCTACTCTGATGAGGAAGAGGAGAACATCATAAACAATGTGAGAAATGAGGTCAAAAGCCAGGGACTCATGGACAGCAGGGAGAACTGCTGGAAATTCTTCATAGAGAGAGTCCAGCGACAACTTAAGGTGACTCTCTGTTTCTCCCCTGTGGGGAACAAGCTGAGAATTCGAAGCAGGAAGTTCCCAGCCATTGTGAACTGTACTGCTATCAACTGGTTCCATGAGTGGCCTCAGGAGGCCCTAGAGTCTGTGAGCCTCCGATTCTTGCAGAATACAAAGAACATTGAGCCTGCGGTGAAGCAGTCAATTAGCAAGTTCATGGCCTTTGTCCACATAAGTGTCAACAAGACATCCCAGTCATACCTGACCAATGAGCAGCGATACAACTACACAACACCCAAGTCCTTTCTGGAGTTCATCAGACTGTACCAGAGCTTGCTGGAGAGAAATGGAAAAGAGCTCCAGGCCAAGGTGGAGAGGCTGGAGAACGGGCTGTTGAAACTGCACAGCACCTCGGCCCAGGTGGATGATCTGAAAGCGAAGCTTGCCACCCAGGAAGTGGAGCTGAGGCACAAGAATGAAGATACAGACAAGCTGATTCAGGTGGTGGGTGTGGAGACCAGCAAAGTGAGCAGAGAGAAAGCCATTGCTGACGAGGAGGAGCAGAAGGTGGCCCTGATCATGCTGGAGGTGCAGCAGAAACAGAAAGACTGTGAAGAGGACCTGGCTAAGGCCGAGCCAGCCCTGACCGCAGCGCAGGCGGCCCTCAACACTCTCAACAAGACCAACCTGACAGAGCTGAAGTCATTTGGTTCCCCACCTCTGGCTGTCAGCAATGTCAGCGCCGCGGTGATGGTTCTCATGGCCCCAGGGGGCAAGGTGCCCAAGGACCGCAGCTGGAAGGCTGCCAAAATCACCATGGCCAAGGTGGACAGCTTCCTGGATTCCCTAATCCACTTCGACAAGGAGAACATTCATGAGAATTGCCTCAAAGCCATCAGGCCATACCTGCAAGATCCTGCATTCAACCCAGAGTTTGTGGCCACCAAGTCCTATGCAGCTGCAGGCCTCTGCTCTTGGGTAATCAATATTGTGAGGTTCTATGAGGTCTTCTGTGATGTGGAACCAAAGCGCCAGGCTTTGAACAAAGCCACCTCAGACCTCACAACTGCCCAAGAGAAGCTGGCAGCCA
TCAAAGCCAAGATCACACACCTTAATGAAAACCTGGCGAAGCTCACCACCAAGTTTGAGAAAGCAACAGCAGAGAAGCTCAAGTGTCAGCAAGAAGCTGAACTGACCGCAGGCACCATTTCGCTTGCAAACCGTCTGGTTGGAGGCCTTGCATCTGAGAACATAAGGTGGGCAGAGGCTGTGCAGAACTTCAGACAGCAGGAAAGGACGTTATGTGGCGACATTCTGCTTACTACAGCTTTCATCTCCTACTTGGGCTTCTTTACCAAAAAGTACCGAAAGAGCCTCATGGATGGGACCTGGAGACCCTATCTGAGCCAACTGAAAGTTCCCATTCCAACCACCCCAACTCTGGACCCCCTGAGGATGCTAACCGATGATGCTGAAGTGGCTGCCTGGCAGAATGAGGGTCTCCCTGCTGACCGCATGTCCATGGAGAATGCTACCATCCTCATCAACTGTGAGCGCTGGCCTCTCATGGTCGACCCTCAACTGCAAGGCATTAAATGGATCAAGAACAAATATGGAGAAGAACTCCGGGTCACCCAGATTGGCCAAAAGGGCTGCCTTCAAACCATAGAGCGAGCCCTGGAAGCTGGAGATGTGGTACTGATTGAGAACCTTGAGGAGTCCATTGATCCCGTCCTGGGACCTCTGCTTGGGAGAGAAGTCATTAAGAAAGGACGGTTTATCAAGATTGGAGACAAGGAGTGTGAATTCAATCCCAAGTTCCGGCTCATCCTTCATACCAAGCTGGCCAACCCTCACTACCAGCCTGAGCTGCAGGCTCAGGCTACCCTGATCAACTTCACGGTGACCAGGGATGGCCTGGAGGACCAGCTGCTGGCTGCTGTGGTCAGCATGGAGAGACCAGACCTGGAACAGCTGAAGTCCGATCTCACAAAGCAGCAGAACGGGTTCAAAATCACCCTCAAAACCTTAGAGGACAACCTGCTATCTCGCCTCTCTTCAGCCTCGGGGAACTTCCTGGGAGAAACAGCCTTGGTGGAGAACCTGGAGGTCACCAAGCAGACTGCTGCAGATGTGGAGGAAAAGGTCCAAGAAGCCAAATTGACAGAAGTAAAAATTAATGAGGCCCGAGAGCACTATAGGCCAGCAGCTGCCCGGGCATCTCTGCTCTACTTCATCATGAATGACCTCAGCAAGATCCATCCAATGTATCAGTTCTCCCTCAAGGCCTTCAGCATTGTCTTCCAGAAAGCTGTGGAG------AAGGCAGCTCCCAGTGAAAGTGTCACAGAGCGAGTGACTAATCTAATAGACAGCATAACTTTCTCAGTGTACCAGTATACCACACGTGGCCTCTTTGAGTGTGATAAGCTGACCTACCTAGCCCAGCTCACCTTTCAGATTCTCCTCGTGAACCAGGAAGTTAATGCAGCAGAGTTGGATTTCTTGCTTAGGGCTCCAGTACAGACAGGGACTCCCAGCCCAATGGAGTTCCTGTCCCACCAGGCCTGGGGAGGCATCAAGGCACTCTCATCAATGGAGGAATTCTGCAATCTGGACCGAGACATTGAAGGCTCTGCCAAGAGCTGGAAAAAGTTTGTGGAGTCAGAGTGTCCCGAGAAGGAGAAGTTTCCCCAGGAGTGGAAGAACAAGACAGCCCTGCAGCGCCTCTGCATGATGAGAGCCATGAGGCCTGACCGGATGACCTATGCCATGCGAGATTTTGTTGAGGAGAAGTTGGGAAGCAAATACGTGATGGGAAGAGCACTCGATTTTGTAACCTCATTTGAAGAGTCAGGACCAGCCACTCCCATGTTTTTCATCCTGTCTCCAGGGGTGGATCCACTGAAGGATGTGGAAAATCAAGGAAAGAAACTTGGATATACATTCAACAATCGGAACTTCCACAACGTGTCCCTAGGGCAAGGACAAGAGGTAGTTGCTGAGGCTGCACTGGACTTGGCTGCTAAGAAGGGTCACTGGGTGATTCTGCAGAACATCCACCTGGTGGCCAAGTGGCTCAGTACC
CTGGAGAAGAAACTGGAGGAGCTCAGCGAGGAAAGTCACCCAGACTTCAGGGTCTTCATCAGCGCAGAGCCTGCACCCTCCCCTGAGGGCCACATCATTCCCCAGGGCATTCTGGAAAACTCCATTAAGATCACCAATGAGCCTCCCACAGGCATGCACGCCAACTTACACAAAGCCCTGGACAACTTCACTCAGGACACTCTGGAGATGTGTTCCCGGGAGACAGAGTTCAAGACCATCCTCTTTGCTTTGTGCTACTTTCATGCGGTGGTTGCCGAGAGACGGAAGTTTGGGCCACAGGGCTGGAATCGGTCCTATCCATTTAACACTGGGGACCTCACCATCTCTGTGAATGTGCTATATAATTTCCTGGAGGCTAACACAAAGGTACCCTATGACGACTTGCGTTACCTGTTCGGTGAGATCATGTACGGTGGCCATATCACAGATGACTGGGACAGGAGACTCTGCAGAACCTATTTAGAGGAATTCATTCGGCCAGAGATGCTAGAAGGAGAGCTCTCCCTGGCCCCGGGGTTCCCACTCCCAGGAAACATGGACTACAGTGGCTATCACCAGTATATTGATGCTGAGCTGCCCCCTGAGTCTCCCTACCTATATGGCCTCCATCCAAACGCCGAGATTGGCTTCCTGACCCAGACATCGGAAAAACTCTTCCGAACCGTGCTGGAGATGCAGCCTCGGGACAGCCAGGCTGGAGATGGAGCTGGCATCACAAGGGAAGAAAAGGTCAAAACCTTTCTGGAAGAAATACTGGATCGGATGACAGATGAATTTAACATCGCGGAGCTAATGGCTAAGGTGGAGGAACGCACCCCCTACATTGTAGTTGCCCTCCAGGAGTGTGAACGCATGAACATCCTTACCAGAGAGATCCAGCGCTCACTAAGAGAGCTGCATCTTGGCTTACAGGGGGAGCTGACCATGACCAGTGAGATGGAAAACCTACAGAATGCCCTATATCTAGATGTGGTCCCAGAGTCCTGGGCCAGGCGAGCCTACCCTTCCACAGCAGGCCTGGCAGCCTGGTTTCTAGACCTGCTTAACAGAATCAAAGAGCTGGAGTCCTGGACAGGCGACTTCTTGATGCCCTCAACTGTGTGGCTGACGGGCTTCTTCAACCCCCAGTCCTTCCTGACTGCCATCATGCAGTCCATGGCCCGCAAGAATGAATGGCCACTAGACCAGATGGCCCTGCAGTGTGATGTGACAAAGAAGAACAGAGAGGAGTTCCGGAGCCCTCCTCGGGAAGGGGCCTACATCTATGGGCTCTTCATGGAAGGTGCCTGCTGGGACACACAGACTGGGATCATTGCAGAGGCGAAACTGAAGGACCTGACACCCCCCATGCCTGTGATGTTCCTCAAGGCCATTCCAGCAGATAAGCAAGATTGTCGAAGTGTCTATGCTTGTCCTGTGTACAAGACTTGTCAGCGGGGACCCACCTACGTGTGGACTTTCAATCTGAAGACTAAAGAAAACCCATCCAAGTGGGTTCTGGCTGGTGTTGCCTTGCTTCTCCAGATT
>Opossum
ATGCCAGTGAAAGTG------GCGGCGGTGTTCTCCGAGGCTGAGGATGGAGATAGCGAGAAAGGG---CCAGATCCTAGAATACGGCTCCTGGGCACCTACGTGATTCGGTGCTTGCGGCCAGCGGCTGGCGCCTGGAAGCGCTGTGTGAGCACGGCCGAATCTGAGCAGCTGCTCCAGGCCTTCCTGACAGGCTACTGCCCTCAAGAG---------CCG------ATGTTGGTGGTGCGTCCTGGGGGTGGGGGTCTGCAGCTGAGCTCCGGG------------CCATACCCCGATTTGTCTCGAGCCAAAGCGCTGTTCTTCCTGCGC---GGTTCCCCCAATCCCCCAGGACCCTTGGGTCCCAAAGGCGCAGTGCTCTGCGGGGAGCTGGCCCCGGCACCGTTGGAGCATCTGGCCACGCTGGTGGCTGAGGTAGTGATGCCTGTCCTGACCAATGAAAAGAATCACCAAGACTGGCCAAATGCTTTATATCAGGATGTAAGACGACACATCCACCACCTTCAAAGTGACCTCATTATCTTCCTGGGTCAAGTGACAGGAAAAACATTGCTGCCTCTTCCAGTAGGCTCAGAAAACATGGAATGTGCGGATTATGAAAATGAAAAAGACTTGGATTTCACAGATAAATCAATTGTCTATGCTATTGAGTCCACAGTGATCAAATGGAGCCACCAGATTCAAATAGTATTAAAGAGGGAATCTTCAGAACCACTCTTGCAGGGAGGAAATCCCACCCCAAAAGTGGAGCTGGAATTCTGGAAGAGAAGGTGTGAAGATCTGGAATATATTTATAATCAGCTGAGAGCCATAGAAGTGAGGGGCATAGCTGCACTTTTAAACAGACTTCAGAGCAGCTACTTCCCAGCCTTCAAAACCATGTTCAAAGATGTAATAGCAGCTTTAAGAGAGGCCCAGGATATCTATGTACATCTGAAGCCCCTGCAACGCCATGTGGAAAACATAGAGAATGTGGAATTTAGTGAAGTGAAGCCACTGGTGGGACCCCTGCTTCACGTAATCTTCTTGATTTGGTCCACATCTAAATACTACTGCTGTCCAGTACGGATCATTGTGCTACTGCAGGAAATATATAACCTTCTCATTCAGCAGGCCTGTACATATCTTAGCCCAGAAGATCTACTGAAAGGGGAGATAGAAGAGAGTCAGAGGAAAATACAGGTGGTGATGGACATTCTTAACTTTTCCAAAGAGATGTTTGAGAATAGGAGGAACAATCTCAAGACTTACTTCAAACAGGTCCGGGAGGTGAAGGAATGGGATTTCAATTCTTCTATGATCTTTGTGCGACTAAATAACTTCCTCAAAAGACTCAAGATGGTAGAGAATCTTCTGAAGACAACCTTGGATTTTCTCAAGCTAGAGAAACTTGAATTCAGTGGAATTAGAGGGAATGCTTTGAGTCAACAAGTCCAGGGCATGTATGAAGAATTCCAGAATGTGTACAAAGCCTTTTCAGAATGCTTCTATGACTGTCTGGACCCTAAACACACAGAATTCGAAAATTATGTTGCTGAATTCAACCACAAAGTAGAAGATTTGGACCGAAGATTAGGGATGGTCTTCCTCCAAGCTTTTAATGATGTATCTGGCTTAGAGCATGCTTTTAAGTTGCTTGAAATATTTGGGAGTCTGCTTGAAAGACCAGTAGTAGCCATGGATGTATTTGATAAATACCCAAGACTGATCACAATGTTCAGCAGTGACTTGGATGCTGTTAAGACAATCTATTGTCAGCATGTCCAGGAGGAGTCAGAACTTGGGTTTTCCCAATTACACAAGAACATGCCGGCAGTAGCTGGGGGTCTCCGCTGGGCTCAGGAACTCAGAGAACGAATTGAAATTTCATTCAATAACCTAAGACATATCAATCACCCCTATATGGAGTCCACTGAAAGTAAAGAGATGTTCCAAAAGTATGGTGAAATGTTTACATTGTTAGAAAAGTATGAGAC
AAAACTTTATGATAGCTGGTGCCAGACAGTGTCAGAGAAGTCACAATACAATCTCACTCAGCCACTCCTCTGTCATGATCCAGAAACTAAGCAGATCATTGTCAACTTTAACCCACAG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAAAAC---ATGAAA------------------------------------------------------------------------------------------------------------------------------------------------------CTCTTCCTTGCAGATCCAGCATCCAGTATGTGGAAGGCATACATTGACTACCTTGATAACATGGTTCTTCATGGATTCTTCATTGCTATTGAATCTTCTCTAAATTATCTCCTGGAAAATACTGATTCTAAGAACAGACTTCCTCCCTTTTTTGAAACACAATTGGATCTAGTTATACCAGAACTGATCTTTTGTCCCTCTCTGGACCCTTTTGCTAATGGTGGTTTTCAAAGCATTGTGGAGGGACTTATCAATGACATTTTCAAAATATCTTCTATGGTCCCACGACTT---GCACAGAAAAGTTCCCCTCATTATCAGGCTGACATGGAGGACACAGAAGATCTGTCAAACCTGAGGAATATATTAATGGAGAGAGTCAAGAATATGATGACCATCTGCTGTGACTATCGGAACTCCTTTGACCATTACTCTTTCCTTTACATGGATGACCGAAAGGAATTTATGCGACAATTTCTTTTATATGGACATATGCTCACACCAGAGGATTTAGAAGCCCATGGAGAAGATGAGATCCCTGAAAATCCTCCCACTCTTCAGCAATTTAAATTGCAGATTGATTCCTATGAAAAGATTTATGAAGAGGTGAACCAGCTAGAGCCTTTCAGAATTTTTGACAACTGGATGAAAATTGATATTCGTCCATTTAAGATGACTCTACTAAATGTAATTAAGAGATGGAGCTTCATGCTTAAACAGTATCTCATTGATCATGTCACTCACAGTTTGGCAGACCTTGAAGTCTTTATAAGAAATGGTGAGAGTGGTTTAAACAAGAAGATAGAAAAAGGTGATTTTGCTGGTTTGGTTGAAATTATGAGAACTCTAATGGCTCTTAAAGAACGACAGAGCAGCACTGATGAGATGTTTGAGCCACTGAAACACACAATTGATTTACTGAAGATCTATGAACAAGAATTACCTGATTCAGTATTTAAACAGCTAGAGGAATTGCCAGAAAAATGGAGCAACATAAAGAAGATGGCAGTCATTGTAAAACAGCATGTAGCTCCCCTGCAGGAAGATGAAGTAACAGCTCTCCGTCAAAAATGTGCCATTTTTGCTATTGAACAGAATATATTCCAAGATCAGTTCCACAAAGAAGCTCCTTTCAGGTATGATAGCATTAAGCCTCACCAAGTCTTGGATGCCAAAAACATTCAAATCCAACAGATGGAATCTTTCATGTCCTCCATTTCAGACTCTACAAATTTGTTTGAAGTCAACATTCCTGATTACAAACATTTAAAACAATGCAGGAAGGAGATCTGCTTGTTGAAGGAGCTTTGGGATATGATTGACCTAGTGAATTTTAGCATCAATAACTGGAAGGTAACCAAATGGAGGAATA
TTAATGTAGAGAATATGGACTTAGAATGTAAAAGTTTTGCCAAACAAATATGGAAACTTGGCAAGGATGTGAGAGCCTGGGATGCTTTCATGGGGCTGGACAACACTCTGAAGAACATCCTAACATCCTTACGGGCTGTGTCTGAACTTCAAAATCCAGCCATAAGAGAGAGGCACTGGAACCAGCTGATGCAGGCTACAGGTATGAAATTCATCATGAATGATGATACCACCCTTGAAGACTTACTTAAGCTTGAGCTACACAACTTTGAAGAGGCGATCCAGGGCATTGTAGACAAAGCTGTGAAAGAGATGGACATGGAAAAAGTTCTAAAGGAACTAAAAGCCACTTGGGCAGGGATGGAATTTCATTATGAGCCCCACCCCCGGACAATGGTCCCACTGATGAGGTCTGATGAAGACCTTATTGAAACTCTTGAAGATAACCAAGTCCAGCTGCAGAATCTAATGACATCCAAGTACATAGCCTTCTTCCTGGAGGAAGTATCTGACTGGCAGAAGAAGCTCTCAATAGCTGATGCTGTCATTTCAATCTGGTTTGAAGTACAGCGTACATGGTCTCATCTTGAAAGCATTTTCATTGGATCTGAAGATATACGAGCACAGCTTCCTAAGGACTCTAAACATTTTGAAGACATTGATACTGATTTTAAAGAGTTAGTTTGTGATGCTCAGAAGACACCAAATGTGGTAGAAGCTACCAACAAACCAGGTGTTCATGAACAATTAGAAGATATTCAGAACAGGTTGAGCCAGTGTGAGAAAGCCTTGACTGAATATCTAGATACCAAGAGACTGGTCTTCCCCAGGTTTTATTTCCTCTCCTCTTCCGACCTTTTAGACATCCTTTCCAACGGCACAAATCCACAACAAGTTCAACGCCATCTTTCTAAGTTATTTGACAGTATGGCTAGGATGAAGTTCCAGGTGGACTCCAGTCAAAAACCAACCAAGAAAAGCCTCGGCATGTATAGCAAAGAGGAAGAGTATGTGAATTTCAGTGAGCCTTGTGACTGTAGTGGCCAGGTTGAACTCTGGTTGAATAATGTGCTCAATCACATGAGGGCCACCGTGAGACATGAAATGACAGAAGGTGTCACTGCCTATGAAGAGAAACCTAGGGAACAGTGGCTCTTTGACTACCCTGCTCAGGTGGCCTTGACATGTACCCAGATCTGGTGGACCACTGAGGTGGGAATTTCCTTTGCCAGGCTGGAAGAAGGATATGAAAGTGCCATGAAGGACTATTATAAGAAACAAGTTACCCAACTGAACACCCTGATCACCATGCTAATTGGCCAGCTCTCCATGGGTGACCGTCAAAAAATCATGACCATCTGTACTATTGATGTGCATGCTCGAGATGTAGTTGCCAAGATGATCTCTCAGAAGGTAGAGAACACTCAGGCTTTTCTCTGGATGTCCCAGATGCGCCATCGGTGGGATGATGAGAAAAAGCACTGCTTTGCCAATATTTGTGATGCTGAATTCCTGTATTCCTATGAGTACCTGGGTAACACACCTCGCCTGGTGATTACCCCTCTGACA------GACAGATGTTACATTACCCTCACCCAGTCTTTGCATTTGACCATGAGTGGGGCTCCAGCAGGACCTGCAGGCACTGGCAAAACTGAGACTACCAAAGACCTGGGTCGAGCATTGGGTATCATGGTATATGTGTTTAACTGTTCTGAGCAAATGGACTACAAGTCTTGTGGCAATATCTACAAAGGCCTTTCTCAGACTGGTGCCTGGGGCTGTTTTGATGAATTTAACAGAATTTCTGTGGAGGTTCTTTCCGTGGTGGCAGTACAGGTGAAAAGTATTCAAGATGCAATCAGAGATAAGAAGCTGAATTTCAATTTCCTTGGAGAGGAGATTAAATTGAATCCCTCAGTGGGAATCTTCATCACTATGAACCCAGGATATGCTGGCCGTACAGAGCTTCCAGAGAACCTTAAGGCTCTCTTCAGG
CCTTGTGCTATGGTGGTACCAGATTTTGAATTGATCTGTGAAATCATGTTGGTTGCTGAAGGATTCATCGAGGCACAATCATTGGCCAAAAAATTCATTACCCTTTATCAGCTTTGCAAAGAGCTCCTCTCCAAACAGGACCATTATGACTGGGGTCTGCGAGCTATCAAGTCTGTGCTTGTGGTAGCAGGATCTCTGAAGAGAGGAGACCCTGACCGACCTGAAGACCAAGTCCTAATGCGTTCCCTCAGAGACTTCAATATTCCCAAAATCACAACTGATGACATGCCTGTGTTTATAGGCCTAATTGGAGACCTTTTCCCTGCCCTTGACATCCCCAGAAAGAGAGACCTTCACTTTGAAAGTTTTGTAAAACAGGCAATATTAGAGCTAAAACTTCAGGCTGAGGACAACTTTATGCTCAAAGTGGTACAGCTAGAAGAACTTTTGGCTGTGCGACACTCTGTG---TTTGTGGTGGGGAATGCTGGT------ACAGGAAAATCCCAGGTACTAAGATCTTTGCAAAAGACATACCAG------AACATGAAACGACGACCAGTCTGGACTGATCTCAACCCCAAAGCTGTCACTAATGATGAACTATTT---------------------------------------GGCATCATCAACCCAGCCACAAGAGAATGGAAAGATGGA---CTTTTCTCATCAATCTTGCGTGAACTTGCCAATATAATACATGATGGGCCCAAGTGGATTTTACTAGATGGCGATATTGATCCAATGTGGATTGAATCTTTGAACACTGTCATGGATGATAACAAGGTTCTGACTCTGGCAAGCAATGAAAGAATCTCTCTCAATCCAACTATGCGGCTCCTGTTTGAGATCAACCACTTGCACACAGCTACCCCTGCCACAGTCTCCAGAGCAGGAATACTATACATCAATCCTGCAGACCTGGGTTGGAATCCACCAGTGAGTAGTTGGATTGACAAGAGAGAGATACAGTCTGAACGAGCTAACCTGACCATCCTATTTGACAAATATTTGCCACCTTGCCTGGATGTAGTCAAAACAAGATTTAAAAAGATTGTTCCAATACCAGAGCAGAGCATGATTCAAATGCTGTGTTACATTCTTGAGTGTCTTCTAACAAAGGAAAACAGTCCCCCAGACTCTCCCAAGGAACTTCATGAACTTTATTTTGTGTTTGCTTCTATCTGGGCTTTTGGTGGAGTATTGATCCAAGACCAGCTTGTGGATTACAGAGCAGAGTTCAGTAAATGGTGGATAACTGAATTCAAGACAATCAAGTTTCCTTCCCAAGGAACAATCTTTGACTTTTACATAGAACCAGAAACAAAGAAATTTGAGCCGTGGTCCAAACTTATCCCCAAATTTGAATTTGACCCAGACCTACCTTTGCAGACTTGCCTGGTACACACTGTTGAGACCATTCGTGTGTGCTATTTCATGGAGCAGCTCCTGAAACACCGGAGACCTATCATGTTGGTGGGGAATGCAGGCACTGGGAAATCTGTTCTGGTAGGGGCAAAATTGGCTACCCTGGATGCAGATGAATACATGGTGAAGAATATCCCATTTAATTATTATACTACATCTGCAATGCTACAAGCTGTCCTAGAGAAACCTCTAGAAAAGAAAGCTGGAAGAAATTATGGCCCACCTGGCACCAAGAAACTCATCTATTTCATTGATGATATGAACATGCCTGAGGTAGATGCATATGGAACAGTGCAACCCCACACACTCATCAGACAGCATATGGACTATGGGCACTGGTATGACAGAAATAAACTGTTCCTCAAAGAAATCATGAATGTACAATATGTGGCCTGTATGAATCCGACAGCTGGTGGCTTCTCTATCAATCCTCGGCTACAGCGTCATTTTAGCATCTTTGTGCTCTCCTTCCCTGGAGTAGATGCATTGTATTCAATCTATAGCACCATCTTGACTCAGCATCTAAAACTTGGTAA
CTTTCCAGCATCACTGCTGAATTCCACCCCCCGACTCATTAACTTGGCCATTACCTTCCATCAGAAGATTGCTGCCACTTTTCTCCCCACAGCAATAAAATTCCATTATATCTTCAATCTCCGAGATTTCTCCAACATTTTCCAAGGCATTCTCTTTTCAACAGTGGAAAGTGTTAAAACCACATCAGACCTTGTGAAGCTCTATCTTCATGAGTCCAATCGGGTTTATCGCGATAAGATGGTTGAAGACAATGACTTTGACAACTTTGATAAAATCCAAATTGAAGTGGTAAATAATTTCTTTGATGATATGGACAAGACTCTAGAGGAAATCAAGAGATTGAATATGTACTGCCACTTTGCAAATGGTATTGGTGAGCCCAAATATATGCCAGTGAAGACATGGGAACTGCTTACCCAAATCCTGGTGGAAGCCTTAGAGAACCACAATGAAGTCAATCAAGTGATGAATCTGGTTCTTTTTGAGGATGCCATGTGCCATGTTTGTCGTATCAATCGCATCCTAGAATCCCCAAGGGGGAATGCTTTGCTGGTTGGAGTAGGTGGAAGTGGCAAACAGAGCCTGACAAGACTTGCAGCTTTTATCAGCTCCATGGATGTTTTCCAGATCACTCTAAGAAAAGGTTATTGTATTTCTGATCTTAAGATGGATTTAGCCAATCAGTGCCTAAAAGCTGGAGTAAAGAATGTAAGCACTGTATTTCTCATGACAGATGCCCAAGTTGCTGATGAAAAGTTCCTTGTCCTTATCAATGATCTGTTGGCATCTGGAGAGATTCCAGATCTATACTCTGATGATGAAGTTGAAAACATCATAAACAATATGAGAAATGAAGTCAAGAGCCTGGGTTTGTTTGACAGCAGGGAAACTTGCTGGAAAGTCTTCATAGAAAGGGTCCAAAAACAATTAAAGGTTATACTCTGCTTCTCCCCTGTGGGGAATAAGCTAAGAGTCCGCAGCAGGAAATTTCCAGCCATTGTGAACTGCACAGCCATTGATTGGTTTCATGAGTGGCCTCAGCAAGCACTAGAATCAGTGAGCCTGCGCTTCTTACAAAACATAGAGAATATTGATCCAGCAGTAAAAGAGTCAATTAGTAAATTCATGGCTTATGTACACACGAGTGTCAACCAAATGTCCCAGTCCTACCTGAGCAATGAACGGCGCTATAATTACACCACCCCAAAATCCTTCCTGGAACAGATCAGACTCTATCAGAACCTACTGGTCAAGAATGGCAATGAGCTGACATCTAAAATGAAGAGGCTTAAGAATGGACTGCAGAAGCTTCACAGTACATCTTCCCAGGTAGATGACCTGAAAGCTAAACTGGCACTCCAGGAAATAGAGCTCAGGCAGAAGAATGAAGATGCAGACAAACTAATTCAAGTGGTTGGAGTAGAGACAGAGAAAGTAAGCAAAGAGAAAGCCATTGCTGATGAAGAGGAACATAAAGTGGCTCTAATCATGTTGGAGGTCAAGCAGAAGCAAAAGGACTGTGAGGAAGATCTGGCCAAAGCAGAGCCATCTCTCACAGCAGCCCAAGAAGCTCTCAACACACTCAATAAGACCAACCTAACAGAGCTCAAGTCTTTTGGTTCACCACCTTTGGCTGTCAGCAATGTCACTGCTGCAGTAATGGTTCTCATGGCCCCTGGAGGGAAGGTACCCAGAGATCGAAGTTGGAAAGCTGCCAAGGCTACCATGGCCCGAGTTGATGGCTTTCTGGACGCCCTGGTCAACTTTAACAAAGAGAACATACCTGAGAGCTGCCTCAAAGCTATCCAACCATATATTCAAGATCCAGAATTTAAACCTGAGTTTGTGGCCTCTAAGTCTTTTGCAGCAGCTGGCCTCTGTTCCTGGGTCATAAATATTGTGAGGTTTTATGAGGTTTTCTGTGATGTGGAACCCAAGCGGCAAGCCCTGAGTAAAGCAAGTTTGGATCTTGCTATTGCCCAAGAAAAATTGGCAACCA
TTAAAATCAAGATTGCTCACCTTAATGAAAACTTGGCAAAACTTACAACCAAATTTGAGAAAGCAACTGCAGAAAAACTCAAATGTCAGCAAGAAGCTGAACTGACCACAGGTACCATCTCACTTGCAAATCGCCTGGTTGGAGGTCTTGCCTCTGAAAATGTAAGATGGGCAGAAGCTATTAAGGACTTCAGACAGCAAGAGAATACATTGTGTGGAGATATTTTACTGATTACAGCTTTCATTTCTTACCTGGGATACTTTACCAAGAAATATCGTCAAAATCTTATGGATTGTAGCTGGAGACCTTACCTGAATCAGTTAAAAGTGCCTTTTCCAGTCACCCCTACTTTAGATCCTCTAAAGATGCTTACTGATGATGTAGACATAGCCACCTGGCAAAATGAGGGTCTTCCTGCTGACCGTGTGTCCACAGAGAATGCTACAATCCTTATCAACAGTGAACGATGGCCACTTATAGTTGACCCTCAACTACAAGGAGTCAAATGGATTAAGAAGAAATATGGCAAAGACCTTCGAGTCATCCAGATTGGAGAGAAAGGATACCTTGATATCATAGAACATGCCCTAGCAGATGGTGATGTAGTGCTGATTGAAAACATAGGAGAGACAGTGGACCCTGTTCTAGGACCCTTGTTGGGAAGAGAAGTAATTAAAAAAGGAAGATTCATTAAAATTGGAGACAAGGAGTGTGAATATAATCCCAGGTTCCGCCTCATTCTTCACACCAAGCTTGCCAATCCTCACTATCAGCCTGAGCTCCAAGCCCAGGCAACGCTTATCAACTTCACTGTGACTAGAGATGGCTTAGAGGATCAGCTTCTGGCAGCAGTGGTCAACATGGAGAGGCCAGACTTAGAAGAACTGAGATCAGATCTGACAAAGCAACAGAATGCATTCAAGATCACACTGAAAACCTTGGAAGACAACTTGCTGTCTTGCCTCTCCTCCGCATCTGGGAACTTCCTGGGAGATATGGCTTTAGTAGAAAACTTAGAGATCACCAAAAAGACAGCTACAGAAATTCAGAAGAAGGTCCAGGAAGCCAAGATAACAGAAATAAAAATTAATGATGCCAGAGAGCATTACCGGCCAGTGGCAGCACGTGCGTCTTTGCTGTACTTCATCATGAATGACCTCAGTAAAATCCATCCCATGTATCAGTTTTCTCTCAAGGCTTTCAGCCTTGTCTTCCAAAAGGCTATACAGAAGACCAAGGCAGATGCTGATGAGAACCTCCAACAGAGAGTAGTCAGTTTGATTGACAACATCACCTTCTCTGTATATCAGTATACTACTAGGGGACTTTTTGAATGTGATAAGCTGACTTACATCACCCAGGTCACCTTTCAGACACTCATGATGAATCAGGAAATCAATGCTGCTGAATTGGACTTCCTTCTCCGGTACCCAGCACAGTCCAATGTTCTGAGCCCTGTGGATTTCCTTTCCAACCAATCCTGGGGAGGTATCAAGACTCTTTCATTAATGGAAGAATTTTGTAATCTGGATCGAGACATTGAAGGGTCTGCCAAACATTGGAAAAAATTTGTGGACTCAGAGTGTCCTGAAAAGGAGAAATTCCCCCAAGCATGGAAAAACAAGTCATCTCTACAAAAACTGTGTATGATGAGGGCAATGAGACCTGACCGAATGACATATGCTATGAGAGATTTTGTGGAAGAAAAGCTAGGAAGCAAATATGTGGTGGGCAGAGCATTAGATCTCTCTACCTCTTTAGAAGAATCAGGGTCTGCAACTCCCATGTTCTTCATACTGTCGCCAGGTGTTGACCCACTGAAAGATGTGGAGAAGGAAGGGAAGAAACTTGGTTATACCTTCAACAACCAGAACTTCCATAATGTGTCCTTGGGTCAAGGACAAGAGGTAGTAGCAGAGGCTGCATTGGATCTGGCTGCCAAGAATGGCCACTGGGTTATCTTACAGAACATCCATCTGGTAGCCAAATGGCTTGGATCC
CTTGAGAAGAAGTTGGAACAACATAGCAAGAGCAGTCATCATGAGTTCAGAGTCTTCATGAGTGCTGAGCCTGCAGCTTCTCCTGATGGTCACATCATTCCCCAAGGCATCCTAGAAAATTCAATAAAGATCACTAATGAGCCTCCCATGGGCATGCATGCCAAACTGCACAAGGCCCTGGACAACTTCACTCAGAATACTCTGGAAATGTGTACCCGAGAAACAGAGTTTAAGAGTATTTTATTTGCACTTTGTTACTTCCACGCTGTTGTAGCAGAAAGGCACAAATTTGGACCTCAAGGCTGGAATCGCACTTACCCTTTTAACACTGGAGACCTTACCATCTCAGTGAATGTGCTCTATAACTTTCTTGAGGCCAATGCCAAGGTACCCTATGATGACTTGCGCTATCTCTTTGGTGAGATCATGTATGGAGGTCACATCACAGATAACTGGGACAGGAGGCTTTGTAGAACATACCTGGAGGAATTCATTAAGCCAGAAATGTTAGAAGGAGAAATGTTTCTGGCTCCAGGGTTCCCGATGCCAGGCAATATGGATTACAACAGTTATCACCAGTTCATTGATGACATGCTGCCAACAGAGTCACCATATTTGTATGGGCTTCACCCCAATGCTGAAATTGGCTTTCTGACCCACACCGCAGAAAAGCTCTTCCATACAGTGCTAGAAATGCAGCCTCGGGACAGCCAAGATGGAGATGGAGGAGGGATCACAAGAGAAGAAAAGGTAAAAGCCTTTCTGGATGAAATATTAGAGAAGATAACTGAAGAGTTTAACATTGCAGAGTTGATGGCTAGGGTAGAAGAACGTACCCCCTACGTTGTGGTTACCTTTCAAGAATGTGAAAGAATGAACCTCCTTATCAGAGAAATACAGCACTCGTTGAAAGAACTGGATCTGAGTCTGAAGGGTGAGCTAACAATGACCAGCAACATGGAGAGCTTACAGAGAGCTCTGTACCTAGATACTGTACCAGCGTCGTGGGCCAAGAAAGCATACCCATCAACAGCAGGCCTGGCAAGCTGGATTGTGGATCTGCTCACCCGAATTAAAGAGCTGGAAAGATGGATGGGAGACTTTGCATTACCCTCTGCTGTCTGGCTAGCAGGATTTTTTAACCCCCAATCATTCCTAACGGCCATCATGCAGTCCATGGCTCATAAGAATAAGTGGCCACTGGATAAGATAGCCCTGCAGTGTGAAGTGACTAAGAAGAATCGGGAAGATTTCAGTAGCCCACCTCGGGAAGGGGCCTATATCTATGGTCTGTTCATGGAAGGGGCCTGTTGGGATACACAGGCTGGAAATATCACAGAGGCTAGACTGAAGGATCTGACTCCACTCATGCCTGTGATATTCATCAAAGCTGTCCCTGTGGACAAGCAAGACAATCGAAATATCTATCCTTGTCCAGTATACAAGACTTGTCAGCGGGGACCCACTTATATTTGGACATTTGGTCTGAAAACTAAAGAAGCTCCATCCAAATGGGTATTAGCTGGTGTGGCCTTGCTTTTGCAGATT
"""


class MakeCachedObjects:
    """Fits likelihood functions on demand and caches them by name.

    A single alignment is simulated at construction and stored in
    ``self.results`` under "aln". Each ``fit_*`` method stores its optimised
    likelihood function in the same dict under its own key, and does nothing
    when that key is already present.
    """

    def __init__(self, model, tree, seq_length, opt_args):
        """simulates an alignment of seq_length from model on tree

        The original note states the simulation is under F81, so all fitted
        models should agree. opt_args is the base set of keyword arguments
        used for the optimise() calls.
        """
        self.tree = tree
        self.opt_args = opt_args
        sim_lf = model.make_likelihood_function(tree)
        sim_lf.set_motif_probs({"A": 0.1, "C": 0.2, "G": 0.3, "T": 0.4})
        self.lf = sim_lf
        self.aln = sim_lf.simulate_alignment(seq_length)
        self.results = {"aln": self.aln}
        # the discrete model gets a tree built from tip names only
        self.discrete_tree = make_tree(tip_names=self.aln.names)

    def fit_general(self, **kwargs):
        """fits General, caching the result under "general"

        kwargs are accepted but not applied, only self.opt_args reach
        optimise().
        """
        if "general" in self.results:
            return None
        settings = dict(self.opt_args)
        fitted = _make_likelihood(General(DNA.alphabet), self.tree, self.results)
        fitted.optimise(**settings)
        self.results["general"] = fitted
        return None

    def fit_gen_stat(self, **kwargs):
        """fits GeneralStationary, caching the result under "gen_stat"

        kwargs are accepted but not applied, only self.opt_args reach
        optimise().
        """
        if "gen_stat" in self.results:
            return None
        settings = dict(self.opt_args)
        fitted = _make_likelihood(
            GeneralStationary(DNA.alphabet), self.tree, self.results
        )
        fitted.optimise(**settings)
        self.results["gen_stat"] = fitted
        return None

    def fit_constructed_gen(self, **kwargs):
        """fits a NonReversibleNucleotide model built from directional predicates

        The result is cached under "constructed_gen". kwargs override / extend
        self.opt_args for the optimise() call.
        """
        if "constructed_gen" in self.results:
            return None
        settings = {**self.opt_args, **kwargs}
        # one forward-only predicate per listed (from, to) base pair
        directed_pairs = (
            "AC",
            "AG",
            "AT",
            "CA",
            "CG",
            "CT",
            "GC",
            "GT",
            "TA",
            "TC",
            "TG",
        )
        predicates = []
        for src, dest in directed_pairs:
            change = MotifChange(src, dest, forward_only=True)
            predicates.append(change.aliased(f"{src}/{dest}"))
        model = NonReversibleNucleotide(predicates=predicates)
        fitted = _make_likelihood(model, self.tree, self.results)
        fitted.optimise(**settings)
        self.results["constructed_gen"] = fitted
        return None

    def fit_discrete(self, **kwargs):
        """fits DiscreteSubstitutionModel, caching the result under "discrete"

        Uses self.discrete_tree. kwargs override / extend self.opt_args for
        the optimise() call.
        """
        if "discrete" in self.results:
            return None
        settings = {**self.opt_args, **kwargs}
        fitted = _make_likelihood(
            DiscreteSubstitutionModel(DNA.alphabet),
            self.discrete_tree,
            self.results,
            is_discrete=True,
        )
        fitted.optimise(**settings)
        self.results["discrete"] = fitted
        return None

    def __call__(self, obj_name, **kwargs):
        """returns the cached object for obj_name, fitting it first if absent

        Raises KeyError if obj_name is neither cached nor a known fit name.
        """
        if obj_name in self.results:
            return self.results[obj_name]

        fitters = {
            "general": self.fit_general,
            "gen_stat": self.fit_gen_stat,
            "discrete": self.fit_discrete,
            "constructed_gen": self.fit_constructed_gen,
        }
        fitters[obj_name](results=self.results, **kwargs)
        return self.results[obj_name]


class NonStatMarkov(TestCase):
    """test discrete and general markov"""

    # 3-taxon tree; the tests iterate over its edges (a, b, c)
    tree = make_tree(treestring="(a:0.4,b:0.4,c:0.6)")
    # base keyword arguments handed to MakeCachedObjects for optimise() calls
    opt_args = dict(max_restarts=1, local=True, show_progress=False)
    # class-level cache: the simulated alignment and each fitted likelihood
    # function are created once and reused by all tests
    make_cached = MakeCachedObjects(TimeReversibleNucleotide(), tree, 100000, opt_args)

    def _setup_discrete_from_general(self, gen_lf):
        """returns a discrete-time likelihood function initialised from gen_lf

        Each edge's psubs are set to the psub gen_lf reports for that edge and
        the motif probs are copied across.
        """
        discrete_tree = self.make_cached.discrete_tree
        dis_lf = _make_likelihood(
            DiscreteSubstitutionModel(DNA.alphabet),
            discrete_tree,
            dict(aln=self.make_cached.aln),
            is_discrete=True,
        )

        for edge in self.tree:
            init = gen_lf.get_psub_for_edge(edge.name)
            dis_lf.set_param_rule("psubs", edge=edge.name, init=init)
        dis_lf.set_motif_probs(gen_lf.get_motif_probs())
        return dis_lf

    def test_discrete_vs_general1(self):
        """compares fully general models"""
        gen_lf = self.make_cached("general", max_evaluations=2)
        gen_lnL = gen_lf.get_log_likelihood()
        dis_lf = self._setup_discrete_from_general(gen_lf)
        assert_allclose(gen_lnL, dis_lf.get_log_likelihood())

    def test_paralinear_consistent_discrete_continuous(self):
        """paralinear measure should be consistent between the two classes"""
        gen_lf = self.make_cached("general", max_evaluations=2)
        dis_lf = self._setup_discrete_from_general(gen_lf)
        ct_para = gen_lf.get_paralinear_metric()
        dt_para = dis_lf.get_paralinear_metric()
        # compare values in a fixed key order
        keys = sorted(ct_para)
        assert_allclose([ct_para[k] for k in keys], [dt_para[k] for k in keys])

    def test_general_vs_constructed_general(self):
        """a constructed general lnL should be identical to General"""
        sm_lf = self.make_cached("constructed_gen", max_evaluations=25)
        sm_lnL = sm_lf.get_log_likelihood()
        gen_lf = self.make_cached("general", max_evaluations=0)
        # transfer the fitted parameter rules onto the General function
        rules = sm_lf.get_param_rules()
        gen_lf.apply_param_rules(rules)
        gen_lnL = gen_lf.get_log_likelihood()
        assert_allclose(sm_lnL, gen_lnL, rtol=0.1)

    def test_general_stationary(self):
        """GeneralStationary lnL should be less than the General lnL"""
        gen_stat_lf = self.make_cached("gen_stat", max_evaluations=25)
        gen_lf = self.make_cached("general", max_evaluations=25)
        gen_stat_lnL = gen_stat_lf.get_log_likelihood()
        gen_lnL = gen_lf.get_log_likelihood()
        self.assertLess(gen_stat_lnL, gen_lnL)

    def test_general_stationary_is_stationary(self):
        """should be stationary"""
        gen_stat_lf = self.make_cached("gen_stat")
        mprobs = gen_stat_lf.get_motif_probs()
        mprobs = array([mprobs[nuc] for nuc in DNA.alphabet])
        for edge in self.tree:
            psub = gen_stat_lf.get_psub_for_edge(edge.name)
            # stationary: motif probs are unchanged after applying psub
            pi = dot(mprobs, psub.array)
            assert_allclose(mprobs, pi)

    def test_general_is_not_stationary(self):
        """should not be stationary"""
        gen_lf = self.make_cached("general", max_evaluations=5)
        mprobs = gen_lf.get_motif_probs()
        mprobs = array([mprobs[nuc] for nuc in DNA.alphabet])
        for edge in self.tree:
            psub = gen_lf.get_psub_for_edge(edge.name)
            pi = dot(mprobs, psub.array)
            # the closeness check must fail on every edge; assertRaises makes
            # the test fail if the motif probs are unchanged by psub
            with self.assertRaises(AssertionError):
                assert_allclose(mprobs, pi)

    def test_strand_symmetric(self):
        """StrandSymmetric should fit a strand symmetric model"""
        warnings.filterwarnings("ignore", "Model not reversible", UserWarning)
        taxa = "Human", "Mouse", "Opossum"
        aln = make_aligned_seqs(data=_aln, moltype=DNA)
        aln = aln[2::3].no_degenerates()
        tree = make_tree(tip_names=taxa)
        model = StrandSymmetric(optimise_motif_probs=True)
        lf = model.make_likelihood_function(tree)
        lf.set_alignment(aln)
        for param, val in [
            ("(A>G | T>C)", 2.454),
            ("(A>T | T>A)", 1.5783),
            ("(C>G | G>C)", 0.6687),
            ("(C>T | G>A)", 6.7026),
            ("(G>T | C>A)", 0.9219),
        ]:
            lf.set_param_rule(param, init=val)

        order = "ACGT"
        # S reverses the ACGT order, i.e. swaps A<->T and C<->G
        S = array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]])
        P = empty((4, 4))
        for edge in taxa:
            Psub = lf.get_psub_for_edge(edge)
            for i, from_base in enumerate(order):
                for j, to_base in enumerate(order):
                    P[i, j] = Psub[from_base][to_base]
            # P must be unchanged when both axes are complemented
            numpy.testing.assert_almost_equal(P, S.dot(P).dot(S))

    def test_nsGN(self):
        """constructing StrandSymmetric with these args should not raise"""
        kw = {
            "model_gaps": False,
            "name": "StrandSymmetric",
            "optimise_motif_probs": True,
            "recode_gaps": True,
        }
        # only successful construction is being checked
        StrandSymmetric(**kw)

    def test_nr_nucleotide(self):
        """This is exercising a NonReversibleNucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
        ]
        sm = NonReversibleNucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A"])

    def test_nr_dinucleotide(self):
        """This is exercising a NonReversibleDinucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CG", "TG", forward_only=True),
        ]
        sm = NonReversibleDinucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CG>TG"])

    def test_nr_trinucleotide(self):
        """This is exercising a NonReversibleTrinucleotide"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CGA", "TGA", forward_only=True),
        ]
        sm = NonReversibleTrinucleotide(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CGA>TGA"])
        self.assertEqual(len(sm.get_motifs()), 64)

    def test_nr_codon(self):
        """This is exercising a NonReversibleCodon"""
        preds = [
            MotifChange("A", "C", forward_only=True),
            MotifChange("G", "A", forward_only=True),
            MotifChange("CG", "TG", forward_only=True),
            "replacement",
        ]
        sm = NonReversibleCodon(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["A>C", "G>A", "CG>TG", "replacement"])

    def test_nr_protein(self):
        """This is exercising a NonReversibleProtein"""
        preds = [
            MotifChange("D", "K", forward_only=True),
            MotifChange("R", "V", forward_only=True),
        ]
        sm = NonReversibleProtein(predicates=preds)
        got = sm.get_param_list()
        self.assertEqual(got, ["D>K", "R>V"])


# run the unittest runner when this file is executed as a script
if __name__ == "__main__":
    main()
