Amine GHOZLANE 5 år sedan
förälder
incheckning
9055bb490a
6 ändrade filer med 257 tillägg och 1 borttagningar
  1. 1 0
      tests/context.py
  2. Binär
      tests/kmer_comp.pck
  3. 8 0
      tests/test.fna
  4. 59 0
      tests/test_characteristics.py
  5. 19 1
      tests/test_construction_debruijn.py
  6. 170 0
      tests/test_decision.py

+ 1 - 0
tests/context.py Visa fil

@@ -7,3 +7,4 @@ import sys
7 7
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__),
8 8
                                                 '../debruijn')))
9 9
 import debruijn
10
+#import debruijn_comp

Binär
tests/kmer_comp.pck Visa fil


+ 8 - 0
tests/test.fna Visa fil

@@ -0,0 +1,8 @@
1
+>contig_0 len=8
2
+TCAGCGAT
3
+>contig_1 len=8
4
+TCAGCGAA
5
+>contig_2 len=8
6
+ACAGCGAT
7
+>contig_3 len=8
8
+ACAGCGAA

+ 59 - 0
tests/test_characteristics.py Visa fil

@@ -0,0 +1,59 @@
1
+"""Tests for graph characteristic"""
2
+import pytest
3
+import os
4
+import networkx as nx
5
+import hashlib
6
+from .context import debruijn
7
+#from .context import debruijn_comp
8
+from debruijn import get_starting_nodes
9
+from debruijn import get_sink_nodes
10
+from debruijn import get_contigs
11
+from debruijn import save_contigs
12
+
13
+
14
+def test_get_starting_nodes():  # get_starting_nodes must report exactly nodes 1 and 3 for this graph
15
+    graph = nx.DiGraph()
16
+    graph.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (5, 6), (5, 7)])  # 1 and 3 never appear as an edge target
17
+    nodes = get_starting_nodes(graph)    
18
+    assert len(nodes) == 2  # nothing besides the two expected nodes
19
+    assert 1 in nodes
20
+    assert 3 in nodes
21
+
22
+def test_get_sink_nodes():  # get_sink_nodes must report exactly nodes 6 and 7 for this graph
23
+    graph = nx.DiGraph()
24
+    graph.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (5, 6), (5, 7)])  # 6 and 7 never appear as an edge source
25
+    nodes = get_sink_nodes(graph)
26
+    assert len(nodes) == 2  # nothing besides the two expected nodes
27
+    assert 6 in nodes
28
+    assert 7 in nodes
29
+
30
+def test_get_contigs():  # each (start, end) node pair must yield one 8-letter contig
31
+    graph = nx.DiGraph()
32
+    graph.add_edges_from([("TC", "CA"), ("AC", "CA"), ("CA", "AG"), ("AG", "GC"), ("GC", "CG"), ("CG", "GA"), ("GA", "AT"), ("GA", "AA")])
33
+    contig_list = get_contigs(graph, ["TC", "AC"], ["AT" , "AA"])  # the two nodes without predecessors, then the two without successors
34
+    results = ["TCAGCGAT", "TCAGCGAA", "ACAGCGAT", "ACAGCGAA"]
35
+    assert len(contig_list) == 4  # 2 starts x 2 ends
36
+    for contig in contig_list:
37
+        assert contig[0] in results  # element 0 of each entry is checked against the expected sequences
38
+        assert contig[1] == 8  # element 1 is checked against the sequence length
39
+
40
+
41
+# def test_get_contigs_comp():
42
+#     graph = nx.DiGraph()
43
+#     graph.add_edges_from([(("AG", "TC"), ("CA", "GT")), (("AC", "TG"), ("CA", "GT")), (("CA", "GT"), ("AG", "TC")), 
44
+#         (("AG", "TC"), ("CG", "GC")), (("CG", "GC"), ("CG", "GC")), (("CG", "GC"), ("CT", "GA")), (("CT", "GA"), ("AT", "TC")), 
45
+#         (("CT", "GA"), ("AA", "TT"))])
46
+#     contig_list = get_contigs(graph, ["TC", "AC"], ["AT" , "AA"])
47
+#     results = ["TCAGCGAT", "TCAGCGAA", "ACAGCGAT", "ACAGCGAA"]
48
+#     assert len(contig_list) == 4
49
+#     for contig in contig_list:
50
+#         assert contig[0] in results
51
+#         assert contig[1] == 8
52
+
53
+
54
+def test_save_contigs():  # save_contigs output is verified through the MD5 digest of the written file
55
+    test_file = os.path.abspath(os.path.join(os.path.dirname(__file__), "test.fna"))  # NOTE(review): writes next to this test module, replacing any existing test.fna there - confirm this is intended
56
+    contig = [("TCAGCGAT", 8), ("TCAGCGAA",8), ("ACAGCGAT", 8), ("ACAGCGAA", 8)]  # (sequence, length) pairs
57
+    save_contigs(contig, test_file)
58
+    with open(test_file, 'rb') as contig_test:  # binary mode: the digest is taken over the raw bytes
59
+        assert hashlib.md5(contig_test.read()).hexdigest() == "ca84dfeb5d58eca107e34de09b3cc997"

+ 19 - 1
tests/test_construction_debruijn.py Visa fil

@@ -4,6 +4,7 @@ import os
4 4
 import networkx as nx
5 5
 import pickle
6 6
 from .context import debruijn
7
+#from .context import debruijn_comp
7 8
 from debruijn import read_fastq
8 9
 from debruijn import cut_kmer
9 10
 from debruijn import build_kmer_dict
@@ -48,4 +49,21 @@ def test_build_graph():
48 49
     assert graph.number_of_edges() == 4
49 50
     assert "AG" in graph
50 51
     assert "GA" in graph
51
-    assert graph.edges["AG", "GA"]['weight'] == 2
52
+    assert graph.edges["AG", "GA"]['weight'] == 2
53
+
54
+# def test_build_graph_comp():
55
+#     file = open(os.path.abspath(os.path.join(os.path.dirname(__file__), "kmer_comp.pck")),'rb')
56
+#     kmer_dict = pickle.load(file)
57
+#     graph = build_graph(kmer_dict)
58
+#     #TCAGAGA
59
+#     #TCA  TC CA
60
+#     #CAG CA AG
61
+#     #AGA AG GA
62
+#     #GAG GA AG
63
+#     #AGA AG GA
64
+#     # ((TC, AG), (CA, GT)), (CA, AG), (AG
65
+#     assert graph.number_of_nodes() == 4
66
+#     assert graph.number_of_edges() == 3
67
+#     assert "AG" in graph
68
+#     assert "GA" in graph
69
+#     assert graph.edges["AG", "GA"]['weight'] == 2

+ 170 - 0
tests/test_decision.py Visa fil

@@ -0,0 +1,170 @@
1
+"""Tests decision"""
2
+import pytest
3
+import os
4
+import networkx as nx
5
+import statistics
6
+from .context import debruijn
7
+#from .context import debruijn_comp
8
+from debruijn import std
9
+from debruijn import path_average_weight
10
+from debruijn import remove_paths
11
+from debruijn import select_best_path
12
+from debruijn import solve_bubble
13
+from debruijn import simplify_bubbles
14
+from debruijn import solve_entry_tips
15
+from debruijn import solve_out_tips
16
+
17
+def test_std():  # std is checked on one fixed sample, rounded to one decimal
18
+    assert round(std([9, 5, 15, 20]), 1) == 6.6  # 6.6 is the sample (n-1) standard deviation; the population value would round to 5.7
19
+
20
+
21
+def test_path_weight():  # path_average_weight over a three-edge path
22
+    graph = nx.DiGraph()
23
+    graph.add_weighted_edges_from([(1, 2, 5), (3, 2, 10), (2, 4, 10), (4, 5, 3), 
24
+                                   (5, 6, 10), (5, 7, 10)])
25
+    assert path_average_weight(graph, [1, 2, 4, 5] ) == 6.0  # (5 + 10 + 3) / 3 for edges 1->2, 2->4, 4->5
26
+
27
+def test_remove_paths():  # remove_paths is exercised with all four combinations of its two boolean arguments
28
+    graph_1 = nx.DiGraph()
29
+    graph_2 = nx.DiGraph()
30
+    graph_3 = nx.DiGraph()
31
+    graph_4 = nx.DiGraph()
32
+    graph_1.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (5, 6), (5, 7)])  # four identical graphs, one per flag combination
33
+    graph_2.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (5, 6), (5, 7)])
34
+    graph_3.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (5, 6), (5, 7)])
35
+    graph_4.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (5, 6), (5, 7)])
36
+    graph_1 = remove_paths(graph_1, [(1,2)], True, False)  # NOTE(review): flags are presumably (delete_entry_node, delete_sink_node) - confirm against debruijn.remove_paths
37
+    graph_2 = remove_paths(graph_2, [(5,7)], False, True)
38
+    graph_3 = remove_paths(graph_3, [(2,4,5)], False, False)
39
+    graph_4 = remove_paths(graph_4, [(2,4,5)], True, True)
40
+    assert (1,2) not in graph_1.edges()  # True/False: edge out of the path's first node is gone ...
41
+    assert (3,2) in graph_1.edges()  # ... while the path's last node keeps its other edge
42
+    assert (5,7) not in graph_2.edges()  # False/True: edge into the path's last node is gone ...
43
+    assert (5,6) in graph_2.edges()  # ... while the path's first node keeps its other edge
44
+    assert 4 not in graph_3.nodes()  # False/False: the interior node of the path is removed
45
+    assert (2,4) not in graph_4.edges()  # True/True: every node of the path is removed
46
+    assert (4, 5) not in graph_4.edges()
47
+    assert 2 not in graph_4.nodes()
48
+    assert 4 not in graph_4.nodes()
49
+    assert 5 not in graph_4.nodes()
50
+
51
+def test_select_best_path():
52
+    graph_1 = nx.DiGraph()
53
+    graph_1.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (5, 6), (5, 7)])
54
+    graph_1 = select_best_path(graph_1, [[1,2], [3,2]], [1, 1], [5, 10], delete_entry_node=True)
55
+    assert (1,2) not in graph_1.edges()
56
+    assert (3,2) in graph_1.edges()
57
+    assert 1 not in graph_1.nodes()
58
+    graph_2 = nx.DiGraph()
59
+    graph_2.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (5, 6), (5, 7) , (7, 8)])
60
+    graph_2 = select_best_path(graph_1, [[5, 6], [5, 7, 8]], [1, 2], [13, 10], delete_sink_node=True)
61
+    assert (5,7) not in graph_2.edges()
62
+    assert (7,8) not in graph_2.edges()
63
+    assert (5,6) in graph_2.edges()
64
+    assert 7 not in graph_2.nodes()
65
+    assert 8 not in graph_2.nodes()
66
+    #Select heavier
67
+    graph_3 = nx.DiGraph()
68
+    graph_3.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (2, 8), (8, 9),
69
+                            (9, 5), (5, 6), (5, 7)])
70
+    graph_3 = select_best_path(graph_3, [[2, 4, 5], [2, 8, 9, 5]],
71
+                                         [1, 4], [13, 10])
72
+    assert (2,8) not in graph_3.edges()
73
+    assert (8,9) not in graph_3.edges()
74
+    assert (9,5) not in graph_3.edges()
75
+    assert (2,4) in graph_3.edges()
76
+    assert (4,5) in graph_3.edges()
77
+    assert 8 not in graph_3.nodes()
78
+    assert 9 not in graph_3.nodes()
79
+    assert 2 in graph_3.nodes()
80
+    assert 5 in graph_3.nodes()
81
+    # Select longest
82
+    graph_4 = nx.DiGraph()
83
+    graph_4.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (2, 8), (8, 9), 
84
+                            (9, 5), (5, 6), (5, 7)])
85
+    graph_4 = select_best_path(graph_4, [[2, 4, 5], [2, 8, 9, 5]],
86
+                                         [1, 4], [10, 10])
87
+    assert (2,4) not in graph_4.edges()
88
+    assert (4,5) not in graph_4.edges()
89
+    assert (2,8) in graph_4.edges()
90
+    assert (8,9) in graph_4.edges()
91
+    assert (9,5) in graph_4.edges()
92
+    # Select random
93
+    graph_5 = nx.DiGraph()
94
+    graph_5.add_edges_from([(1, 2), (3, 2), (2, 4), (4, 5), (2, 8), (8, 9),
95
+                            (9, 5), (5, 6), (5, 7)])
96
+    graph_5 = select_best_path(graph_5, [[2, 4, 5], [2, 8, 9, 5]],
97
+                                         [1, 4], [10, 10])
98
+
99
+def test_solve_bubble():
100
+    graph_1 = nx.DiGraph()
101
+    graph_1.add_weighted_edges_from([(1, 2, 10), (3, 2, 10), (2, 4, 15), 
102
+                                     (4, 5, 15), (2, 10,10), (10, 5,10),
103
+                                     (2, 8, 3), (8, 9, 3), (9, 5, 3),
104
+                                     (5, 6, 10), (5, 7, 10)])
105
+    graph_1 = solve_bubble(graph_1, 2, 5)
106
+    assert (2,8) not in graph_1.edges()
107
+    assert (8,9) not in graph_1.edges()
108
+    assert (9,5) not in graph_1.edges()
109
+    assert (2,10) not in graph_1.edges()
110
+    assert (10, 5) not in graph_1.edges()
111
+    assert (2,4) in graph_1.edges()
112
+    assert (4,5) in graph_1.edges()
113
+    assert 8 not in graph_1.nodes()
114
+    assert 9 not in graph_1.nodes()
115
+    assert 10 not in graph_1.nodes()
116
+    assert 2 in graph_1.nodes()
117
+    assert 5 in graph_1.nodes()
118
+    graph_2 = nx.DiGraph()
119
+    graph_2.add_weighted_edges_from([(1, 2, 10), (3, 2, 10), (2, 4, 10), 
120
+                                     (4, 5, 10), (2, 10,10), (10, 5,10),
121
+                                     (2, 8, 10), (8, 9, 10), (9, 5, 10),
122
+                                     (5, 6, 10), (5, 7, 10)])
123
+    graph_2 = solve_bubble(graph_2, 2, 5)
124
+    assert (2,4) not in graph_2.edges()
125
+    assert (4,5) not in graph_2.edges()
126
+    assert (2,10) not in graph_1.edges()
127
+    assert (10, 5) not in graph_1.edges()
128
+    assert (2,8) in graph_2.edges()
129
+    assert (8,9) in graph_2.edges()
130
+    assert (9,5) in graph_2.edges()
131
+
132
+
133
+def test_simplify_bubbles():  # simplify_bubbles must resolve the 2->5 bubble without being told where it is
134
+    graph_1 = nx.DiGraph()
135
+    graph_1.add_weighted_edges_from([(3, 2, 10), (2, 4, 15), (4, 5, 15),
136
+                                     (2, 10,10), (10, 5,10), (2, 8, 3),
137
+                                     (8, 9, 3), (9, 5, 3), (5, 6, 10),
138
+                                     (5, 7, 10)])  # three 2->5 paths with edge weights 15, 10 and 3
139
+    graph_1 = simplify_bubbles(graph_1)
140
+    assert (2,8) not in graph_1.edges()  # weight-3 path removed
141
+    assert (8,9) not in graph_1.edges()
142
+    assert (9,5) not in graph_1.edges()
143
+    assert (2,10) not in graph_1.edges()  # weight-10 path removed
144
+    assert (10, 5) not in graph_1.edges()
145
+
146
+def test_solve_entry_tips():  # solve_entry_tips(graph, starting_nodes) must keep a single entry branch into node 2
147
+    graph_1 = nx.DiGraph()
148
+    graph_1.add_weighted_edges_from([(1, 2, 10), (3, 2, 2), (2, 4, 15), (4, 5, 15)])  # two one-edge entry branches, weights 10 and 2
149
+    graph_1 = solve_entry_tips(graph_1, [1, 3])  
150
+    assert (3, 2) not in graph_1.edges()  # lighter branch removed
151
+    assert (1, 2) in graph_1.edges()
152
+    graph_2 = nx.DiGraph()
153
+    graph_2.add_weighted_edges_from([(1, 2, 2), (6, 3, 2), (3, 2, 2),
154
+                                     (2, 4, 15), (4, 5, 15)])  # equal weights; branch 6->3->2 is one edge longer than 1->2
155
+    graph_2 = solve_entry_tips(graph_2, [1, 6])  
156
+    assert (1, 2) not in graph_2.edges()  # shorter branch removed
157
+    assert (6, 3) in graph_2.edges()
158
+    assert (3, 2) in graph_2.edges()
159
+
160
+def test_solve_out_tips():  # solve_out_tips(graph, sink_nodes) must keep a single exit branch out of node 4
161
+    graph_1 = nx.DiGraph()
162
+    graph_1.add_weighted_edges_from([(1, 2, 15), (2, 3, 15), (4, 5, 15), (4, 6, 2)])  # two one-edge exit branches, weights 15 and 2
163
+    graph_1 = solve_out_tips(graph_1, [5, 6])  
164
+    assert (4, 6) not in graph_1.edges()  # lighter branch removed
165
+    assert (4, 5) in graph_1.edges()  
166
+    graph_2 = nx.DiGraph()
167
+    graph_2.add_weighted_edges_from([(1, 2, 15), (2, 3, 15), (4, 5, 2), (4, 6, 2) , (6, 7, 2)])  # equal weights; branch 4->6->7 is one edge longer than 4->5
168
+    graph_2 = solve_out_tips(graph_2, [5, 7])  
169
+    assert (4, 5) not in graph_2.edges()  # shorter branch removed
170
+    assert (6, 7) in graph_2.edges()