|
@@ -2,6 +2,7 @@
|
2
|
2
|
Small assembly module based on de bruijn graphs
|
3
|
3
|
"""
|
4
|
4
|
import networkx as nx
|
|
5
|
+from networkx import algorithms
|
5
|
6
|
|
6
|
7
|
def read_fastq(fichier):
|
7
|
8
|
"""
|
|
@@ -58,10 +59,107 @@ def build_graph(hash_table):
|
58
|
59
|
Arguments:
|
59
|
60
|
hash_table, dict: dictionary obtained with build_kmer_dict() function
|
60
|
61
|
Return:
|
61
|
|
- graph, nx.Graph: the de Bruijn tree corresponding to hash_table
|
|
62
|
+ graph, nx.DiGraph: the de Bruijn tree corresponding to hash_table
|
62
|
63
|
"""
|
63
|
64
|
graph = nx.DiGraph()
|
64
|
65
|
for key in hash_table:
|
65
|
66
|
graph.add_edge(key[:-1], key[1:], weight=hash_table[key])
|
66
|
67
|
|
67
|
68
|
return graph
|
|
69
|
+
|
|
70
|
def get_starting_nodes(graph):
    """
    Collect the nodes of the graph that have no incoming edge.

    Arguments:
        graph, nx.DiGraph: de Bruijn graph

    Return:
        starting_nodes, list of strings: nodes whose in-degree is 0,
            listed in the iteration order of the graph
    """
    return [node for node in graph if graph.in_degree(node) == 0]
|
|
84
|
+
|
|
85
|
def std():
    """
    Placeholder: not implemented yet.

    Takes no argument, performs no work and returns None.
    NOTE(review): the name suggests a standard deviation helper;
    confirm the intended signature before implementing.
    """
|
|
87
|
+
|
|
88
|
+
|
|
89
|
def get_sink_nodes(graph):
    """
    Collect the nodes of the graph that have no outgoing edge.

    Arguments:
        graph, nx.DiGraph: de Bruijn graph

    Return:
        sink_nodes, list of strings: nodes whose out-degree is 0,
            listed in the iteration order of the graph
    """
    return [node for node in graph if graph.out_degree(node) == 0]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
def path_average_weight():
    """
    Placeholder: not implemented yet.

    Takes no argument, performs no work and returns None.
    NOTE(review): presumably meant to average the edge weights of a
    path; confirm the intended signature before implementing.
    """
|
|
107
|
+
|
|
108
|
+
|
|
109
|
def remove_paths():
    """
    Placeholder: not implemented yet.

    Takes no argument, performs no work and returns None.
    NOTE(review): presumably meant to delete paths from the graph;
    confirm the intended signature before implementing.
    """
|
|
111
|
+
|
|
112
|
+
|
|
113
|
def select_best_path():
    """
    Placeholder: not implemented yet.

    Takes no argument, performs no work and returns None.
    NOTE(review): presumably meant to choose one path among several
    candidates; confirm the intended signature before implementing.
    """
|
|
115
|
+
|
|
116
|
+
|
|
117
|
def save_contigs(tuples, outname):
    """
    Write the contigs to a text file, one "contig length" record per line.

    Arguments:
        tuples, iterable of tuples: (contig, length) pairs, as returned by
            get_contigs()
        outname, str: name of the file to be written (overwritten if it
            already exists)
    """
    with open(outname, "w") as outfile:
        # Each record is terminated by a newline; without it consecutive
        # records run together and the file cannot be parsed back.
        outfile.writelines(
            "{} {}\n".format(record[0], record[1]) for record in tuples
        )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
def get_contigs(graph, starting_nodes, sink_nodes):
    """
    Build one contig for every (starting node, sink node) pair that is
    connected in the graph.

    Arguments:
        graph, nx.DiGraph: de Bruijn graph
        starting_nodes, list of strings: list of starting nodes
        sink_nodes, list of strings: list of terminal nodes

    Return:
        contigs, list of tuples: list of (contig, len(contig)) tuples
    """
    contigs = []
    for starting_node in starting_nodes:
        for sink_node in sink_nodes:
            if not algorithms.has_path(graph, starting_node, sink_node):
                continue
            # Only the shortest path between the two nodes is assembled.
            path = algorithms.shortest_path(graph, starting_node, sink_node)
            # The first node is taken whole; every following node only
            # contributes its last character to the contig.
            contig = path[0] + "".join(node[-1] for node in path[1:])
            contigs.append((contig, len(contig)))

    return contigs
|
|
151
|
+
|
|
152
|
def solve_bubble():
    """
    Placeholder: not implemented yet.

    Takes no argument, performs no work and returns None.
    NOTE(review): presumably meant to resolve a single bubble of the
    graph; confirm the intended signature before implementing.
    """
|
|
154
|
+
|
|
155
|
+
|
|
156
|
def simplify_bubbles():
    """
    Placeholder: not implemented yet.

    Takes no argument, performs no work and returns None.
    NOTE(review): presumably meant to resolve every bubble of the
    graph; confirm the intended signature before implementing.
    """
|
|
158
|
+
|
|
159
|
+
|
|
160
|
def solve_entry_tips():
    """
    Placeholder: not implemented yet.

    Takes no argument, performs no work and returns None.
    NOTE(review): presumably meant to prune unwanted entry tips of the
    graph; confirm the intended signature before implementing.
    """
|
|
162
|
+
|
|
163
|
+
|
|
164
|
def solve_out_tips():
    """
    Placeholder: not implemented yet.

    Takes no argument, performs no work and returns None.
    NOTE(review): presumably meant to prune unwanted exit tips of the
    graph; confirm the intended signature before implementing.
    """
|