Browse Source

last version still buggy

nicolas-zimmermann 4 years ago
parent
commit
e6337ad43e
1 changed files with 68 additions and 15 deletions
  1. 68 15
      debruijn/debruijn.py

+ 68 - 15
debruijn/debruijn.py View File

45
         hash_table, dict: dictionary with key = k-mer as str
45
         hash_table, dict: dictionary with key = k-mer as str
46
                           and value = count of k-mer occurrences
46
                           and value = count of k-mer occurrences
47
     """
47
     """
48
-    hash_table = {} # initialise empty hash table
48
+    hash_table = {}# initialise empty hash table
49
     it_fastq = read_fastq(fichier)
49
     it_fastq = read_fastq(fichier)
50
     for seq in it_fastq: # for each sequence
50
     for seq in it_fastq: # for each sequence
51
         it_kmer = cut_kmer(seq, k) # count each occurence of k-mer
51
         it_kmer = cut_kmer(seq, k) # count each occurence of k-mer
132
     """
132
     """
133
     weight = 0
133
     weight = 0
134
     for i in range(len(path)-1):
134
     for i in range(len(path)-1):
135
-        weight += graph[path[i][i+i][weight]
136
-    
137
-    return weight/(len(path)-1)
135
+        weight += graph.edges[path[i], path[i+1]]["weight"]
138
 
136
 
137
+    weight = weight/(len(path) - 1)
139
 
138
 
139
+    return weight
140
 
140
 
141
 def remove_paths(graph, paths, delete_entry_node=False, delete_sink_node=False):
141
 def remove_paths(graph, paths, delete_entry_node=False, delete_sink_node=False):
142
     """
142
     """
164
     return graph
164
     return graph
165
 
165
 
166
 
166
 
167
-def select_best_path(graph, paths, path_len, mean_weights,
167
+def select_best_path(graph, paths, path_lens, mean_weights,
168
                      delete_entry_node=False, delete_sink_node=False):
168
                      delete_entry_node=False, delete_sink_node=False):
169
     """
169
     """
170
-    
170
+    Given a list of paths with their lengths and mean weights, keeps only the
171
+    best path, chosen by priority: highest weight, then longest, then at random
172
+
173
+    Arguments:
174
+        graph, nx.DiGraph: a de bruijn graph
175
+        paths, list of str: list of paths
176
+        path_lens: lengths of the paths
177
+        mean_weights: mean weights of the paths
178
+        delete_entry_node, boolean: whether or not the entry node
179
+                                    should be deleted
180
+        delete_sink_node, boolean: whether or not the sink node
181
+                                   should be deleted
182
+
183
+    Returns:
184
+        graph, nx.DiGraph: graph with deleted paths
171
     """
185
     """
172
     max_weight = max(mean_weights)
186
     max_weight = max(mean_weights)
173
-    heaviest = [i for i, j in enumerate(mean_weights) if j == mean_weights]
187
+    heaviest = [i for i, j in enumerate(mean_weights) if j == max_weight]
174
     if len(heaviest) > 1:
188
     if len(heaviest) > 1:
175
-        max_len = max(path_lengths)
176
-        longest = [i for i in heaviest if path_len[i] == max_len]
189
+        max_len = max(path_lens)
190
+        longest = [i for i in heaviest if path_lens[i] == max_len]
177
         if len(longest) > 1:
191
         if len(longest) > 1:
178
             Random.seed(9001)
192
             Random.seed(9001)
179
             best = random.choice[longest]
193
             best = random.choice[longest]
181
             best = longest[0]
195
             best = longest[0]
182
     else:
196
     else:
183
         best = heaviest[0]
197
         best = heaviest[0]
184
-    paths.pop(best)
198
+    
199
+    for p in paths:
200
+        print(p)
185
 
201
 
186
-    return remove_paths(graph, paths, delete_entry_node, delete_sink_node)
202
+    paths2 = [p for p in paths]
203
+    paths2.pop(best)
204
+
205
+    return remove_paths(graph, paths2, delete_entry_node, delete_sink_node)
187
 
206
 
188
 
207
 
189
 def fill(text, width=80):
208
 def fill(text, width=80):
227
     
246
     
228
     return contigs
247
     return contigs
229
 
248
 
230
-def solve_bubble():
231
-    pass
249
+def solve_bubble(graph, ancestor_node, descendent_node):
250
+    """
251
+    Solve a bubble by keeping only the best path between the two nodes
232
 
252
 
253
+    Arguments:
254
+        graph, nx.DiGraph: a de bruijn graph
255
+        ancestor_node, str: a node
256
+        descendent_node, str: a node
233
 
257
 
234
-def simplify_bubbles():
235
-    pass
258
+    Returns:
259
+        graph, nx.DiGraph: the same graph without the bubble
260
+    """
261
+    paths = algorithms.all_simple_paths(graph, ancestor_node, descendent_node)
262
+
263
+    weights = []
264
+    path_lens = []
265
+    for path in paths:# constituting weights and length lists
266
+        weights.append(path_average_weight(graph, path))
267
+        path_lens.append(len(path))
268
+
269
+    return select_best_path(graph, paths, weights, path_lens) # keep best path
270
+
271
+
272
+def simplify_bubbles(graph):
273
+    """
274
+    Returns a bubble-less graph 
275
+
276
+    Arguments:
277
+        graph, nx.DiGraph: a de bruijn graph
236
 
278
 
279
+    Returns:
280
+        graph, nx.DiGraph: a bubble-less de bruijn graph
281
+    """
282
+    fork_nodes = []# empty list containing nodes with 2 or more ancestors
283
+    for node in graph:
284
+        while graph.in_degree(node) >= 2: # if 2 or more ancestor add node
285
+            pred = [n for n in graph.predecessors(node)] 
286
+            ancestor = algorithms.lowest_common_ancestor(graph,pred[0], pred[1])
287
+            graph = solve_bubble(graph, ancestor, node)
288
+
289
+    return graph
237
 
290
 
238
 def solve_entry_tips():
291
 def solve_entry_tips():
239
     pass
292
     pass