|
@@ -2,6 +2,7 @@
|
2
|
2
|
Small assembly module based on de bruijn graphs
|
3
|
3
|
"""
|
4
|
4
|
import os
|
|
5
|
+import statistics
|
5
|
6
|
import networkx as nx
|
6
|
7
|
from networkx import algorithms
|
7
|
8
|
|
|
@@ -83,8 +84,15 @@ def get_starting_nodes(graph):
|
83
|
84
|
|
84
|
85
|
return starting_nodes
|
85
|
86
|
|
86
|
|
def std(values):
    """
    Compute the sample standard deviation of a list of values.

    Arguments:
        values, list: list of numeric values (at least two items)

    Returns:
        float: sample standard deviation of the 'values' data list

    Raises:
        statistics.StatisticsError: if 'values' holds fewer than two items
    """
    # stdev belongs to the statistics module imported at the top of the file,
    # and it consumes the whole sequence: float() cannot convert a list.
    return statistics.stdev(values)
|
88
|
96
|
|
89
|
97
|
|
90
|
98
|
def get_sink_nodes(graph):
|
|
@@ -103,12 +111,47 @@ def get_sink_nodes(graph):
|
103
|
111
|
return sink_nodes
|
104
|
112
|
|
105
|
113
|
|
106
|
|
def path_average_weight(graph, path):
    """
    Compute the mean weight of the edges along a path.

    Arguments:
        graph, nx.DiGraph: a de Bruijn graph; each edge followed by the path
            is looked up as graph[u][v]["weight"]
        path, list of str(nodes): list of nodes constituting a path

    Return:
        mean weight, float: the mean weight of the edges of the path

    Raises:
        ValueError: if the path has fewer than two nodes (no edge to average)
    """
    if len(path) < 2:
        # A path without any edge has no defined mean weight; fail with a
        # clear message instead of an opaque ZeroDivisionError.
        raise ValueError("path must contain at least two nodes")

    # Walk consecutive node pairs (path[i], path[i+1]) and read each edge's
    # "weight" attribute from the graph adjacency.
    edge_weights = [
        graph[source][target]["weight"]
        for source, target in zip(path, path[1:])
    ]
    return sum(edge_weights) / len(edge_weights)
|
109
|
128
|
|
110
|
|
def remove_paths(graph, paths, delete_entry_node=False, delete_sink_node=False):
    """
    Remove the nodes of every path in 'paths' from 'graph'. The first (entry)
    and last (sink) node of each path are kept unless specified otherwise.

    Arguments:
        graph, nx.DiGraph: de Bruijn graph (modified in place)
        paths, list of string lists: list of paths composed of nodes
        delete_entry_node, boolean: also delete the first node of each path
            if True
        delete_sink_node, boolean: also delete the last node of each path
            if True

    Return:
        graph, nx.DiGraph: the same graph object, with the paths deleted.
    """
    # Slice bounds: start at 0 to include the entry node, otherwise skip it;
    # stop at None to include the sink node, otherwise stop before it.
    first = 0 if delete_entry_node else 1
    last = None if delete_sink_node else -1

    for path in paths:
        graph.remove_nodes_from(path[first:last])

    return graph
|
112
|
155
|
|
113
|
156
|
|
114
|
157
|
def select_best_path():
|
|
@@ -153,7 +196,7 @@ def get_contigs(graph, starting_nodes, sink_nodes):
|
153
|
196
|
for i in range(len(path)-1):
|
154
|
197
|
contig += path[i+1][-1]
|
155
|
198
|
contigs.append((contig, len(contig)))
|
156
|
|
-
|
|
199
|
+
|
157
|
200
|
return contigs
|
158
|
201
|
|
159
|
202
|
def solve_bubble():
|