Browse Source

added functions get_starting_nodes, get_sink_nodes, get_contigs, save_contigs

nicolas-zimmermann 4 years ago
parent
commit
2245289d91
1 changed file with 99 additions and 1 deletion
  1. 99 1
      debruijn/debruijn.py

+ 99 - 1
debruijn/debruijn.py View File

@@ -2,6 +2,7 @@
2 2
 Small assembly module based on de bruijn graphs
3 3
 """
4 4
 import networkx as nx
5
+from networkx import algorithms
5 6
 
6 7
 def read_fastq(fichier):
7 8
     """
@@ -58,10 +59,107 @@ def build_graph(hash_table):
58 59
     Arguments:
59 60
         hash_table, dict: dictionnary obtained with build_kmer_dict() function
60 61
     Return:
61
-        graph, nx.Graph: the de Bruijn tree corresponding to hash_table
62
+        graph, nx.DiGraph: the de Bruijn tree corresponding to hash_table
62 63
     """
63 64
     graph = nx.DiGraph()
64 65
     for key in hash_table:
65 66
         graph.add_edge(key[:-1], key[1:], weight=hash_table[key])
66 67
 
67 68
     return graph
69
+
70
def get_starting_nodes(graph):
    """
    Collect the nodes of the graph that have no incoming edge.

    Arguments:
        graph, nx.DiGraph: de Bruijn tree

    Return:
        starting_nodes, list of strings: list of starting nodes, in the
            iteration order of the graph
    """
    # A node is a starting node when nothing points to it (in-degree of 0).
    return [node for node in graph if graph.in_degree(node) == 0]
84
+
85
def std():
    """
    Placeholder: not implemented yet. Takes no argument and returns None.

    NOTE(review): the name suggests a standard-deviation helper; confirm the
    intended signature before implementing.
    """
    return None
87
+
88
+
89
def get_sink_nodes(graph):
    """
    Collect the nodes of the graph that have no outgoing edge.

    Arguments:
        graph, nx.DiGraph: de Bruijn tree

    Return:
        sink_nodes, list of strings: list of terminal nodes, in the
            iteration order of the graph
    """
    # A node is a sink when it points to nothing (out-degree of 0).
    return [node for node in graph if graph.out_degree(node) == 0]
103
+
104
+
105
def path_average_weight():
    """
    Placeholder: not implemented yet. Takes no argument and returns None.
    """
    return None
107
+
108
+
109
def remove_paths():
    """
    Placeholder: not implemented yet. Takes no argument and returns None.
    """
    return None
111
+
112
+
113
def select_best_path():
    """
    Placeholder: not implemented yet. Takes no argument and returns None.
    """
    return None
115
+
116
+
117
def save_contigs(tuples, outname):
    """
    Write the contigs to a text file, one "<contig> <length>" record per line.

    Arguments:
        tuples, list of tuples: (contig, length) pairs, as obtained from
            get_contigs()
        outname, str: name of the file to be written (overwritten if it
            already exists)
    """
    with open(outname, "w") as outfile:
        # Each record ends with a newline; without it consecutive records
        # run together and the file cannot be split back into contigs.
        outfile.writelines(
            "{} {}\n".format(duo[0], duo[1]) for duo in tuples
        )
128
+
129
+
130
def get_contigs(graph, starting_nodes, sink_nodes):
    """
    Build one contig for every (starting node, sink node) pair that is
    connected in the graph, following the shortest path between them.

    Arguments:
        graph, nx.DiGraph: de Bruijn tree
        starting_nodes, list of strings: list of starting nodes
        sink_nodes, list of strings: list of terminal nodes

    Return:
        contigs, list of tuples: list of tuples (contig, len(contig))
    """
    contigs = []
    for starting_node in starting_nodes:
        for sink_node in sink_nodes:
            # Ask for the path directly and skip unconnected pairs, instead
            # of traversing the graph once to test and once more to fetch.
            try:
                path = algorithms.shortest_path(graph, starting_node, sink_node)
            except nx.NetworkXNoPath:
                continue
            # Consecutive nodes overlap on all but their last character, so
            # the contig is the first node plus the last character of each
            # following node.
            contig = path[0] + "".join(node[-1] for node in path[1:])
            contigs.append((contig, len(contig)))

    return contigs
151
+
152
def solve_bubble():
    """
    Placeholder: not implemented yet. Takes no argument and returns None.
    """
    return None
154
+
155
+
156
def simplify_bubbles():
    """
    Placeholder: not implemented yet. Takes no argument and returns None.
    """
    return None
158
+
159
+
160
def solve_entry_tips():
    """
    Placeholder: not implemented yet. Takes no argument and returns None.
    """
    return None
162
+
163
+
164
def solve_out_tips():
    """
    Placeholder: not implemented yet. Takes no argument and returns None.
    """
    return None