phylopomp
Phylodynamics for POMPs
Loading...
Searching...
No Matches
parse.cc
Go to the documentation of this file.
1#include "genealogy.h"
2#include "generics.h"
3#include "internal.h"
4#include <regex>
5#include <unordered_map>
6
7void
9(void)
10{
11 for (node_t *p : *this) {
12 if (p->empty()) {
13 ball_t *b = new ball_t(p,p->uniq,blue,p->deme());
14 p->insert(b);
15 } else if (p->holds(black))
16 swap(p->last_ball(),p->green_ball());
17 }
18}
19
20void
22(void)
23{
24 for (node_t *p : *this) {
25 if (!p->holds_own() &&
26 p->slate == p->parent()->slate &&
27 p->deme() == p->parent()->deme()) {
28 while (!p->empty()) {
29 ball_t *b = p->last_ball();
30 p->erase(b); p->parent()->insert(b);
31 }
32 detach(p);
33 }
34 }
35}
36
39static
42(const string_t& s)
43{
44 int d;
45 try {
46 d = stoi(s);
47 if (d < 0) err("in '%s': negative deme number detected.",__func__);
48 }
49 catch (const std::invalid_argument& e) {
50 err("in '%s': invalid Newick format: deme should be indicated with an integer.",__func__);
51 }
52 catch (const std::out_of_range& e) {
53 err("in '%s': invalid Newick format: deme out of range.",__func__);
54 }
55 catch (const std::exception& e) {
56 err("in '%s': parsing deme label: %s.",__func__,e.what());
57 }
58 catch (...) {
59 err("in '%s': other deme-parsing error.",__func__);
60 }
61 return name_t(d);
62}
63
65static
68(const std::string& s)
69{
70 std::string copy(s);
71 const std::unordered_map<string_t,color_t> options({
72 {"sample",blue},{"extant",black},{"migration",green},
73 {"node",green},{"branch",green},{"root",green}
74 });
75 color_t col = green;
76 std::transform(copy.begin(),copy.end(),copy.begin(),
77 [](unsigned char c){return std::tolower(c);});
78 try {
79 col = options.at(copy);
80 }
81 catch (const std::out_of_range& e) {
82 err("in %s: invalid metadata: type '%s' not recognized.",__func__,s.c_str());
83 }
84 return col;
85}
86
88static
91(string_t::const_iterator b,
92 string_t::const_iterator e)
93{
94 double bl = 0.0;
95 if (b != e) {
96 std::smatch m;
97 if (std::regex_match(b,e,m,std::regex("^(?:\\[.*?\\])?([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:\\[.*?\\])?$"))) {
98 try {
99 bl = stod(m[1].str());
100 }
101 catch (const std::invalid_argument& e) {
102 err("in '%s': invalid Newick format: branch length should be a non-negative decimal number.",__func__);
103 }
104 catch (const std::out_of_range& e) {
105 err("in '%s': invalid Newick format: branch length out of range.",__func__);
106 }
107 catch (const std::exception& e) {
108 err("in '%s': parsing branch-length: %s.",__func__,e.what());
109 }
110 catch (...) {
111 err("in '%s': other branch-length parsing error.",__func__);
112 }
113 if (bl < 0.0) err("in '%s': negative branch length detected.",__func__);
114 } else {
115 warn("in '%s': in branch-length spec '%s': no branch-length detected: assuming zero branch length.",__func__,string_t(b,e).c_str());
116 }
117 }
118 return bl;
119}
120
123node_t*
125(string_t::const_iterator b,
126 string_t::const_iterator e,
127 node_t* parent,
128 slate_t bl)
129{
130 name_t deme = 0;
131 color_t col = green;
132 if (b != e) {
133 std::smatch m;
134 if (std::regex_match(b,e,m,std::regex("^.*?\\[&&PhyloPOMP.+?deme=(\\w+).*?\\].*$")))
135 deme = scan_name(m[1].str());
136 if (std::regex_match(b,e,m,std::regex("^.*?\\[&&PhyloPOMP.+?type=(\\w+).*?\\].*$")))
137 col = scan_color(m[1].str());
138 }
139 node_t *q = make_node(deme);
140 if (col != green) {
141 ball_t *b = new ball_t(q,q->uniq,col,deme);
142 q->insert(b);
143 }
144 q->slate = bl+parent->slate;
145 attach(parent,q);
146 push_back(q);
147 ndeme() = (ndeme() > q->deme()) ? ndeme() : q->deme();
148 return q;
149}
150
154(const string_t& s)
155{
156 node_t *p = 0, *q;
157 slate_t tf = timezero();
158 slate_t bl = 0.0;
159 string_t::const_reverse_iterator f = s.crend(), e = s.crbegin(), b = e;
160 bool open = false; // branch-string reading-frame open?
161 int stack = 0, sqstack = 0;
162 if (!s.empty() && *b != ';')
163 err("in '%s': invalid Newick format: no final semicolon.",__func__);
164 while (b != f) {
165 switch (*b) {
166 case ';': // root
167 if (stack != 0)
168 err("in '%s': invalid Newick: unbalanced parentheses.",__func__);
169 p = make_node();
170 p->slate = timezero();
171 push_front(p);
172 b++; e = b;
173 open = true;
174 bl = 0.0;
175 break;
176 case ')': // internal node
177 if (open) {
178 q = scan_branch_label(b.base(),e.base(),p,bl);
179 if (q->holds(black))
180 err("in '%s': 'type=extant' on internal node.",__func__);
181 tf = (q->slate > tf) ? q->slate : tf;
182 bl = 0.0;
183 p = q;
184 } else {
185 err("in '%s': invalid Newick: missing comma or semicolon.",__func__);
186 }
187 b++; e = b;
188 stack++;
189 open = true;
190 break;
191 case '(': // tip node, eldest sister
192 if (open) {
193 q = scan_branch_label(b.base(),e.base(),p,bl);
194 tf = (q->slate > tf) ? q->slate : tf;
195 bl = 0.0;
196 }
197 p = p->parent();
198 b++;
199 e = b;
200 stack--;
201 open = false;
202 break;
203 case ',': // tip node, younger sister
204 if (stack <= 0)
205 err("in '%s': invalid Newick string: misplaced comma or unbalanced parentheses.",__func__);
206 if (open) {
207 q = scan_branch_label(b.base(),e.base(),p,bl);
208 tf = (q->slate > tf) ? q->slate : tf;
209 bl = 0.0;
210 }
211 b++; e = b;
212 open = true;
213 bl = 0.0;
214 break;
215 case ']': // skip metadata
216 sqstack++;
217 while (b != f && sqstack > 0) {
218 b++;
219 if (*b == ']') sqstack++;
220 if (*b == '[') sqstack--;
221 }
222 if (sqstack != 0)
223 err("in '%s': invalid Newick format: unbalanced square brackets.",__func__);
224 else
225 b++;
226 break;
227 case '[':
228 err("in '%s': invalid Newick: unbalanced square brackets.",__func__);
229 break;
230 case ':':
231 if (open) {
232 bl = scan_branch_length(b.base(),e.base());
233 b++; e = b;
234 } else {
235 err("in '%s': invalid Newick format: misplaced colon.",__func__);
236 }
237 break;
238 default:
239 b++;
240 break;
241 }
242 }
243 if (stack != 0)
244 err("in '%s': invalid Newick format: unbalanced parentheses.",__func__);
245 if (open) {
246 q = scan_branch_label(b.base(),e.base(),p,bl);
247 tf = (q->slate > tf) ? q->slate : tf;
248 bl = 0.0;
249 }
250 time() = tf;
251 sort(); cap_tips(); clip_zlb(); weed();
252 return *this;
253}
254
255extern "C" {
256
259 SEXP parse_newick (SEXP X, SEXP T0, SEXP Tf) {
260 PROTECT(X = AS_CHARACTER(X));
261 PROTECT(T0 = AS_NUMERIC(T0));
262 PROTECT(Tf = AS_NUMERIC(Tf));
263 double t0 = *REAL(T0);
264 double tf = *REAL(Tf);
265 // parse the Newick representation into a genealogy:
266 string_t x = CHAR(STRING_ELT(X,0));
267 genealogy_t G(t0);
268 G.parse(x);
269 if (!ISNA(tf)) {
270 G.curtail(tf,t0);
271 }
272 G.trace_lineages();
273 UNPROTECT(3);
274 return serial(G);
275 }
276
277}
color_t
BALL COLORS.
Definition ball.h:12
@ green
Definition ball.h:12
@ black
Definition ball.h:12
@ blue
Definition ball.h:12
Balls function as pointers.
Definition ball.h:27
Encodes a genealogy.
Definition genealogy.h:19
size_t ndeme(void) const
number of demes
Definition genealogy.h:59
void cap_tips(void)
Definition parse.cc:9
void clip_zlb(void)
clip out all zero-length branches
Definition parse.cc:22
slate_t & timezero(void)
view/set zero time.
Definition genealogy.h:153
slate_t & time(void)
view/set current time.
Definition genealogy.h:145
genealogy_t & parse(const string_t &s)
Parse a Newick string and create the indicated genealogy.
Definition parse.cc:154
node_t * make_node(name_t d=undeme)
Definition genealogy.h:229
void curtail(slate_t tnew, slate_t troot)
Definition curtail.cc:12
node_t * scan_branch_label(string_t::const_iterator b, string_t::const_iterator e, node_t *p, slate_t bl)
Definition parse.cc:125
Encodes a genealogical node.
Definition node.h:21
node_t * parent(void) const
Definition node.h:115
name_t deme(void) const
view deme
Definition node.h:96
name_t uniq
Definition node.h:32
ball_t * green_ball(void) const
pointer to my green ball
Definition node.h:88
void insert(ball_t *a)
insert a ball into the pocket of a node
Definition node.h:156
slate_t slate
Definition node.h:33
bool holds_own(void) const
Definition node.h:118
void weed(void)
drop all dead roots
Definition nodeseq.h:203
void detach(node_t *p)
Definition nodeseq.h:168
void sort(void)
order nodes in order of increasing time
Definition nodeseq.h:103
void attach(node_t *p, node_t *q)
Definition nodeseq.h:163
void swap(ball_t *a, ball_t *b)
swap balls a and b, wherever they lie
Definition nodeseq.h:153
void trace_lineages(void)
Definition nodeseq.h:242
ball_t * last_ball(void) const
retrieve the last ball
Definition pocket.h:126
bool holds(ball_t *b) const
does this node hold the given ball?
Definition pocket.h:109
SEXP serial(const TYPE &X)
binary serialization
Definition generics.h:33
size_t name_t
Definition internal.h:54
#define warn(...)
Definition internal.h:19
#define err(...)
Definition internal.h:18
double slate_t
Definition internal.h:53
static const int deme
Definition lbdp.cc:7
static color_t scan_color(const std::string &s)
simple function for scanning the color
Definition parse.cc:68
static slate_t scan_branch_length(string_t::const_iterator b, string_t::const_iterator e)
Scan the branch length.
Definition parse.cc:91
static name_t scan_name(const string_t &s)
Definition parse.cc:42
SEXP parse_newick(SEXP X, SEXP T0, SEXP Tf)
Definition parse.cc:259