phylopomp
Phylodynamics for POMPs
Loading...
Searching...
No Matches
parse.cc
Go to the documentation of this file.
1#include "genealogy.h"
2#include "generics.h"
3#include "internal.h"
4#include <regex>
5#include <unordered_map>
6
7void
9(void)
10{
11 for (node_t *p : *this) {
12 if (p->empty()) {
13 ball_t *b = new ball_t(p,p->uniq,blue,p->deme());
14 p->insert(b);
15 } else if (p->holds(black))
16 swap(p->last_ball(),p->green_ball());
17 }
18}
19
22static
25(const string_t& s)
26{
27 double bl;
28 try {
29 bl = (s.empty()) ? 0.0 : stod(s);
30 }
31 catch (const std::invalid_argument& e) {
32 err("in '%s': invalid Newick format: branch length should be a non-negative decimal number.",__func__);
33 }
34 catch (const std::out_of_range& e) {
35 err("in '%s': invalid Newick format: branch length out of range.",__func__);
36 }
37 catch (const std::exception& e) {
38 err("in '%s': parsing branch-length: %s.",__func__,e.what());
39 }
40 catch (...) {
41 err("in '%s': other branch-length parsing error.",__func__);
42 }
43 if (bl < 0.0) err("in '%s': negative branch length detected.",__func__);
44 return slate_t(bl);
45}
46
49static
52(const string_t& s)
53{
54 int d;
55 try {
56 d = stoi(s);
57 if (d < 0) err("in '%s': negative deme number detected.",__func__);
58 }
59 catch (const std::invalid_argument& e) {
60 err("in '%s': invalid Newick format: deme should be indicated with an integer.",__func__);
61 }
62 catch (const std::out_of_range& e) {
63 err("in '%s': invalid Newick format: deme out of range.",__func__);
64 }
65 catch (const std::exception& e) {
66 err("in '%s': parsing deme label: %s.",__func__,e.what());
67 }
68 catch (...) {
69 err("in '%s': other deme-parsing error.",__func__);
70 }
71 return name_t(d);
72}
73
75static
78(const std::string& s)
79{
80 std::string copy(s);
81 const std::unordered_map<string_t,color_t> options({
82 {"sample",blue},{"extant",black},{"migration",green},
83 {"node",green},{"branch",green},{"root",green}
84 });
85 color_t col = green;
86 std::transform(copy.begin(),copy.end(),copy.begin(),
87 [](unsigned char c){return std::tolower(c);});
88 try {
89 col = options.at(copy);
90 }
91 catch (const std::out_of_range& e) {
92 err("in %s: invalid metadata: type '%s' not recognized.",__func__,s.c_str());
93 }
94 return col;
95}
96
99node_t*
101(string_t::const_iterator b,
102 string_t::const_iterator e)
103{
104 name_t deme = 0;
105 color_t col = green;
106 slate_t bl = 0;
107 if (b != e) {
108 std::smatch m;
109 if (std::regex_match(b,e,m,std::regex("^.*?\\[&&PhyloPOMP.+?deme=(\\w+).*?\\].*$")))
110 deme = scan_name(m[1].str());
111 if (std::regex_match(b,e,m,std::regex("^.*?\\[&&PhyloPOMP.+?type=(\\w+).*?\\].*$")))
112 col = scan_color(m[1].str());
113 if (std::regex_match(b,e,m,std::regex("^.*:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)$")))
114 bl = scan_slate(m[1].str());
115 else
116 warn("in '%s': in branch-string '%s': no branch-length detected: assuming zero branch length.",
117 __func__,string_t(b,e).c_str());
118 }
119 node_t *q = make_node(deme);
120 if (col != green) {
121 ball_t *b = new ball_t(q,q->uniq,col,deme);
122 q->insert(b);
123 }
124 q->slate = bl;
125 return q;
126}
127
131(const string_t& s)
132{
133 node_t *p = 0, *q;
134 slate_t tf = timezero();
135 string_t::const_reverse_iterator b = s.crbegin(), e = b;
136 bool open = false; // branch-string reading-frame open?
137 int stack = 0, sqstack = 0;
138 if (!s.empty() && *b != ';')
139 err("in '%s': invalid Newick format: no final semicolon.",__func__);
140 while (b != s.crend()) {
141 switch (*b) {
142 case ';': // root
143 if (stack != 0)
144 err("in '%s': invalid Newick: unbalanced parentheses.",__func__);
145 p = make_node();
146 p->slate = timezero();
147 push_front(p);
148 b++;
149 e = b;
150 open = true;
151 break;
152 case ')': // internal node
153 if (open) {
154 q = scan_branch(b.base(),e.base());
155 if (q->holds(black))
156 err("in '%s': 'type=extant' on internal node.",__func__);
157 q->slate += p->slate;
158 attach(p,q);
159 push_back(q);
160 tf = (q->slate > tf) ? q->slate : tf;
161 ndeme() = (ndeme() > q->deme()) ? ndeme() : q->deme();
162 p = q;
163 } else {
164 err("in '%s': invalid Newick: missing comma or semicolon.",__func__);
165 }
166 stack++;
167 b++;
168 e = b;
169 open = true;
170 break;
171 case '(': // tip node, eldest sister
172 if (open) {
173 q = scan_branch(b.base(),e.base());
174 q->slate += p->slate;
175 attach(p,q);
176 push_back(q);
177 tf = (q->slate > tf) ? q->slate : tf;
178 ndeme() = (ndeme() > q->deme()) ? ndeme() : q->deme();
179 }
180 p = p->parent();
181 b++;
182 stack--;
183 e = b;
184 open = false;
185 break;
186 case ',': // tip node, younger sister
187 if (stack <= 0)
188 err("in '%s': invalid Newick string: misplaced comma or unbalanced parentheses.",__func__);
189 if (open) {
190 q = scan_branch(b.base(),e.base());
191 q->slate += p->slate;
192 attach(p,q);
193 push_back(q);
194 tf = (q->slate > tf) ? q->slate : tf;
195 ndeme() = (ndeme() > q->deme()) ? ndeme() : q->deme();
196 }
197 b++;
198 e = b;
199 open = true;
200 break;
201 case ']': // skip metadata
202 sqstack++;
203 while (b != s.crend() && sqstack > 0) {
204 b++;
205 if (*b == ']') sqstack++;
206 if (*b == '[') sqstack--;
207 }
208 if (sqstack != 0)
209 err("in '%s': invalid Newick format: unbalanced square brackets.",__func__);
210 if (b != s.crend()) b++;
211 break;
212 default:
213 b++;
214 break;
215 }
216 }
217 if (stack != 0)
218 err("in '%s': invalid Newick format: unbalanced parentheses.",__func__);
219 if (open) {
220 q = scan_branch(b.base(),e.base());
221 q->slate += p->slate;
222 attach(p,q);
223 push_back(q);
224 tf = (q->slate > tf) ? q->slate : tf;
225 ndeme() = (ndeme() > q->deme()) ? ndeme() : q->deme();
226 }
227 time() = tf;
228 sort(); repair_tips(); drop_zlb(); weed();
229 return *this;
230}
231
232extern "C" {
233
236 SEXP parse_newick (SEXP X, SEXP T0, SEXP Tf) {
237 PROTECT(X = AS_CHARACTER(X));
238 PROTECT(T0 = AS_NUMERIC(T0));
239 PROTECT(Tf = AS_NUMERIC(Tf));
240 double t0 = *REAL(T0);
241 double tf = *REAL(Tf);
242 // parse the Newick representation into a genealogy:
243 string_t x = CHAR(STRING_ELT(X,0));
244 genealogy_t G(t0);
245 G.parse(x);
246 if (!ISNA(tf)) {
247 G.curtail(tf,t0);
248 }
249 G.trace_lineages();
250 UNPROTECT(3);
251 return serial(G);
252 }
253
254}
color_t
BALL COLORS.
Definition ball.h:12
@ green
Definition ball.h:12
@ black
Definition ball.h:12
@ blue
Definition ball.h:12
Balls function as pointers.
Definition ball.h:27
Encodes a genealogy.
Definition genealogy.h:19
void drop_zlb(void)
drop all zero-length branches
Definition genealogy.h:373
size_t ndeme(void) const
number of demes
Definition genealogy.h:59
slate_t & timezero(void)
view/set zero time.
Definition genealogy.h:153
slate_t & time(void)
view/set current time.
Definition genealogy.h:145
genealogy_t & parse(const string_t &s)
Parse a Newick string and create the indicated genealogy.
Definition parse.cc:131
node_t * make_node(name_t d=undeme)
Definition genealogy.h:229
void curtail(slate_t tnew, slate_t troot)
Definition curtail.cc:11
void repair_tips(void)
Definition parse.cc:9
node_t * scan_branch(string_t::const_iterator b, string_t::const_iterator e)
Definition parse.cc:101
Encodes a genealogical node.
Definition node.h:21
node_t * parent(void) const
Definition node.h:115
name_t deme(void) const
view deme
Definition node.h:96
name_t uniq
Definition node.h:32
ball_t * green_ball(void) const
pointer to my green ball
Definition node.h:88
void insert(ball_t *a)
insert a ball into the pocket of a node
Definition node.h:156
slate_t slate
Definition node.h:33
void weed(void)
drop all dead roots
Definition nodeseq.h:214
void sort(void)
order nodes in order of increasing time
Definition nodeseq.h:103
void attach(node_t *p, node_t *q)
Definition nodeseq.h:174
void swap(ball_t *a, ball_t *b)
swap balls a and b, wherever they lie
Definition nodeseq.h:164
void trace_lineages(void)
Definition nodeseq.h:253
ball_t * last_ball(void) const
retrieve the last ball
Definition pocket.h:126
bool holds(ball_t *b) const
does this node hold the given ball?
Definition pocket.h:109
SEXP serial(const TYPE &X)
binary serialization
Definition generics.h:33
size_t name_t
Definition internal.h:54
#define warn(...)
Definition internal.h:19
#define err(...)
Definition internal.h:18
double slate_t
Definition internal.h:53
static const int deme
Definition lbdp.cc:7
static color_t scan_color(const std::string &s)
simple function for scanning the color
Definition parse.cc:78
static name_t scan_name(const string_t &s)
Definition parse.cc:52
SEXP parse_newick(SEXP X, SEXP T0, SEXP Tf)
Definition parse.cc:236
static slate_t scan_slate(const string_t &s)
Definition parse.cc:25