PandA-2024.02
generate.c
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <assert.h>
9 #include <math.h>
10 #include "viterbi.h"
11 
12 // For testing, we can always choose the most likely probability instead of
13 // randomly sampling
14 //#define DETERMINISTIC_TRANSITION
15 //#define DETERMINISTIC_EMISSION
16 //#define DEBUG
17 
18 void dump_transition(prob_t transition[N_STATES*N_STATES],int log) {
19  state_t s0,s1;
20  prob_t p;
21  printf("transition\n");
22  for(s0=0;s0<N_STATES;s0++) {
23  printf("[ ");
24  p = 0.0;
25  for(s1=0;s1<N_STATES;s1++) {
26  p += expf(-transition[s0*N_STATES+s1]);
27  if(log)
28  printf("%4.2f,", transition[s0*N_STATES+s1]);
29  else
30  printf("%01.2f,", expf(-transition[s0*N_STATES+s1]));
31  }
32  printf("] sum=%4.2f\n", p);
33  }
34 }
35 
36 void dump_emission(prob_t emission[N_STATES*N_TOKENS], int log) {
37  state_t s;
38  tok_t o;
39  prob_t p;
40  printf("emission\n");
41  for(s=0;s<N_STATES;s++) {
42  printf("[ ");
43  p = 0.0;
44  for(o=0;o<N_TOKENS;o++) {
45  p += expf(-emission[s*N_TOKENS+o]);
46  if(log)
47  printf("%4.2f,", emission[s*N_TOKENS+o]);
48  else
49  printf("%01.2f,", expf(-emission[s*N_TOKENS+o]));
50  }
51  printf("] sum=%4.2f\n", p);
52  }
53 }
54 
55 void dump_init(prob_t init[N_STATES],int log) {
56  state_t s;
57  printf("init\n");
58  printf("[ ");
59  for(s=0;s<N_STATES;s++){
60  if(log)
61  printf("%4.2f,", init[s]);
62  else
63  printf("%01.2f,", expf(-init[s]));
64  }
65  printf("]\n");
66 }
67 
68 void dump_path(state_t path[N_OBS]){
69  step_t t;
70  printf("path\n[");
71  for(t=0; t<N_OBS; t++) {
72  printf("%d,",path[t]);
73  }
74  printf("]\n");
75 }
76 
77 void dump_obs(tok_t obs[N_OBS]){
78  step_t t;
79  printf("obs\n[");
80  for(t=0; t<N_OBS; t++) {
81  printf("%d,",obs[t]);
82  }
83  printf("]\n");
84 }
85 
86 int main(int argc, char **argv) {
87  struct bench_args_t data;
88  int fd;
89  state_t s0, s1, s[N_OBS];
90  tok_t o;
91  step_t t;
92  double P[N_STATES];
93  double Q[N_TOKENS];
94  struct prng_rand_t state;
95 #if !defined(DETERMINISTIC_TRANSITION) || !defined(DETERMINISTIC_EMISSION)
96  prob_t r;
97 #endif
98 #if defined(DETERMINISTIC_TRANSITION) || defined(DETERMINISTIC_TRANSITION)
99  prob_t min;
100  tok_t omin;
101 #endif
102 
103  // Fill data structure
104  prng_srand(1,&state);
105 
106  // Generate a random transition matrix P(S1|S0)
107  // Invariant: SUM_S1 P(S1|S0) = 1
108  for(s0=0; s0<N_STATES; s0++) {
109  // Generate random weights
110  double sum = 0;
111  for(s1=0; s1<N_STATES; s1++) {
112  P[s1] = ((double)prng_rand(&state)/(double)PRNG_RAND_MAX);
113 #ifndef DETERMINISTIC_TRANSITION
114  if(s1==s0) P[s1] = N_STATES; // self-transitions are much more likely
115 #else
116  if(s1==s0) P[s1] = 0.5/N_STATES; // self-transitions are less likely (otherwise we'd never change states)
117 #endif
118  sum += P[s1];
119  }
120  // Normalize and convert to -log domain
121  for(s1=0; s1<N_STATES; s1++) {
122  data.transition[s0*N_STATES+s1] = -1*logf(P[s1]/sum);
123  }
124  }
125 
126  // Generate a random emission matrix P(O|S)
127  // Invariant: SUM_O P(O|S) = 1
128  for(s0=0; s0<N_STATES; s0++) {
129  // Generate random weights
130  double sum = 0;
131  for(o=0; o<N_TOKENS; o++) {
132  Q[o] = ((double)prng_rand(&state)/(double)PRNG_RAND_MAX);
133  if( o==s0 ) Q[o] = N_TOKENS; // one token is much more likely
134  sum += Q[o];
135  }
136  // Normalize and convert to -log domain
137  for(o=0; o<N_TOKENS; o++) {
138  data.emission[s0*N_TOKENS+o] = -1*logf(Q[o]/sum);
139  }
140  }
141 
142  // Generate a random starting distribution P(S_0)
143  // Invariant: SUM P(S_0) = 1
144  {
145  // Generate random weights
146  double sum = 0;
147  for(s0=0; s0<N_STATES; s0++) {
148  P[s0] = ((double)prng_rand(&state)/(double)PRNG_RAND_MAX);
149  sum += P[s0];
150  }
151  // Normalize and convert to -log domain
152  for(s0=0; s0<N_STATES; s0++) {
153  data.init[s0] = -1*logf(P[s0]/sum);
154  }
155  }
156 
157  #ifdef DEBUG
158  dump_init(data.init,0);
159  dump_transition(data.transition,0);
160  dump_emission(data.emission,0);
161  #endif
162 
163  // To get observations, just run the HMM forwards N_OBS steps;
164  // Nondeterministic sampling uses the inverse transform method
165 
166  // Sample s_0 from init
167 #ifndef DETERMINISTIC_TRANSITION
168  r = ((double)prng_rand(&state)/(double)PRNG_RAND_MAX);
169  s[0]=0; do{r-=expf(-data.init[s[0]]);} while(r>0&&(++s[0]));
170 #else
171  s[0]=0;
172  min=data.init[0];
173  for(s0=1;s0<N_STATES;s0++) {
174  if(data.init[s0]<min) {
175  min=data.init[s0];
176  s[0]=s0;
177  }
178  }
179 #endif
180  // Sample o_0 from emission
181 #ifndef DETERMINISTIC_EMISSION
182  r = ((double)prng_rand(&state)/(double)PRNG_RAND_MAX);
183  for( o=0; r>0; ++o ) {r-=expf(-data.emission[s[0]*N_TOKENS+o]);}
184  o=0; do{ r-=expf(-data.emission[s[0]*N_TOKENS+o]); }while(r>0 && ++o);
185  data.obs[0] = o;
186 #else
187  omin=0;
188  min=data.emission[s[0]*N_TOKENS+0];
189  for(o=1;o<N_TOKENS;o++) {
190  if(data.emission[s[0]*N_TOKENS+o]<min) {
191  min=data.emission[s[0]*N_TOKENS+o];
192  omin=o;
193  }
194  }
195  data.obs[0] = omin;
196 #endif
197 
198  for(t=1; t<N_OBS; t++) {
199  // Sample s_t from transition
200 #ifndef DETERMINISTIC_TRANSITION
201  r = ((double)prng_rand(&state)/(double)PRNG_RAND_MAX);
202  s[t]=0; do{r-=expf(-data.transition[s[t-1]*N_STATES+s[t]]);} while(r>0&&++s[t]);
203 #else
204  s[t]=0;
205  min=data.transition[s[t-1]*N_STATES];
206  for(s0=1;s0<N_STATES;s0++) {
207  if(data.transition[s[t-1]*N_STATES+s0]<min) {
208  min=data.transition[s[t-1]*N_STATES+s0];
209  s[t]=s0;
210  }
211  }
212 #endif
213  // Sample o_t from emission
214 #ifndef DETERMINISTIC_EMISSION
215  r = ((double)prng_rand(&state)/(double)PRNG_RAND_MAX);
216  o=0; do{ r-=expf(-data.emission[s[t]*N_TOKENS+o]); }while(r>0 && ++o);
217  data.obs[t] = o;
218 #else
219  omin=0;
220  min=data.emission[s[t]*N_TOKENS+0];
221  for(o=1;o<N_TOKENS;o++) {
222  if(data.emission[s[t]*N_TOKENS+o]<min) {
223  min=data.emission[s[t]*N_TOKENS+o];
224  omin=o;
225  }
226  }
227  data.obs[t] = omin;
228 #endif
229  }
230 
231  #ifdef DEBUG
232  dump_path(s);
233  dump_obs(data.obs);
234  #endif
235 
236  // Open and write
237  fd = open("input.data", O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH);
238  assert( fd>0 && "Couldn't open input data file" );
239  data_to_input(fd, (void*)(&data));
240 
241  return 0;
242 }
uint8_t tok_t
Definition: viterbi.h:13
static uint64_t prng_rand(struct prng_rand_t *state)
Definition: support.h:85
void dump_init(prob_t init[N_STATES], int log)
Definition: generate.c:55
TYPE prob_t
Definition: viterbi.h:14
void data_to_input(int fd, void *vdata)
Definition: local_support.c:34
#define min(x, y)
int32_t step_t
Definition: viterbi.h:16
void dump_obs(tok_t obs[N_OBS])
Definition: generate.c:77
void dump_emission(prob_t emission[N_STATES *N_TOKENS], int log)
Definition: generate.c:36
int sum
Definition: dotproduct.h:3
const size_t P
Definition: helm.c:5
prob_t transition[N_STATES *N_STATES]
Definition: viterbi.h:33
prob_t emission[N_STATES *N_TOKENS]
Definition: viterbi.h:34
void dump_path(state_t path[N_OBS])
Definition: generate.c:68
void dump_transition(prob_t transition[N_STATES *N_STATES], int log)
Definition: generate.c:18
void init(int bucket[BUCKETSIZE])
Definition: sort.c:42
prob_t init[N_STATES]
Definition: viterbi.h:32
uint64_t s[RAND_SSIZE]
Definition: support.h:77
int main(int argc, char **argv)
Definition: generate.c:12
#define N_OBS
Definition: viterbi.h:22
uint8_t state_t
Definition: viterbi.h:15
static void prng_srand(uint64_t seed, struct prng_rand_t *state)
Definition: support.h:106
tok_t obs[N_OBS]
Definition: viterbi.h:31
#define N_STATES
Definition: viterbi.h:21
#define N_TOKENS
Definition: viterbi.h:23
#define PRNG_RAND_MAX
Definition: support.h:82

Generated on Mon Feb 12 2024 13:02:49 for PandA-2024.02 by doxygen 1.8.13