From sumomo @ users.sourceforge.jp Fri Dec 5 15:45:36 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Fri, 05 Dec 2008 15:45:36 +0900 Subject: [Julius-cvs 349] CVS update: julius4/gramtools/mkdfa/mkfa-1.44-flex Message-ID: <1228459536.344620.3857.nullmailer@users.sourceforge.jp> Index: julius4/gramtools/mkdfa/mkfa-1.44-flex/lex.yy.c diff -u julius4/gramtools/mkdfa/mkfa-1.44-flex/lex.yy.c:1.2 julius4/gramtools/mkdfa/mkfa-1.44-flex/lex.yy.c:1.3 --- julius4/gramtools/mkdfa/mkfa-1.44-flex/lex.yy.c:1.2 Tue Dec 18 17:45:47 2007 +++ julius4/gramtools/mkdfa/mkfa-1.44-flex/lex.yy.c Fri Dec 5 15:45:36 2008 @@ -8,7 +8,7 @@ /* A lexical scanner generated by flex */ /* Scanner skeleton version: - * $Header: /cvsroot/julius/julius4/gramtools/mkdfa/mkfa-1.44-flex/lex.yy.c,v 1.2 2007/12/18 08:45:47 sumomo Exp $ + * $Header: /cvsroot/julius/julius4/gramtools/mkdfa/mkfa-1.44-flex/lex.yy.c,v 1.3 2008/12/05 06:45:36 sumomo Exp $ */ #define FLEX_SCANNER @@ -512,7 +512,16 @@ errno=0; \ clearerr(yyin); \ } \ - } + int n; \ + for(n = 0; n < result; n++) { \ + if (buf[n] == '\n') { \ + if (n >= 1 && buf[n-1] == '\r') { \ + buf[n-1] = '\n'; \ + } \ + } \ + } \ + } + #endif /* No semi-colon after return; correct usage is to write "yyterminate();" - From sumomo @ users.sourceforge.jp Fri Dec 12 16:14:57 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Fri, 12 Dec 2008 16:14:57 +0900 Subject: [Julius-cvs 350] CVS update: julius4/julius Message-ID: <1229066097.529138.25253.nullmailer@users.sourceforge.jp> Index: julius4/julius/output_file.c diff -u julius4/julius/output_file.c:1.1 julius4/julius/output_file.c:1.2 --- julius4/julius/output_file.c:1.1 Tue Dec 18 17:45:48 2007 +++ julius4/julius/output_file.c Fri Dec 12 16:14:57 2008 @@ -12,7 +12,7 @@ * @author Akinobu Lee * @date Wed Dec 12 11:07:46 2007 * - * $Revision: 1.1 $ + * $Revision: 1.2 $ * */ /* @@ -77,6 +77,8 @@ int i, j; boolean multi; static char phbuf[MAX_HMMNAME_LEN]; + 
SentenceAlign *align; + HMM_Logical *p; if (recog->process_list->next != NULL) multi = TRUE; else multi = FALSE; @@ -157,21 +159,26 @@ } } /* output alignment result if exist */ - if (s->align.filled) { - HMM_Logical *p; - int i; - + for (align = s->align; align; align = align->next) { fprintf(fp, "=== begin forced alignment ===\n"); + switch(align->unittype) { + case PER_WORD: + fprintf(fp, "-- word alignment --\n"); break; + case PER_PHONEME: + fprintf(fp, "-- phoneme alignment --\n"); break; + case PER_STATE: + fprintf(fp, "-- state alignment --\n"); break; + } fprintf(fp, " id: from to n_score unit\n"); fprintf(fp, " ----------------------------------------\n"); - for(i=0;i<s->align.num;i++) { - fprintf(fp, "[%4d %4d] %f ", s->align.begin_frame[i], s->align.end_frame[i], s->align.avgscore[i]); - switch(s->align.unittype) { + for(i=0;i<align->num;i++) { + fprintf(fp, "[%4d %4d] %f ", align->begin_frame[i], align->end_frame[i], align->avgscore[i]); + switch(align->unittype) { case PER_WORD: - fprintf(fp, "%s\t[%s]\n", winfo->wname[s->align.w[i]], winfo->woutput[s->align.w[i]]); + fprintf(fp, "%s\t[%s]\n", winfo->wname[align->w[i]], winfo->woutput[align->w[i]]); break; case PER_PHONEME: - p = s->align.ph[i]; + p = align->ph[i]; if (p->is_pseudo) { fprintf(fp, "{%s}\n", p->name); } else if (strmatch(p->name, p->body.defined->name)) { @@ -181,7 +188,7 @@ } break; case PER_STATE: - p = s->align.ph[i]; + p = align->ph[i]; if (p->is_pseudo) { fprintf(fp, "{%s}", p->name); } else if (strmatch(p->name, p->body.defined->name)) { @@ -190,19 +197,19 @@ fprintf(fp, "%s[%s]", p->name, p->body.defined->name); } if (r->am->hmminfo->multipath) { - if (s->align.is_iwsp[i]) { - fprintf(fp, " #%d (sp)\n", s->align.loc[i]); + if (align->is_iwsp[i]) { + fprintf(fp, " #%d (sp)\n", align->loc[i]); } else { - fprintf(fp, " #%d\n", s->align.loc[i]); + fprintf(fp, " #%d\n", align->loc[i]); } } else { - fprintf(fp, " #%d\n", s->align.loc[i]); + fprintf(fp, " #%d\n", align->loc[i]); } break; } } - - 
fprintf(fp, "re-computed AM score: %f\n", s->align.allscore); + + fprintf(fp, "re-computed AM score: %f\n", align->allscore); fprintf(fp, "=== end forced alignment ===\n"); } Index: julius4/julius/output_stdout.c diff -u julius4/julius/output_stdout.c:1.6 julius4/julius/output_stdout.c:1.7 --- julius4/julius/output_stdout.c:1.6 Tue Oct 21 02:40:23 2008 +++ julius4/julius/output_stdout.c Fri Dec 12 16:14:57 2008 @@ -12,7 +12,7 @@ * @author Akinobu Lee * @date Tue Sep 06 17:18:46 2005 * - * $Revision: 1.6 $ + * $Revision: 1.7 $ * */ /* @@ -756,6 +756,8 @@ Sentence *s; RecogProcess *r; boolean multi; + HMM_Logical *p; + SentenceAlign *align; if (recog->process_list->next != NULL) multi = TRUE; else multi = FALSE; @@ -919,21 +921,26 @@ } /* output alignment result if exist */ - if (s->align.filled) { - HMM_Logical *p; - int i; - + for (align = s->align; align; align = align->next) { printf("=== begin forced alignment ===\n"); + switch(align->unittype) { + case PER_WORD: + printf("-- word alignment --\n"); break; + case PER_PHONEME: + printf("-- phoneme alignment --\n"); break; + case PER_STATE: + printf("-- state alignment --\n"); break; + } printf(" id: from to n_score unit\n"); printf(" ----------------------------------------\n"); - for(i=0;i<s->align.num;i++) { - printf("[%4d %4d] %f ", s->align.begin_frame[i], s->align.end_frame[i], s->align.avgscore[i]); - switch(s->align.unittype) { + for(i=0;i<align->num;i++) { + printf("[%4d %4d] %f ", align->begin_frame[i], align->end_frame[i], align->avgscore[i]); + switch(align->unittype) { case PER_WORD: - myprintf("%s\t[%s]\n", winfo->wname[s->align.w[i]], winfo->woutput[s->align.w[i]]); + myprintf("%s\t[%s]\n", winfo->wname[align->w[i]], winfo->woutput[align->w[i]]); break; case PER_PHONEME: - p = s->align.ph[i]; + p = align->ph[i]; if (p->is_pseudo) { printf("{%s}\n", p->name); } else if (strmatch(p->name, p->body.defined->name)) { @@ -943,7 +950,7 @@ } break; case PER_STATE: - p = s->align.ph[i]; + p = align->ph[i]; if 
(p->is_pseudo) { printf("{%s}", p->name); } else if (strmatch(p->name, p->body.defined->name)) { @@ -952,20 +959,20 @@ printf("%s[%s]", p->name, p->body.defined->name); } if (r->am->hmminfo->multipath) { - if (s->align.is_iwsp[i]) { - printf(" #%d (sp)\n", s->align.loc[i]); + if (align->is_iwsp[i]) { + printf(" #%d (sp)\n", align->loc[i]); } else { - printf(" #%d\n", s->align.loc[i]); + printf(" #%d\n", align->loc[i]); } } else { - printf(" #%d\n", s->align.loc[i]); + printf(" #%d\n", align->loc[i]); } break; } } - printf("re-computed AM score: %f\n", s->align.allscore); - + printf("re-computed AM score: %f\n", align->allscore); + printf("=== end forced alignment ===\n"); } } From sumomo @ users.sourceforge.jp Fri Dec 12 16:14:57 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Fri, 12 Dec 2008 16:14:57 +0900 Subject: [Julius-cvs 351] CVS update: julius4/libjulius/include/julius Message-ID: <1229066097.687359.25263.nullmailer@users.sourceforge.jp> Index: julius4/libjulius/include/julius/extern.h diff -u julius4/libjulius/include/julius/extern.h:1.10 julius4/libjulius/include/julius/extern.h:1.11 --- julius4/libjulius/include/julius/extern.h:1.10 Sun Nov 16 21:28:03 2008 +++ julius4/libjulius/include/julius/extern.h Fri Dec 12 16:14:57 2008 @@ -12,7 +12,7 @@ * @author Akinobu LEE * @date Mon Mar 7 23:19:14 2005 * - * $Revision: 1.10 $ + * $Revision: 1.11 $ * */ /* @@ -147,12 +147,12 @@ int mfcc_go(Recog *recog, int (*ad_check)(Recog *)); /* word_align.c */ -void word_align(WORD_ID *words, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r); -void phoneme_align(WORD_ID *words, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r); -void state_align(WORD_ID *words, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r); -void word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r); -void phoneme_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, Sentence *s, RecogProcess 
*r); -void state_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r); +void word_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); +void phoneme_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); +void state_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); +void word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); +void phoneme_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); +void state_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); void do_alignment_all(RecogProcess *r, HTK_Param *param); /* m_usage.c */ @@ -290,6 +290,8 @@ /* recogmain.c */ int adin_cut_callback_store_buffer(SP16 *now, int len, Recog *recog); +SentenceAlign *result_align_new(); +void result_align_free(SentenceAlign *a); void result_sentence_malloc(RecogProcess *r, int num); void result_sentence_free(RecogProcess *r); void clear_result(RecogProcess *r); Index: julius4/libjulius/include/julius/recog.h diff -u julius4/libjulius/include/julius/recog.h:1.5 julius4/libjulius/include/julius/recog.h:1.6 --- julius4/libjulius/include/julius/recog.h:1.5 Fri Nov 14 04:44:09 2008 +++ julius4/libjulius/include/julius/recog.h Fri Dec 12 16:14:57 2008 @@ -70,7 +70,7 @@ * @author Akinobu Lee * @date Fri Feb 16 13:42:28 2007 * - * $Revision: 1.5 $ + * $Revision: 1.6 $ * */ /* @@ -294,6 +294,24 @@ } GMMCalc; /** + * Alignment result, valid when forced alignment was done + * + */ +typedef struct __sentence_align__ { + int num; ///< Number of units + short unittype; ///< Unit type (one of PER_*) + WORD_ID *w; ///< word sequence by id (PER_WORD) + HMM_Logical **ph; ///< Phone sequence (PER_PHONEME, PER_STATE) + short *loc; ///< sequence of state location in a phone (PER_STATE) + boolean *is_iwsp; ///< TRUE 
if PER_STATE and this is the inter-word pause state at multipath mode + int *begin_frame; ///< List of beginning frame + int *end_frame; ///< List of ending frame + LOGPROB *avgscore; ///< Score averaged by frames + LOGPROB allscore; ///< Re-computed acoustic score + struct __sentence_align__ *next; ///< data chain pointer +} SentenceAlign; + +/** * Output result structure * */ @@ -305,27 +323,7 @@ LOGPROB score_lm; ///< Language model likelihood (scaled) for N-gram LOGPROB score_am; ///< Acoustic model likelihood for N-gram int gram_id; ///< The grammar ID this sentence belongs to for DFA - - /** - * Alignment result, valid when forced alignment was done - * - */ - struct { - boolean filled; ///< True if has data - int num; ///< Number of units - short unittype; ///< Unit type (one of PER_*) - - WORD_ID *w; ///< word sequence by id (PER_WORD) - HMM_Logical **ph; ///< Phone sequence (PER_PHONEME, PER_STATE) - short *loc; ///< sequence of state location in a phone (PER_STATE) - boolean *is_iwsp; ///< TRUE if PER_STATE and this is the inter-word pause state at multipath mode - - int *begin_frame; ///< List of beginning frame - int *end_frame; ///< List of ending frame - LOGPROB *avgscore; ///< Score averaged by frames - - LOGPROB allscore; ///< Re-computed acoustic score - } align; + SentenceAlign *align; } Sentence; From sumomo @ users.sourceforge.jp Fri Dec 12 16:14:58 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Fri, 12 Dec 2008 16:14:58 +0900 Subject: [Julius-cvs 352] CVS update: julius4/libjulius/src Message-ID: <1229066098.016552.25277.nullmailer@users.sourceforge.jp> Index: julius4/libjulius/src/beam.c diff -u julius4/libjulius/src/beam.c:1.10 julius4/libjulius/src/beam.c:1.11 --- julius4/libjulius/src/beam.c:1.10 Fri Nov 14 13:12:12 2008 +++ julius4/libjulius/src/beam.c Fri Dec 12 16:14:57 2008 @@ -42,7 +42,7 @@ * @author Akinobu LEE * @date Tue Feb 22 17:00:45 2005 * - * $Revision: 1.10 $ + * $Revision: 1.11 $ * */ /* @@ 
-583,6 +583,7 @@ LOGPROB maxscore; int i; TRELLIS_ATOM **idx; + int num; if (r->lmvar != LM_DFA_WORD) return; @@ -629,13 +630,14 @@ /* more than one candidate is requested */ /* get actual number of candidates to output */ - r->result.sentnum = r->config->output.output_hypo_maxnum; - if (r->result.sentnum > bt->num[last_time]) { - r->result.sentnum = bt->num[last_time]; + num = r->config->output.output_hypo_maxnum; + if (num > bt->num[last_time]) { + num = bt->num[last_time]; } /* prepare result storage */ - r->result.sent = (Sentence *)mymalloc(sizeof(Sentence)* r->result.sentnum); + result_sentence_malloc(r, num); + r->result.sentnum = num; /* sort by score */ idx = (TRELLIS_ATOM **)mymalloc(sizeof(TRELLIS_ATOM *)*bt->num[last_time]); @@ -662,7 +664,6 @@ } else { s->gram_id = 0; } - s->align.filled = FALSE; } /* free work area for sort */ free(idx); @@ -670,7 +671,7 @@ } else { /* only max is needed */ /* prepare result storage */ - r->result.sent = (Sentence *)mymalloc(sizeof(Sentence)); + result_sentence_malloc(r, 1); r->result.sentnum = 1; s = &(r->result.sent[0]); s->word_num = 1; @@ -686,11 +687,11 @@ } else { s->gram_id = 0; } - s->align.filled = FALSE; } /* copy as 1st pass result */ memcpy(&(r->result.pass1), &(r->result.sent[0]), sizeof(Sentence)); + r->result.pass1.align = NULL; //callback_exec(CALLBACK_RESULT, r); //free(r->result.sent); Index: julius4/libjulius/src/jfunc.c diff -u julius4/libjulius/src/jfunc.c:1.4 julius4/libjulius/src/jfunc.c:1.5 --- julius4/libjulius/src/jfunc.c:1.4 Thu Sep 25 14:00:06 2008 +++ julius4/libjulius/src/jfunc.c Fri Dec 12 16:14:57 2008 @@ -19,7 +19,7 @@ * @author Akinobu Lee * @date Wed Aug 8 15:04:28 2007 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -1484,8 +1484,10 @@ LOGPROB max_score; PROCESS_AM *am; MFCCCalc *mfcc; + SentenceAlign *align; s = &(r->result.sent[0]); + align = result_align_new(); max_score = LOG_ZERO; @@ -1507,10 +1509,10 @@ return; } outprob_prepare(&(r->am->hmmwrk), 
mfcc->param->samplenum); - word_align(s->word, s->word_num, mfcc->param, s, r); - printf("%f: %f\n", alpha, s->align.allscore); - if (max_score < s->align.allscore) { - max_score = s->align.allscore; + word_align(s->word, s->word_num, mfcc->param, align, r); + printf("%f: %f\n", alpha, align->allscore); + if (max_score < align->allscore) { + max_score = align->allscore; max_alpha = alpha; } } @@ -1523,6 +1525,8 @@ printf("------------ end VTLN -------------\n"); + result_align_free(align); + } #endif Index: julius4/libjulius/src/recogmain.c diff -u julius4/libjulius/src/recogmain.c:1.9 julius4/libjulius/src/recogmain.c:1.10 --- julius4/libjulius/src/recogmain.c:1.9 Tue Nov 18 17:46:59 2008 +++ julius4/libjulius/src/recogmain.c Fri Dec 12 16:14:57 2008 @@ -12,7 +12,7 @@ * @author Akinobu Lee * @date Wed Aug 8 14:53:53 2007 * - * $Revision: 1.9 $ + * $Revision: 1.10 $ * */ @@ -192,6 +192,67 @@ /** * + * allocate storage of recognition alignment results. + * + * @return the new pointer + * + * + * アラインメント結果の格納場所を確保 + * + * @return 確保された領域へのポインタ + * + * + * @callgraph + * @callergraph + * + */ +SentenceAlign * +result_align_new() +{ + SentenceAlign *new; + new = (SentenceAlign *)mymalloc(sizeof(SentenceAlign)); + new->w = NULL; + new->ph = NULL; + new->loc = NULL; + new->begin_frame = NULL; + new->end_frame = NULL; + new->avgscore = NULL; + new->is_iwsp = NULL; + new->next = NULL; + return new; +} + +/** + * + * free storage of recognition alignment results. + * + * @param a [i/o] alignment data to be released + * + * + * アラインメント結果の格納場所を確保 + * + * @param a [i/o] 解放されるアラインメントデータ + * + * + * @callgraph + * @callergraph + * + */ +void +result_align_free(SentenceAlign *a) +{ + if (a->w) free(a->w); + if (a->ph) free(a->ph); + if (a->loc) free(a->loc); + if (a->begin_frame) free(a->begin_frame); + if (a->end_frame) free(a->end_frame); + if (a->avgscore) free(a->avgscore); + if (a->is_iwsp) free(a->is_iwsp); + free(a); +} + +/** + * * Allocate storage of recognition results. 
* * @@ -210,16 +271,7 @@ { int i; r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num); - for(i=0;i<num;i++) { - r->result.sent[i].align.filled = FALSE; - r->result.sent[i].align.w = NULL; - r->result.sent[i].align.ph = NULL; - r->result.sent[i].align.loc = NULL; - r->result.sent[i].align.begin_frame = NULL; - r->result.sent[i].align.end_frame = NULL; - r->result.sent[i].align.avgscore = NULL; - r->result.sent[i].align.is_iwsp = NULL; - } + for(i=0;i<num;i++) r->result.sent[i].align = NULL; r->result.sentnum = 0; } @@ -240,15 +292,15 @@ result_sentence_free(RecogProcess *r) { int i; + SentenceAlign *a, *atmp; if (r->result.sent) { for(i=0;i<r->result.sentnum;i++) { - if (r->result.sent[i].align.w) free(r->result.sent[i].align.w); - if (r->result.sent[i].align.ph) free(r->result.sent[i].align.ph); - if (r->result.sent[i].align.loc) free(r->result.sent[i].align.loc); - if (r->result.sent[i].align.begin_frame) free(r->result.sent[i].align.begin_frame); - if (r->result.sent[i].align.end_frame) free(r->result.sent[i].align.end_frame); - if (r->result.sent[i].align.avgscore) free(r->result.sent[i].align.avgscore); - if (r->result.sent[i].align.is_iwsp) free(r->result.sent[i].align.is_iwsp); + a = r->result.sent[i].align; + while(a) { + atmp = a->next; + result_align_free(a); + a = atmp; + } } free(r->result.sent); r->result.sent = NULL; Index: julius4/libjulius/src/word_align.c diff -u julius4/libjulius/src/word_align.c:1.4 julius4/libjulius/src/word_align.c:1.5 --- julius4/libjulius/src/word_align.c:1.4 Sun Nov 16 21:28:04 2008 +++ julius4/libjulius/src/word_align.c Fri Dec 12 16:14:57 2008 @@ -33,7 +33,7 @@ * @author Akinobu Lee * @date Sat Sep 24 16:09:46 2005 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * */ /* @@ -170,7 +170,7 @@ * @param wnum [in] @a words の長さ * @param param [in] 入力特徴パラメータ列 * @param per_what [in] 単語・音素・状態のどの単位でアラインメントを取るかを指定 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * @@ -185,7 +185,7 @@ * */ 
static void -do_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, Sentence *s, RecogProcess *r) +do_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, SentenceAlign *align, RecogProcess *r) { HMM_Logical **phones; /* phoneme sequence */ boolean *has_sp; /* whether phone can follow short pause */ @@ -277,53 +277,51 @@ allscore = viterbi_segment(shmm, param, r->wchmm->hmmwrk, hmminfo->multipath, end_state, end_num, &id_seq, &end_frame, &end_score, &rlen); /* store result to s */ - s->align.num = rlen; - s->align.unittype = per_what; - s->align.begin_frame = (int *)mymalloc(sizeof(int) * rlen); - s->align.end_frame = (int *)mymalloc(sizeof(int) * rlen); - s->align.avgscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen); + align->num = rlen; + align->unittype = per_what; + align->begin_frame = (int *)mymalloc(sizeof(int) * rlen); + align->end_frame = (int *)mymalloc(sizeof(int) * rlen); + align->avgscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen); for(i=0;i<rlen;i++) { - s->align.begin_frame[i] = (i == 0) ? 0 : end_frame[i-1] + 1; - s->align.end_frame[i] = end_frame[i]; - s->align.avgscore[i] = end_score[i]; + align->begin_frame[i] = (i == 0) ? 
0 : end_frame[i-1] + 1; + align->end_frame[i] = end_frame[i]; + align->avgscore[i] = end_score[i]; } switch(per_what) { case PER_WORD: - s->align.w = (WORD_ID *)mymalloc(sizeof(WORD_ID) * rlen); + align->w = (WORD_ID *)mymalloc(sizeof(WORD_ID) * rlen); for(i=0;i<rlen;i++) { - s->align.w[i] = words[id_seq[i]]; + align->w[i] = words[id_seq[i]]; } break; case PER_PHONEME: - s->align.ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); + align->ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); for(i=0;i<rlen;i++) { - s->align.ph[i] = phones[id_seq[i]]; + align->ph[i] = phones[id_seq[i]]; } break; case PER_STATE: - s->align.ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); - s->align.loc = (short *)mymalloc(sizeof(short) * rlen); - if (hmminfo->multipath) s->align.is_iwsp = (boolean *)mymalloc(sizeof(boolean) * rlen); + align->ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); + align->loc = (short *)mymalloc(sizeof(short) * rlen); + if (hmminfo->multipath) align->is_iwsp = (boolean *)mymalloc(sizeof(boolean) * rlen); for(i=0;i<rlen;i++) { - s->align.ph[i] = phones[phloc[id_seq[i]]]; + align->ph[i] = phones[phloc[id_seq[i]]]; if (hmminfo->multipath) { if (enable_iwsp && stloc[id_seq[i]] > end_num) { - s->align.loc[i] = stloc[id_seq[i]] - end_num; - s->align.is_iwsp[i] = TRUE; + align->loc[i] = stloc[id_seq[i]] - end_num; + align->is_iwsp[i] = TRUE; } else { - s->align.loc[i] = stloc[id_seq[i]]; - s->align.is_iwsp[i] = FALSE; + align->loc[i] = stloc[id_seq[i]]; + align->is_iwsp[i] = FALSE; } } else { - s->align.loc[i] = stloc[id_seq[i]]; + align->loc[i] = stloc[id_seq[i]]; } } break; } - s->align.allscore = allscore; - - s->align.filled = TRUE; + align->allscore = allscore; free_hmm(shmm); free(id_seq); @@ -353,7 +351,7 @@ * @param words [in] 単語列 * @param wnum [in] @a words の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * @@ -362,16 +360,16 @@ * @param words [in] word 
sequence * @param wnum [in] length of @a words * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void -word_align(WORD_ID *words, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r) +word_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { - do_align(words, wnum, param, PER_WORD, s, r); + do_align(words, wnum, param, PER_WORD, align, r); } /** @@ -381,7 +379,7 @@ * @param revwords [in] 単語列(逆順) * @param wnum [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * @@ -390,20 +388,20 @@ * @param revwords [in] word sequence in reversed direction * @param wnum [in] length of @a revwords * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void -word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r) +word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int w; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wnum); for (w=0;w * @@ -423,16 +421,16 @@ * @param words [in] word sequence * @param num [in] length of @a words * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void -phoneme_align(WORD_ID *words, short num, HTK_Param *param, Sentence *s, 
RecogProcess *r) +phoneme_align(WORD_ID *words, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { - do_align(words, num, param, PER_PHONEME, s, r); + do_align(words, num, param, PER_PHONEME, align, r); } /** @@ -442,7 +440,7 @@ * @param revwords [in] 単語列(逆順) * @param num [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 + * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * @@ -451,20 +449,20 @@ * @param revwords [in] word sequence in reversed direction * @param num [in] length of @a revwords * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void -phoneme_rev_align(WORD_ID *revwords, short num, HTK_Param *param, Sentence *s, RecogProcess *r) +phoneme_rev_align(WORD_ID *revwords, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int p; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num); for (p=0;p * @@ -484,16 +482,16 @@ * @param words [in] word sequence * @param num [in] length of @a words * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void -state_align(WORD_ID *words, short num, HTK_Param *param, Sentence *s, RecogProcess *r) +state_align(WORD_ID *words, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { - do_align(words, num, param, PER_STATE, s, r); + do_align(words, num, param, PER_STATE, align, r); } /** @@ -503,7 +501,7 @@ * @param revwords [in] 単語列(逆順) * @param num [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 - * @param s [out] アラインメント結果を格納するSentence構造体 
+ * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * @@ -512,20 +510,20 @@ * @param revwords [in] word sequence in reversed direction * @param num [in] length of @a revwords * @param param [in] input parameter vectors - * @param s [out] Sentence data area to store the alignment result + * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void -state_rev_align(WORD_ID *revwords, short num, HTK_Param *param, Sentence *s, RecogProcess *r) +state_rev_align(WORD_ID *revwords, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int p; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num); for (p=0;presult.sentnum; n++) { s = &(r->result.sent[n]); - /* do forced alignment if needed */ - if (r->config->annotate.align_result_word_flag) - word_align(s->word, s->word_num, param, s, r); - if (r->config->annotate.align_result_phoneme_flag) - phoneme_align(s->word, s->word_num, param, s, r); - if (r->config->annotate.align_result_state_flag) - state_align(s->word, s->word_num, param, s, r); + /* do forced alignment if needed */ + if (r->config->annotate.align_result_word_flag) { + now = result_align_new(); + word_align(s->word, s->word_num, param, now, r); + if (s->align == NULL) s->align = now; + else prev->next = now; + prev = now; + } + if (r->config->annotate.align_result_phoneme_flag) { + now = result_align_new(); + phoneme_align(s->word, s->word_num, param, now, r); + if (s->align == NULL) s->align = now; + else prev->next = now; + prev = now; + } + if (r->config->annotate.align_result_state_flag) { + now = result_align_new(); + state_align(s->word, s->word_num, param, now, r); + if (s->align == NULL) s->align = now; + else prev->next = now; + prev = now; + } } } From sumomo @ users.sourceforge.jp Sat Dec 13 02:23:29 2008 From: sumomo @ users.sourceforge.jp (sumomo @ 
users.sourceforge.jp) Date: Sat, 13 Dec 2008 02:23:29 +0900 Subject: [Julius-cvs 353] CVS update: julius4/libsent/src/ngram Message-ID: <1229102609.330757.12619.nullmailer@users.sourceforge.jp> Index: julius4/libsent/src/ngram/ngram_read_arpa.c diff -u julius4/libsent/src/ngram/ngram_read_arpa.c:1.9 julius4/libsent/src/ngram/ngram_read_arpa.c:1.10 --- julius4/libsent/src/ngram/ngram_read_arpa.c:1.9 Thu Oct 16 01:06:33 2008 +++ julius4/libsent/src/ngram/ngram_read_arpa.c Sat Dec 13 02:23:29 2008 @@ -20,7 +20,7 @@ * @author Akinobu LEE * @date Wed Feb 16 16:52:24 2005 * - * $Revision: 1.9 $ + * $Revision: 1.10 $ * */ /* @@ -30,7 +30,7 @@ * All rights reserved */ -/* $Id: ngram_read_arpa.c,v 1.9 2008/10/15 16:06:33 sumomo Exp $ */ +/* $Id: ngram_read_arpa.c,v 1.10 2008/12/12 17:23:29 sumomo Exp $ */ /* words should be alphabetically sorted */ @@ -473,7 +473,7 @@ bowt = (LOGPROB) atof(s); if (t->bo_wt == NULL) { t->bo_wt = (LOGPROB *)mymalloc(sizeof(LOGPROB) * t->totalnum); - for(i=0;ibo_wt[nnid] = 0.0; + for(i=0;ibo_wt[i] = 0.0; } t->bo_wt[nnid] = bowt; } else { From sumomo @ users.sourceforge.jp Sat Dec 13 19:10:58 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Sat, 13 Dec 2008 19:10:58 +0900 Subject: [Julius-cvs 354] CVS update: julius4 Message-ID: <1229163058.321607.21670.nullmailer@users.sourceforge.jp> Index: julius4/00readme-ja.txt diff -u julius4/00readme-ja.txt:1.6 julius4/00readme-ja.txt:1.7 --- julius4/00readme-ja.txt:1.6 Fri Oct 3 00:00:37 2008 +++ julius4/00readme-ja.txt Sat Dec 13 19:10:57 2008 @@ -4,6 +4,7 @@ Julius + (Rev 4.1.1 2008/12/13) (Rev 4.1 2008/10/03) (Rev 4.0.2 2008/05/27) (Rev 4.0 2007/12/19) @@ -32,36 +33,13 @@ http://julius.sourceforge.jp/ -Julius-4.1 -=========== - -4.0 ?? 4.0.2 ?????????????C??????????? -???????????????? "-fallback1pass" ? "-usepower" ? -?????Linux ??????? API ??????? OSS ?? ALSA ? -????????? - -4.0.2 ?? 4.1 ?????????????????MSD-HMM?????? -CVN ??? frequency warping for VTLN ?????????????? 
-???????????????? perl ????? "jclient-perl" ? -???????? - -4.1 ????????????????????????????????? -???????????????????????タ??????????? -??????????????????????????????????? -?????? "plugin" ???????????????????????? -???????????????????????????????????-????????????????? - -???Julius ???????????? "The Juliusbook" ??????? -???????????????????????? Docbook XML ??????-??html ?? pdf ?????????????????????????? -???????????????????????? 4.1 ???????? -????????????????????????Julius???????? -????????????????? - -Juliusbook ???????????????????? +Julius-4.1.1 +============= -??????_????????Release-ja.txt ??????? +4.1.1 ???????????????????????????????? +4.?????????????????D +4.0 ???????????????????????????????D +????????o????? Release-ja.txt ???????? ??????? @@ -73,8 +51,9 @@ configure configure????? configure.in Sample.jconf jconf ?????????? - julius/ Julius ????? - libsent/ Julius ???????? + julius/ Julius ??? + libjulius/ JuliusLib ??????????? ??? + libsent/ JuliusLib ??????? ??? adinrec/ ????? adinrec adintool/ ????/?????? adintool generate-ngram/ N-gram?????? Index: julius4/00readme.txt diff -u julius4/00readme.txt:1.6 julius4/00readme.txt:1.7 --- julius4/00readme.txt:1.6 Fri Oct 3 00:00:37 2008 +++ julius4/00readme.txt Sat Dec 13 19:10:58 2008 @@ -4,6 +4,7 @@ Julius + (Rev 4.1.1 2008/12/13) (Rev 4.1 2008/10/03) (Rev 4.0.2 2008/05/27) (Rev 4.0 2007/12/19) @@ -44,33 +45,16 @@ together with source codes. -What's new in Julius-4.1 -========================== +What's new in Julius-4.1.1 +=========================== -From 4.0 to 4.0.2, many bugs are fixed and small improvements were -done. New options "-fallback1pass" and "-usepower" were added. The -default audio API is changed from "oss" to "alsa" on Linux. - -From 4.0.2 to 4.1, multi-stream AM, MSD-HMM, CVN, frequency warping -for VTLN are all supported. "jclient-perl", a perl version of module -mode client, is newly added. - -Plug-in support is newly implemented on rev. 4.1. 
Using dynamic -object linking, Julius now can load extensions at run time, and extend -its ability easily without modifying the original engine source code. -The directory "plugin" contains several example source codes and ready -to compile and test them. The source codes also contain all function -specification documents. - -From this release, you can get the total documentation of Julius -called "Juliusbook". It is written in Docbook XML, and PDF version -and HTML version is available. At this time English version has only -the option descriptions and online manuals. Full version that -explains all aspects and specification of Julius (models, input, -decoding, etc...) will be released within a short term. +Revision 4.1.1 is a bug fix release. Many bugs are fixed relating +N-gram handling (tested on SRILM), module mode and more. All users +that uses older versions of Julius-4 are strongly recommended to use +this version. -Contents of Julius-4.1 +Contents of Julius-4.1.1 ======================== (Documents with suffix "ja" are written in Japanese) @@ -80,9 +64,10 @@ Release.txt Release note / ChangeLog configure configure script configure.in - Sample.jconf Sample configuration file for Julius-3.5.3 - julius/ Julius/Julian 3.5.3 sources - libsent/ Julius/Julian 3.5.3 library sources + Sample.jconf Sample configuration file + julius/ Julius sources + libjulius/ JuliusLib core engine library sources + libsent/ JuliusLib low-level library sources adinrec/ Record one sentence utterance to a file adintool/ Record/split/send/receive speech data generate-ngram/ Tool to generate random sentences from N-gram @@ -96,7 +81,6 @@ jclient-perl/ A simple perl version of module mode client plugin/ Several plugin source codes and documentation man/ Unix online manuals - olddoc/ ChangeLogs before 3.2 Documentation Index: julius4/Release-ja.txt diff -u julius4/Release-ja.txt:1.7 julius4/Release-ja.txt:1.8 --- julius4/Release-ja.txt:1.7 Fri Oct 3 10:27:11 2008 +++ 
julius4/Release-ja.txt Sat Dec 13 19:10:58 2008 @@ -1,3 +1,38 @@ +4.1.1 (2008.12.13) +=================== +????F + +[N-gram] + - SRILM ????????f????????????????+[????] + - "-htkconf ConfigFile" ? ConfigFile ?? "SOURCERATE" ?????? + ????+ - "-input stdin" ?????????+[???????] + - ????ッ? "-1pass" ??????????????????????+ - "-palign", "-walign" ? "-salign" ??????????????? + タ??????????+[Module mode] + - ????ッ? ? receive/activate/dactivate ?????????+ - ???????????ID???????????????????+ - ?ッ?????????? (GRAM=..) ??????0 ????????+ - ????ッ? "SYNCGRAM" ????Julius?????????+[Others] + - ????? OS ??????????????i"-ldl" ???? + - Jconf ???????????????????????+ - 1??タ????1??????????????o???????+[Tools] + Jcontrol + - "graminfo" ??????????? + - ????ッ?????ォ?j??????????????????+ mkdfa + - mkfa ???????????+ - DOS ?ョ??????????????????+ adintool + - ????????[??????????????D + - ????????????????????????????+ + 4.1 (2008.10.3) ================ ?????????????? Index: julius4/Release.txt diff -u julius4/Release.txt:1.7 julius4/Release.txt:1.8 --- julius4/Release.txt:1.7 Fri Oct 3 10:27:11 2008 +++ julius4/Release.txt Sat Dec 13 19:10:58 2008 @@ -1,3 +1,36 @@ +4.1.1 (2008.12.13) +=================== +Bug fixes: + +[N-gram] + - sometimes could not read an ARPA N-gram file trained by SRILM. +[A/D-in] + - "-input stdin" does not work. + - "SOURCERATE" at "-htkconf" is ignored. +[Forced alignments] + - now can be used in isolated word recognition and with "-1pass". + - "-palign", "-walign" and "-salign" can not be run together at a time. +[Module mode] + - freezes when a grammar is specified by its ID number. + - wrong grammar ID in recognition result (GRAM=.. always 0) + - "SYNCGRAM" will cause crash at isolated word recognition. + - unable to receive/activate/dactivate on isolated word recognition. +[Others] + - fails to compile on several OS (needs "-ldl"). + - does not handle backslash escaping correctly in Jconf file. + - does not output the 1st pass result as a final result with "-1pass". 
+[Tools] + Jcontrol + - does not support "graminfo" command. + - can not send a dictionary to Julius running isolated word recognition. + mkdfa + - segfault on mkfa + - fails to read a grammar file on DOS format. + adintool + - wrong behavior when splitting a long audio file. + - now output time of each segment. + + 4.1 (2008.10.3) ================ New plugin extension: Index: julius4/Sample.jconf diff -u julius4/Sample.jconf:1.5 julius4/Sample.jconf:1.6 --- julius4/Sample.jconf:1.5 Sun Sep 28 00:33:44 2008 +++ julius4/Sample.jconf Sat Dec 13 19:10:58 2008 @@ -1,6 +1,6 @@ # # Sample Jconf configuration file -# for Julius library rev.4.0.2 +# for Julius library rev.4.1.1 # # 1) Options can also be specified in command line option. # The values are default values in Julius. @@ -51,6 +51,9 @@ #-input stdin # waveform from standard input #-input adinnet # waveform via network client #-input netaudio # DatLink server +#-input oss # OSS API input (if available) +#-input alsa # ALSA API input (if available) +#-input esd # ESounD daemon input (if available) #-filelist filename # input file list #-notypecheck # does not check parameter type of input @@ -177,7 +180,7 @@ ## This option is only a switcher and can be used anywhere anytime. 
# -GLOBAL -## This option disables strict section check and back to 4.0 +## This option disables the strict section checking and reverts to the 4.0 behavior # -nosectioncheck ###################################################################### From sumomo @ users.sourceforge.jp Sat Dec 13 19:10:58 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Sat, 13 Dec 2008 19:10:58 +0900 Subject: [Julius-cvs 355] CVS update: julius4/libjulius Message-ID: <1229163058.492903.21681.nullmailer@users.sourceforge.jp> Index: julius4/libjulius/configure diff -u julius4/libjulius/configure:1.7 julius4/libjulius/configure:1.8 --- julius4/libjulius/configure:1.7 Tue Oct 7 03:10:04 2008 +++ julius4/libjulius/configure Sat Dec 13 19:10:58 2008 @@ -592,7 +592,7 @@ JULIUS_PRODUCTNAME=JuliusLib -JULIUS_VERSION=4.1 +JULIUS_VERSION=4.1.1 # Check whether --enable-pthread or --disable-pthread was given. Index: julius4/libjulius/configure.in diff -u julius4/libjulius/configure.in:1.7 julius4/libjulius/configure.in:1.8 --- julius4/libjulius/configure.in:1.7 Tue Oct 7 03:10:04 2008 +++ julius4/libjulius/configure.in Sat Dec 13 19:10:58 2008 @@ -4,7 +4,7 @@ dnl Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology dnl All rights reserved dnl -dnl $Id: configure.in,v 1.7 2008/10/06 18:10:04 sumomo Exp $ +dnl $Id: configure.in,v 1.8 2008/12/13 10:10:58 sumomo Exp $ dnl AC_INIT(src/search_bestfirst_main.c) @@ -12,7 +12,7 @@ AC_CONFIG_AUX_DIR(../support) JULIUS_PRODUCTNAME=JuliusLib -JULIUS_VERSION=4.1 +JULIUS_VERSION=4.1.1 dnl Checks for options From sumomo @ users.sourceforge.jp Sat Dec 13 19:10:58 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Sat, 13 Dec 2008 19:10:58 +0900 Subject: [Julius-cvs 356] CVS update: julius4/libsent Message-ID: <1229163058.661004.21690.nullmailer@users.sourceforge.jp> Index: julius4/libsent/configure diff -u julius4/libsent/configure:1.11 julius4/libsent/configure:1.12 --- 
julius4/libsent/configure:1.11 Fri Nov 14 02:11:28 2008 +++ julius4/libsent/configure Sat Dec 13 19:10:58 2008 @@ -563,7 +563,7 @@ ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. -LIBSENT_VERSION=4.1 +LIBSENT_VERSION=4.1.1 # specify mic type # Check whether --with-mictype or --without-mictype was given. Index: julius4/libsent/configure.in diff -u julius4/libsent/configure.in:1.11 julius4/libsent/configure.in:1.12 --- julius4/libsent/configure.in:1.11 Fri Nov 14 02:11:28 2008 +++ julius4/libsent/configure.in Sat Dec 13 19:10:58 2008 @@ -3,7 +3,7 @@ dnl Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology dnl All rights reserved dnl -dnl $Id: configure.in,v 1.11 2008/11/13 17:11:28 sumomo Exp $ +dnl $Id: configure.in,v 1.12 2008/12/13 10:10:58 sumomo Exp $ dnl dnl Process this file with autoconf to produce a configure script. @@ -11,7 +11,7 @@ AC_CONFIG_HEADER(include/sent/config.h) AC_CONFIG_AUX_DIR(../support) -LIBSENT_VERSION=4.1 +LIBSENT_VERSION=4.1.1 dnl Checks for options # specify mic type From sumomo @ users.sourceforge.jp Sat Dec 13 19:10:58 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Sat, 13 Dec 2008 19:10:58 +0900 Subject: [Julius-cvs 357] CVS update: julius4/support Message-ID: <1229163058.801375.21697.nullmailer@users.sourceforge.jp> Index: julius4/support/build-all.sh diff -u julius4/support/build-all.sh:1.5 julius4/support/build-all.sh:1.6 --- julius4/support/build-all.sh:1.5 Fri Oct 3 00:02:00 2008 +++ julius4/support/build-all.sh Sat Dec 13 19:10:58 2008 @@ -6,7 +6,7 @@ # # argument: any configure options except "--enable-setup=..." is allowed. 
# -JULIUS_VERSION=4.1 +JULIUS_VERSION=4.1.1 ###################################################################### From sumomo @ users.sourceforge.jp Fri Dec 19 15:42:09 2008 From: sumomo @ users.sourceforge.jp (sumomo @ users.sourceforge.jp) Date: Fri, 19 Dec 2008 15:42:09 +0900 Subject: [Julius-cvs 358] CVS update: julius4/gramtools/yomi2voca Message-ID: <1229668929.274422.18436.nullmailer@users.sourceforge.jp> Index: julius4/gramtools/yomi2voca/yomi2voca.pl.in diff -u julius4/gramtools/yomi2voca/yomi2voca.pl.in:1.2 julius4/gramtools/yomi2voca/yomi2voca.pl.in:1.3 --- julius4/gramtools/yomi2voca/yomi2voca.pl.in:1.2 Tue Dec 18 17:45:48 2007 +++ julius4/gramtools/yomi2voca/yomi2voca.pl.in Fri Dec 19 15:42:09 2008 @@ -1,32 +1,185 @@ #!@PERL@ -# Copyright (c) 1991-2007 Kawahara Lab., Kyoto University +# Copyright (c) 1991-2008 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology -# Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology +# Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology # All rights reserved # # @configure_input@ # -# ひらがな -> ROMAN(標準形式) 変換スクリプト -# .yomi -> .voca +# ひらがな -> Julius 標準モデル用変換スクリプト +# 第2フィールドのひらがなを変換する. +# +# .yomi -> .dict +# +# 助詞の「は」「へ」「を」→「w a」「e」「o」は変換後に手動で直すこと. +# +# ver2: 小さい「ぁぃぅぇぉ」や「う゛」などに対応 +# +# +$error = 0; +$lineno = 0; while (<>) { -# カテゴリ名はすっとばす + +# 文法用に,"%" で始まる行はそのまま出力する. 
if (/^%/){ print; next; } - chop; + chomp; # 表記とひらがな読みを分離 @a = split; $_ = $a[1]; # ひらがな,長音以外はそのまま + +# 3文字以上からなる変換規則(v a) + s/う゛ぁ/ b a/g; + s/う゛ぃ/ b i/g; + s/う゛ぇ/ b e/g; + s/う゛ぉ/ b o/g; + s/う゛ゅ/ by u/g; + # 2文字からなる変換規則 + s/ぅ゛/ b u/g; + + s/あぁ/ a a/g; + s/いぃ/ i i/g; + s/いぇ/ i e/g; + s/いゃ/ y a/g; + s/うぅ/ u:/g; + s/えぇ/ e e/g; + s/おぉ/ o:/g; + s/かぁ/ k a:/g; + s/きぃ/ k i:/g; + s/くぅ/ k u:/g; + s/くゃ/ ky a/g; + s/くゅ/ ky u/g; + s/くょ/ ky o/g; + s/けぇ/ k e:/g; + s/こぉ/ k o:/g; + s/がぁ/ g a:/g; + s/ぎぃ/ g i:/g; + s/ぐぅ/ g u:/g; + s/ぐゃ/ gy a/g; + s/ぐゅ/ gy u/g; + s/ぐょ/ gy o/g; + s/げぇ/ g e:/g; + s/ごぉ/ g o:/g; + s/さぁ/ s a:/g; + s/しぃ/ sh i:/g; + s/すぅ/ s u:/g; + s/すゃ/ sh a/g; + s/すゅ/ sh u/g; + s/すょ/ sh o/g; + s/せぇ/ s e:/g; + s/そぉ/ s o:/g; + s/ざぁ/ z a:/g; + s/じぃ/ j i:/g; + s/ずぅ/ z u:/g; + s/ずゃ/ zy a/g; + s/ずゅ/ zy u/g; + s/ずょ/ zy o/g; + s/ぜぇ/ z e:/g; + s/ぞぉ/ z o:/g; + s/たぁ/ t a:/g; + s/ちぃ/ ch i:/g; + s/つぁ/ ts a/g; + s/つぃ/ ts i/g; + s/つぅ/ ts u:/g; + s/つゃ/ ch a/g; + s/つゅ/ ch u/g; + s/つょ/ ch o/g; + s/つぇ/ ts e/g; + s/つぉ/ ts o/g; + s/てぇ/ t e:/g; + s/とぉ/ t o:/g; + s/だぁ/ d a:/g; + s/ぢぃ/ j i:/g; + s/づぅ/ d u:/g; + s/づゃ/ zy a/g; + s/づゅ/ zy u/g; + s/づょ/ zy o/g; + s/でぇ/ d e:/g; + s/どぉ/ d o:/g; + s/なぁ/ n a:/g; + s/にぃ/ n i:/g; + s/ぬぅ/ n u:/g; + s/ぬゃ/ ny a/g; + s/ぬゅ/ ny u/g; + s/ぬょ/ ny o/g; + s/ねぇ/ n e:/g; + s/のぉ/ n o:/g; + s/はぁ/ h a:/g; + s/ひぃ/ h i:/g; + s/ふぅ/ f u:/g; + s/ふゃ/ hy a/g; + s/ふゅ/ hy u/g; + s/ふょ/ hy o/g; + s/へぇ/ h e:/g; + s/ほぉ/ h o:/g; + s/ばぁ/ b a:/g; + s/びぃ/ b i:/g; + s/ぶぅ/ b u:/g; + s/ふゃ/ hy a/g; + s/ぶゅ/ by u/g; + s/ふょ/ hy o/g; + s/べぇ/ b e:/g; + s/ぼぉ/ b o:/g; + s/ぱぁ/ p a:/g; + s/ぴぃ/ p i:/g; + s/ぷぅ/ p u:/g; + s/ぷゃ/ py a/g; + s/ぷゅ/ py u/g; + s/ぷょ/ py o/g; + s/ぺぇ/ p e:/g; + s/ぽぉ/ p o:/g; + s/まぁ/ m a:/g; + s/みぃ/ m i:/g; + s/むぅ/ m u:/g; + s/むゃ/ my a/g; + s/むゅ/ my u/g; + s/むょ/ my o/g; + s/めぇ/ m e:/g; + s/もぉ/ m o:/g; + s/やぁ/ y a:/g; + s/ゆぅ/ y u:/g; + s/ゆゃ/ y a:/g; + s/ゆゅ/ y u:/g; + s/ゆょ/ y o:/g; + s/よぉ/ y o:/g; + s/らぁ/ r a:/g; + s/りぃ/ r i:/g; + s/るぅ/ r u:/g; + s/るゃ/ ry a/g; + s/るゅ/ ry u/g; + s/るょ/ 
ry o/g; + s/れぇ/ r e:/g; + s/ろぉ/ r o:/g; + s/わぁ/ w a:/g; + s/をぉ/ o:/g; + + s/う゛/ b u/g; s/でぃ/ d i/g; + s/でぇ/ d e:/g; + s/でゃ/ dy a/g; + s/でゅ/ dy u/g; + s/でょ/ dy o/g; s/てぃ/ t i/g; + s/てぇ/ t e:/g; + s/てゃ/ ty a/g; + s/てゅ/ ty u/g; + s/てょ/ ty o/g; s/すぃ/ s i/g; + s/ずぁ/ z u a/g; s/ずぃ/ z i/g; + s/ずぅ/ z u/g; + s/ずゃ/ zy a/g; + s/ずゅ/ zy u/g; + s/ずょ/ zy o/g; + s/ずぇ/ z e/g; + s/ずぉ/ z o/g; s/きゃ/ ky a/g; s/きゅ/ ky u/g; s/きょ/ ky o/g; @@ -38,6 +191,16 @@ s/ちゅ/ ch u/g; s/ちぇ/ ch e/g; s/ちょ/ ch o/g; + s/とぅ/ t u/g; + s/とゃ/ ty a/g; + s/とゅ/ ty u/g; + s/とょ/ ty o/g; + s/どぁ/ d o a/g; + s/どぅ/ d u/g; + s/どゃ/ dy a/g; + s/どゅ/ dy u/g; + s/どょ/ dy o/g; + s/どぉ/ d o:/g; s/にゃ/ ny a/g; s/にゅ/ ny u/g; s/にょ/ ny o/g; @@ -53,10 +216,13 @@ s/ぎゃ/ gy a/g; s/ぎゅ/ gy u/g; s/ぎょ/ gy o/g; - s/じゃ/ j a/g; + s/ぢぇ/ j e/g; s/ぢゃ/ j a/g; - s/じゅ/ j u/g; + s/ぢゅ/ j u/g; + s/ぢょ/ j o/g; s/じぇ/ j e/g; + s/じゃ/ j a/g; + s/じゅ/ j u/g; s/じょ/ j o/g; s/びゃ/ by a/g; s/びゅ/ by u/g; @@ -64,11 +230,16 @@ s/ぴゃ/ py a/g; s/ぴゅ/ py u/g; s/ぴょ/ py o/g; + s/うぁ/ u a/g; s/うぃ/ w i/g; s/うぇ/ w e/g; s/うぉ/ w o/g; s/ふぁ/ f a/g; s/ふぃ/ f i/g; + s/ふぅ/ f u/g; + s/ふゃ/ hy a/g; + s/ふゅ/ hy u/g; + s/ふょ/ hy o/g; s/ふぇ/ f e/g; s/ふぉ/ f o/g; @@ -142,13 +313,42 @@ s/ゆ/ y u/g; s/よ/ y o/g; s/わ/ w a/g; + s/ゐ/ i/g; + s/ゑ/ e/g; s/ん/ N/g; s/っ/ q/g; s/ー/:/g; -#その他特別なルール +# ここまでに処理されてない ぁぃぅぇぉ はそのまま大文字扱い + s/ぁ/ a/g; + s/ぃ/ i/g; + s/ぅ/ u/g; + s/ぇ/ e/g; + s/ぉ/ o/g; + s/ゎ/ w a/g; + s/ぉ/ o/g; + +# その他特別なルール s/を/ o/g; +# 最初の空白を削る s/^ ([a-z])/$1/g; + +# 変換の結果長音記号が続くことがまれにあるので一つにまとめる + s/:+/:/g; + +# アルファベット列になっていない場合,変換に失敗しているので +# 標準エラー出力に出力する. + + $lineno++; + if (! /^[ a-zA-Z:]+$/) { + if ($error == 0) { + $error = 1; + print STDERR "Error: (they were also printed to stdout)\n"; + } + print STDERR "line " , $lineno , ": " , @a[0], "\t", $_,"\n"; + } + print @a[0], "\t", $_,"\n"; + }