/*
 * K D F I L T - filter program for kanjidic
 *
 * usage: kdfilt fni -CODES fno
 *
 *   fni     the input file name (e.g. kanjidic)
 *   -CODES  the index codes to remove; each letter after the '-' names one
 *           kind of field, e.g. -MQY removes the fields starting with
 *           'M', 'Q' and 'Y'
 *   fno     the output file that receives the filtered kanjidic
 *
 * Lines starting with '#' are copied through unchanged.  Every other line
 * is split into space-separated fields; fields whose first character is
 * one of the requested codes are dropped, the rest are copied.
 *
 * The process exits with EXIT_FAILURE on a usage error or an I/O failure
 * and with EXIT_SUCCESS otherwise.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TRUE 1
#define FALSE 0

enum {
    KD_LINE_SIZE = 512, /* size of the line buffer handed to fgets */
    KD_PREFIX_LEN = 8   /* leading bytes of an entry copied unfiltered */
};

/* Print the full operating instructions to stdout. */
static void print_usage(void)
{
    printf("usage: kdfilt fni -CODES fno\n\n ");
    printf("KDFILT removes unwanted index codes from kanjidic\n\n ");
    printf(" fni - the input file name (e.g. kanjidic)\n ");
    printf(" fno - the output file containing the filtered kanjidic\n ");
    printf(" -CODES the index codes you want removed, e.g. -MQY will \n result in the Morohashi, Four-Corner & PinYin codes being removed.\n\n ");
}

/*
 * Filter one dictionary line.
 *
 * line   NUL-terminated input line; any byte below 0x20 (newline, carriage
 *        return, NUL, ...) is treated as the end of the line.
 * out    receives the filtered text, NUL-terminated, without a newline.
 *        Must have room for strlen(line) + 2 bytes (a trailing space may
 *        be appended).
 * codes  the code letters to remove; a leading '-' (the command-line
 *        option prefix) is ignored, so fields that merely start with '-'
 *        are not removed.
 *
 * The first KD_PREFIX_LEN bytes are copied untouched (presumably the kanji
 * and its JIS code -- confirm against the kanjidic format).  A line that
 * ends before that point is copied as far as it goes instead of reading
 * beyond its terminator.
 */
static void filter_line(const char *line, char *out, const char *codes)
{
    enum { AT_FIELD_START, SKIPPING_FIELD, COPYING_FIELD } state = AT_FIELD_START;
    const char *unwanted = (codes[0] == '-') ? codes + 1 : codes;
    size_t ip = 0;
    size_t op = 0;
    unsigned char c;

    while (ip < (size_t)KD_PREFIX_LEN && (unsigned char)line[ip] >= 0x20) {
        out[op++] = line[ip++];
    }

    for (;;) {
        c = (unsigned char)line[ip];
        if (c < 0x20) {
            break; /* end of line, whatever the current state is */
        }

        switch (state) {
        case AT_FIELD_START:
            if (c == ' ') {
                break; /* swallow extra separators between fields */
            }
            if (c == 0xa4 || c == 0xa5 || c == '{') {
                /*
                 * 0xa4 / 0xa5 are presumably the EUC-JP lead bytes of
                 * hiragana / katakana and '{' the start of a meaning --
                 * confirm against the kanjidic format.  From here on
                 * nothing is filtered: copy the rest of the line as is.
                 */
                while ((unsigned char)line[ip] >= 0x20) {
                    out[op++] = line[ip++];
                }
                continue; /* line[ip] now ends the line; the loop exits */
            }
            if (strchr(unwanted, c) != NULL) {
                state = SKIPPING_FIELD;
            } else {
                out[op++] = (char)c;
                state = COPYING_FIELD;
            }
            break;

        case SKIPPING_FIELD:
            if (c == ' ') {
                /* keep exactly one separator where the field used to be */
                if (out[op - 1] != ' ') {
                    out[op++] = ' ';
                }
                state = AT_FIELD_START;
            }
            break;

        default: /* COPYING_FIELD */
            if (c == ' ') {
                if (out[op - 1] != ' ') {
                    out[op++] = ' ';
                }
                state = AT_FIELD_START;
            } else {
                out[op++] = (char)c;
            }
            break;
        }
        ip++;
    }

    /* Non-empty output always ends in a single space, as before. */
    if (op > 0 && out[op - 1] != ' ') {
        out[op++] = ' ';
    }
    out[op] = '\0';
}

/*
 * Entry point: kdfilt fni -CODES fno
 *
 * Returns EXIT_SUCCESS when the whole input was filtered and written,
 * EXIT_FAILURE on bad arguments, on a file that cannot be opened, or on a
 * read/write error.
 *
 * NOTE: input is read in chunks of at most KD_LINE_SIZE - 1 bytes, so a
 * longer physical line is still processed as several separate lines.
 */
int main(int argc, char **argv)
{
    char instr[KD_LINE_SIZE];
    char outstr[KD_LINE_SIZE + 2]; /* room for the trailing space and NUL */
    const char *codes;
    FILE *fi;
    FILE *fo;
    long int lno = 0;
    int status = EXIT_SUCCESS;
    int write_failed;

    /* All three arguments are required; argv[3] is used below. */
    if (argc < 4) {
        print_usage();
        return EXIT_FAILURE;
    }

    fi = fopen(argv[1], "r");
    if (fi == NULL) {
        printf("\nCannot open %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    /* Use the argument in place: no fixed-size copy that could overflow. */
    codes = argv[2];
    if ((codes[0] != '-') || (strlen(codes) < 2)) {
        printf("usage: kdfilt fni -CODES fno\n ");
        fclose(fi);
        return EXIT_FAILURE;
    }

    fo = fopen(argv[3], "w");
    if (fo == NULL) {
        printf("open of %s failed\n", argv[3]);
        fclose(fi);
        return EXIT_FAILURE;
    }

    /*
     * Loop on the fgets result rather than on feof(): this also handles a
     * final line that lacks a trailing newline.
     */
    while (fgets(instr, (int)sizeof instr, fi) != NULL) {
        if (instr[0] == '#') {
            fprintf(fo, "%s", instr); /* comment lines pass through */
            continue;
        }
        if ((++lno % 10) == 0) {
            printf("Line: %ld\r", lno); /* progress indicator */
        }
        filter_line(instr, outstr, codes);
        fprintf(fo, "%s\n", outstr);
    }

    if (ferror(fi)) {
        printf("read of %s failed\n", argv[1]);
        status = EXIT_FAILURE;
    }
    fclose(fi);

    /* fclose flushes buffered output, so its result matters here. */
    write_failed = ferror(fo) ? TRUE : FALSE;
    if (fclose(fo) != 0) {
        write_failed = TRUE;
    }
    if (write_failed) {
        printf("write of %s failed\n", argv[3]);
        status = EXIT_FAILURE;
    }

    return status;
}