home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: alt.sources
- From: KLUNDE@VMS.MACC.WISC.EDU ("Ken R. Lunde")
- Subject: [sci.lang.japan] Latest version of VALUES.C (with automatic KANJI code detection)
- Message-ID: <1990Oct15.221427.1411@math.lsa.umich.edu>
- Date: Mon, 15 Oct 90 22:14:27 GMT
-
- Archive-name: kanji-values/13-Oct-90
- Original-posting-by: KLUNDE@VMS.MACC.WISC.EDU ("Ken R. Lunde")
- Original-subject: Latest version of VALUES.C (with automatic KANJI code detection)
- Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti)
-
- [Reposted from sci.lang.japan.
- Comments on this service to emv@math.lsa.umich.edu (Edward Vielmetti).]
-
- /* VALUES.C version of 14 October 1990 */
- /* A utility for displaying the values of Japanese characters. */
- /* Written by Ken R. Lunde, University of Wisconsin-Madison */
- /* EMAIL: klunde@vms.macc.wisc.edu */
- /* Available at the ucdavis.edu (128.120.2.1) FTP archive in pub/JIS/C. */
-
- /* I do not consider myself to be a very advanced programmer, but perhaps one */
- /* other person may have a use for this program. Please feel free to use this */
- /* source code any way you wish. The conversion algorithms for the major codes */
- /* for Japanese are used, and are very reusable. The algorithm which detects */
- /* the input file's Japanese code automatically is also quite useful. */
-
- /* This program was written as a tool for determining the values for Japanese */
- /* and ASCII characters. It is written in ANSI C, so should be compilable on */
- /* almost any platform, but I do not offer any guarantees. :-) */
-
- /* This version accepts SHIFT-JIS, EUC, or the 7-bit JIS codes as valid input */
- /* for the file it reads. This program automatically detects which KANJI code */
- /* is used in the input file. The output file will use the same code that the */
- /* input file used. */
-
- /* This program creates a file containing the contents of the input file, and */
- /* displays each character's SHIFT-JIS, EUC, and JIS values in one of three */
- /* different styles: octal, decimal, or hexadecimal -- the user must specify */
- /* which one to use. ASCII and KUTEN values are also given. A tab separates */
- /* the fields in the output file. I find that a tab width of 14 characters is */
- /* best when printing. The SJIS, EUC, and JIS columns are padded with zeros */
- /* for octal and decimal output. This makes the output more "readable." */
-
- /* For SHIFT-JIS input files only, half-size KATAKANA are treated. Only their */
- /* ASCII value is displayed since they are single-byte characters. Printable */
- /* ASCII characters are handled with all the Japanese codes. */
-
- /* Please send comments and suggestions! ENJOY! */
-
#include <stdio.h>
#include <stdlib.h>
-
/* Prototypes for the functions defined in this file. */
/* The library functions fclose() and exit() are not re-declared here; */
/* their declarations come from <stdio.h> and <stdlib.h>. */
/* Parameter names match the ones used in the definitions. */
int DetectCodeType(FILE *in);
int isodd(int number);
void Introduction(FILE *out,int choice,int code);
void print1byte(FILE *out,int choice,int one);
void print2byte(FILE *out,int code,int choice,int one,int two,int data[8]);
void seven2shift(int *p1,int *p2);
void shift2seven(int *p1,int *p2);
void Skip_ESC_Seq(FILE *in,int temp,int *shifted_in);
void TreatEUC(FILE *in,FILE *out,int code,int choice);
void TreatJIS(FILE *in,FILE *out,int code,int choice);
void TreatSJIS(FILE *in,FILE *out,int code,int choice);
-
/* KANJI code of the input file: the value returned by DetectCodeType() */
/* and passed around as the `code' argument. */
enum {
    NOT_SET = 0,
    NEW = 1,
    OLD = 2,
    NEC = 3,
    EUC = 4,
    SJIS = 5
};

/* Truth values. Left as macros; some platform headers define the same */
/* names as macros, and an identical macro redefinition is harmless. */
#define TRUE 1
#define FALSE 0

/* The escape character that starts every escape sequence. */
enum { ESC = 27 };

/* Slots of the data[8] array handed to print2byte(): first and second */
/* byte of the SHIFT-JIS, EUC and JIS values, then the two KUTEN numbers. */
enum {
    SJIS1 = 0,
    SJIS2 = 1,
    EUC1 = 2,
    EUC2 = 3,
    JIS1 = 4,
    JIS2 = 5,
    KT1 = 6,
    KT2 = 7
};

/* Output style; these are the numbers the user types at the prompt. */
enum {
    OCT = 8,
    DEC = 10,
    HEX = 16
};

/* Characters that follow ESC in the escape sequences print2byte() writes */
/* before (KI_*) and after (KO_*) a two-byte character. */
#define KI_NEW "$B"
#define KO_NEW "(J"
#define KI_OLD "$@"
#define KO_OLD "(J"
#define KI_NEC "K"
#define KO_NEC "H"
-
- main()
- {
- FILE *in,*out;
- int code,choice;
- char infilename[80],outfilename[80];
-
- printf("\nInfile name -> ");
- gets(infilename);
- if ((in = fopen(infilename,"r")) == NULL) {
- printf("\nCannot open %s",infilename);
- exit(1);
- }
- if ((code = DetectCodeType(in)) == NOT_SET) {
- printf("\nNo KANJI code detected in %s",infilename);
- exit(1);
- }
- if ((in = fopen(infilename,"r"))==NULL) {
- printf("\nCannot open %s",infilename);
- exit(1);
- }
- printf("Outfile name -> ");
- gets(outfilename);
- if ((out = fopen(outfilename,"w"))==NULL) {
- printf("\nCannot open %s",outfilename);
- exit(1);
- }
- printf("Output (8 = octal, 10 = decimal, 16 = hexadecimal) -> ");
- scanf("%d",&choice);
- if ((choice != OCT) && (choice != DEC) && (choice != HEX)) {
- printf("\nInvalid choice! Bye!");
- exit(1);
- }
- Introduction(out,choice,code);
- switch (code) {
- case SJIS :
- TreatSJIS(in,out,code,choice);
- break;
- case EUC :
- TreatEUC(in,out,code,choice);
- break;
- case NEW :
- case OLD :
- case NEC :
- TreatJIS(in,out,code,choice);
- break;
- }
- fclose(out);
- fclose(in);
- return 0;
- }
-
- int DetectCodeType(FILE *in)
- {
- int p1,p2,p3,whatcode;
-
- whatcode = NOT_SET;
- while (((p1 = getc(in)) != EOF) && (whatcode == NOT_SET)) {
- if (p1 == ESC) {
- p2 = getc(in);
- if (p2 == '$') {
- p3 = getc(in);
- if (p3 == 'B')
- whatcode = NEW;
- else if (p3 == '@')
- whatcode = OLD;
- }
- else if (p2 == 'K')
- whatcode = NEC;
- }
- else if ((p1 >= 129) && (p1 <= 254)) {
- p2 = getc(in);
- if (((p1 >= 129) && (p1 <= 159)) && ((p2 >= 64) && (p2 <= 160)))
- whatcode = SJIS;
- else if (((p1 >= 161) && (p1 <= 254)) && ((p2 >= 161) && (p2 <= 254)))
- whatcode = EUC;
- }
- }
- fclose(in);
- return whatcode;
- }
-
/* Returns 1 if `number' is odd (negative odd numbers included), else 0. */
int isodd(int number)
{
    if ((number % 2) != 0)
        return 1;
    return 0;
}
-
- void Introduction(FILE *out,int choice,int code)
- {
- switch (choice) {
- case OCT :
- fprintf(out,"Character values (in octal):\n\n");
- break;
- case DEC :
- fprintf(out,"Character values (in decimal):\n\n");
- break;
- case HEX :
- fprintf(out,"Character values (in hexadecimal):\n\n");
- break;
- }
- switch (code) {
- case SJIS :
- fprintf(out,"Output KANJI code will be SHIFT-JIS\n\n");
- break;
- case EUC :
- fprintf(out,"Output KANJI code will be EUC\n\n");
- break;
- case NEW :
- fprintf(out,"Output KANJI code will be JIS 7-bit (NEW-JIS)\n\n");
- break;
- case OLD :
- fprintf(out,"Output KANJI code will be JIS 7-bit (OLD-JIS)\n\n");
- break;
- case NEC :
- fprintf(out,"Output KANJI code will be JIS 7-bit (NEC-JIS)\n\n");
- break;
- }
- fprintf(out,"CHARACTER\tSHIFT-JIS or\tEUC\tJIS\tASCII\tKUTEN\n");
- fprintf(out,"\tsingle-byte\n\n");
- }
-
- void print1byte(FILE *out,int choice,int one)
- {
- switch (choice) {
- case OCT :
- fprintf(out,"%c\t%03o\n",one,one);
- break;
- case DEC :
- fprintf(out,"%c\t%03d\n",one,one);
- break;
- case HEX :
- fprintf(out,"%c\t%X\n",one,one);
- break;
- }
- }
-
- void print2byte(FILE *out,int code,int choice,int one,int two,int data[8])
- {
- switch (code) {
- case NEW :
- fprintf(out,"%c%s%c%c%c%s\t",ESC,KI_NEW,one,two,ESC,KO_NEW);
- break;
- case OLD :
- fprintf(out,"%c%s%c%c%c%s\t",ESC,KI_OLD,one,two,ESC,KO_OLD);
- break;
- case NEC :
- fprintf(out,"%c%s%c%c%c%s\t",ESC,KI_NEC,one,two,ESC,KO_NEC);
- break;
- default :
- fprintf(out,"%c%c\t",one,two);
- break;
- }
- switch (choice) {
- case OCT :
- fprintf(out,"%03o-%03o\t",data[SJIS1],data[SJIS2]);
- fprintf(out,"%03o-%03o\t",data[EUC1],data[EUC2]);
- fprintf(out,"%03o-%03o\t",data[JIS1],data[JIS2]);
- break;
- case DEC :
- fprintf(out,"%03d-%03d\t",data[SJIS1],data[SJIS2]);
- fprintf(out,"%03d-%03d\t",data[EUC1],data[EUC2]);
- fprintf(out,"%03d-%03d\t",data[JIS1],data[JIS2]);
- break;
- case HEX :
- fprintf(out,"%X-%X\t",data[SJIS1],data[SJIS2]);
- fprintf(out,"%X-%X\t",data[EUC1],data[EUC2]);
- fprintf(out,"%X-%X\t",data[JIS1],data[JIS2]);
- break;
- }
- fprintf(out,"%c%c\t",data[JIS1],data[JIS2]);
- fprintf(out,"%02d-%02d\n",data[KT1],data[KT2]);
- }
-
/* Convert a two-byte 7-bit JIS value to SHIFT-JIS in place. */
/*   p1  first byte; converted when it lies in 33..126, otherwise left */
/*       as it is */
/*   p2  second byte; always adjusted, based on whether *p1 is odd */
void seven2shift (int *p1,int *p2)
{
    const int odd_first = isodd(*p1);

    /* Second byte: an odd first byte selects the lower half of the */
    /* SHIFT-JIS range, an even one the upper half. */
    *p2 += odd_first ? 31 : 126;
    /* Results from 127 up to 157 move up by one, so 127 is never produced. */
    if ((*p2 >= 127) && (*p2 < 158))
        (*p2)++;

    /* First byte: two consecutive JIS values share one SHIFT-JIS value. */
    /* (n + 1) / 2 equals (n - 1) / 2 + 1 for odd n and n / 2 for even n. */
    if ((*p1 >= 33) && (*p1 <= 94))
        *p1 = ((*p1 + 1) / 2) + 112;
    else if ((*p1 >= 95) && (*p1 <= 126))
        *p1 = ((*p1 + 1) / 2) + 176;
}
-
/* Convert a two-byte SHIFT-JIS value to 7-bit JIS in place. */
/*   p1  first byte; converted when it lies in 129..159 or 224..239 and */
/*       the second byte lies in 64..252, otherwise left as it is */
/*   p2  second byte; converted when it lies in 64..252, otherwise left */
/*       as it is */
void shift2seven(int *p1,int *p2)
{
    const int second = *p2;
    const int lower_half = (second >= 64) && (second <= 158);
    const int upper_half = (second >= 159) && (second <= 252);
    int base;

    /* Second byte. In the lower half, values above 127 sit one higher */
    /* than the plain offset would give, so they lose one more. */
    if (lower_half)
        *p2 = second - 31 - ((second > 127) ? 1 : 0);
    else if (upper_half)
        *p2 = second - 126;
    else
        return;

    /* First byte: each SHIFT-JIS value covers two JIS values; the lower */
    /* half of the second byte selects the odd one. */
    if ((*p1 >= 129) && (*p1 <= 159))
        base = 112;
    else if ((*p1 >= 224) && (*p1 <= 239))
        base = 176;
    else
        return;

    if (lower_half)
        *p1 = ((*p1 - (base + 1)) * 2) + 1;
    else
        *p1 = (*p1 - base) * 2;
}
-
- void Skip_ESC_Seq(FILE *in,int temp,int *shifted_in)
- {
- int junk;
-
- if ((temp == '$') || (temp == '('))
- junk = getc(in);
- if ((temp == 'K') || (temp == '$'))
- *shifted_in = TRUE;
- else
- *shifted_in = FALSE;
- }
-
- void TreatEUC(FILE *in,FILE *out,int code,int choice)
- {
- int one,two;
- int data[8];
-
- while ((one = getc(in)) != EOF) {
- if ((one >= 161) && (one <= 254)) {
- two = getc(in);
- data[SJIS1] = data[EUC1] = data[JIS1] = data[KT1] = one;
- data[SJIS2] = data[EUC2] = data[JIS2] = data[KT2] = two;
- data[SJIS1] -= 128;
- data[SJIS2] -= 128;
- seven2shift(&data[SJIS1],&data[SJIS2]);
- data[JIS1] -= 128;
- data[JIS2] -= 128;
- data[KT1] -= 160;
- data[KT2] -= 160;
- print2byte(out,code,choice,one,two,data);
- }
- else if ((one >= 33) && (one <= 126))
- print1byte(out,choice,one);
- }
- }
-
- void TreatJIS(FILE *in,FILE *out,int code,int choice)
- {
- int shifted_in,temp,one,two;
- int data[8];
-
- shifted_in = FALSE;
- while ((one = getc(in)) != EOF) {
- if (one == ESC) {
- temp = getc(in);
- Skip_ESC_Seq(in,temp,&shifted_in);
- if ((one = getc(in)) == EOF)
- exit(1);
- }
- if (shifted_in) {
- two = getc(in);
- data[SJIS1] = data[EUC1] = data[JIS1] = data[KT1] = one;
- data[SJIS2] = data[EUC2] = data[JIS2] = data[KT2] = two;
- seven2shift(&data[SJIS1],&data[SJIS2]);
- data[EUC1] += 128;
- data[EUC2] += 128;
- data[KT1] -= 32;
- data[KT2] -= 32;
- print2byte(out,code,choice,one,two,data);
- }
- else if ((!shifted_in) && ((one >= 33) && (one <= 126)))
- print1byte(out,choice,one);
- }
- }
-
- void TreatSJIS(FILE *in,FILE *out,int code,int choice)
- {
- int one,two;
- int data[8];
-
- while ((one = getc(in)) != EOF) {
- if (((one >= 129) && (one <= 159)) || ((one >= 224) && (one <= 239))) {
- two = getc(in);
- data[SJIS1] = data[EUC1] = data[JIS1] = data[KT1] = one;
- data[SJIS2] = data[EUC2] = data[JIS2] = data[KT2] = two;
- shift2seven(&data[EUC1],&data[EUC2]);
- data[EUC1] += 128;
- data[EUC2] += 128;
- shift2seven(&data[JIS1],&data[JIS2]);
- shift2seven(&data[KT1],&data[KT2]);
- data[KT1] -= 32;
- data[KT2] -= 32;
- print2byte(out,code,choice,one,two,data);
- }
- else if (((one >= 33) && (one <= 126)) || ((one >= 161) && (one <= 223)))
- print1byte(out,choice,one);
- }
- }
-