home *** CD-ROM | disk | FTP | other *** search
- # EXEUTIL.AWK
- # Andrew Schulman (CIS 76320,302)
- #
- # Copyright (c) 1992 Andrew Schulman. All rights reserved.
- #
- # Contact: Andrew Schulman (CompuServe 76320,302)
- #
- # Performs various operations on new executable (NE) files:
- # DIFF: display entry-point differences between two NE files
- # DUPES: display multiple entry points to same code
- # IMPORTS: display all imports
- # UNDOC: display all imports of undocumented functions
- # FINDUNDOC: display all undocumented exports
- #
- # Requires Thompson AWK compiler (Thompson Automation; 503-224-1639) features:
- # Produce standalone executable (awkc -xe exeutil.awk)
- # Automatic sorting of arrays
- # Multidimensional arrays
- # Hex numbers
- #
- # Requires the following other components:
- # EXEDUMP.EXE
- # UNDOCWIN.DAT
- # WINFUNC.DAT
- #
- # from the book "Undocumented Windows" by Schulman, Maxey, and Pietrek
- # (Addison-Wesley 1992)
- #
-
- ######################################################################
- # general-purpose routines
-
- # hex(s): turn a string of hex digits (no prefix) into a number by
- # prepending "0x" and forcing numeric context. Presumably depends on the
- # "Hex numbers" feature of Thompson AWK listed in the file header.
- function hex(s)
- {
-     local literal;
-
-     literal = "0x" s;
-     return literal + 0;
- }
-
- # Print one usage entry: heading line, command synopsis, worked example.
- function help_entry(heading, synopsis, example)
- {
-     print heading;
-     print synopsis;
-     print example;
- }
-
- # Print the usage text for every EXEUTIL mode on stdout, then exit.
- # Entries are separated by a blank line; no blank line follows the last.
- function help()
- {
-     help_entry("To display entry-point differences between two versions of file:",
-         " EXEUTIL -DIFF exe_file1 exe_file2",
-         "example: exeutil -diff \\win31\\system\\user.exe \\win30\\system\\user.exe");
-     print "";
-     help_entry("To display multiple entry points to same code:",
-         " EXEUTIL -DUPES exe_file",
-         "example: exeutil -dupes \\windows\\system\\krnl386.exe");
-     print "";
-     help_entry("To display all imports:",
-         " EXEUTIL -IMPORTS exe_file",
-         "example: exeutil -imports \\windows\\progman.exe");
-     print "";
-     help_entry("To display all imports of undocumented functions:",
-         " EXEUTIL -UNDOC exe_file",
-         "example: exeutil -undoc \\windows\\progman.exe");
-     print "";
-     help_entry("To find any undocumented exports:",
-         " EXEUTIL -FINDUNDOC exe_file header_file",
-         "example: exeutil -findundoc \\win31\\system\\user.exe \\windev\\windows.h");
-     exit;
- }
-
- # fail(s): report a fatal error on stderr and terminate.
- #   s - message text, printed after the "exeutil error: " prefix.
- # Exits with status 1 so that callers (e.g. batch files testing the
- # exit code) can tell a failure from a successful run; the original bare
- # "exit" reported success. When called before the END rule, awk still
- # runs END, which returns immediately because `okay` has not been set.
- function fail(s)
- {
-     print "exeutil error: " s > stderr;
-     exit 1;
- }
-
- # getpathname(filename): locate a support file.
- #   filename - bare file name (e.g. "exedump.exe", "WINFUNC.DAT").
- # Tries, in order: the name as given, \sourcer\<filename>, then every
- # directory listed in the PATH environment variable, left to right.
- # Returns whatever findfirst() reports for the first hit, or "" when the
- # file is not found anywhere.
- function getpathname(filename)
- {
-     # pathname is local so the search does not leak a global as a side
-     # effect; callers only use the return value.
-     local pathname, dir, n, x;
-
-     if ((pathname = findfirst(filename)) != "")
-         return pathname;
-     if ((pathname = findfirst("\\sourcer\\" filename)) != "")
-         return pathname;
-
-     # later - also look in ARGV[0] if supplied by AWK
-     # Walk PATH by index rather than "for (x in dir)": the order of an
-     # "in" traversal is not guaranteed to be 1..n, and PATH order matters.
-     n = split(ENV["PATH"], dir, ";");
-     for (x = 1; x <= n; x++)
-         if ((pathname = findfirst(dir[x] "\\" filename)) != "")
-             return pathname;
-
-     return "";
- }
-
- # run(filename, cmdline): locate a helper program via getpathname() and
- # execute it through system() with the given command line. Any failure
- # (program not found, non-zero return from system()) is fatal via fail().
- function run(filename, cmdline)
- {
-     pathname = getpathname(filename);
-     if (pathname == "")
-         fail("EXEUTIL requires " filename " on PATH");
-
-     ret = system(pathname " " cmdline);
-     if (ret != 0)
-         fail("Cannot execute " filename " " cmdline);
- }
-
- # check_magic(f): verify that file f exists and is an NE executable.
- # Runs "exedump -magic f" into _tmp.tmp and inspects the first word of
- # the first output line; anything other than "NE" is a fatal error.
- function check_magic(f) \
- {
-     if (filesize(f) == -1)
-         fail("Can't find " f);
-     run("exedump.exe", " -magic " f " > _tmp.tmp");
-
-     # Reset s first: it is a global, and if getline reads nothing (empty
-     # or unreadable file) it leaves s untouched, so a second call would
-     # otherwise re-test the previous file's magic string.
-     s = "";
-     getline s < "_tmp.tmp";
-     close("_tmp.tmp");
-
-     split(s, arr);
-     if (arr[1] != "NE")
-         fail(f " is an " arr[1] ", not an NE (new executable) file");
- }
-
- #######################################################################
- # EXEUTIL -DIFF
-
- # diffhdr(): EXEUTIL -DIFF report.
- # entrytab already holds the entry table of files[1] (collected by the
- # main input rules). This function dumps files[2] with EXEDUMP, collects
- # its entry table into entrytab2, builds ordinal -> name maps for both
- # files and prints the ordinals whose names differ, in both directions.
- function diffhdr() \
- {
-     FS = " ";
-     tempfile = "_tmp.tmp";
-     run("exedump.exe", " -nobanner -noreloc " files[2] " > " tempfile);
-
-     printf("Entry point differences: \n");
-
-     in_entry = 0;
-     line = 0;
-     # Compare against 0: getline returns -1 on a read error, which a bare
-     # "while (getline < file)" would treat as true and loop forever.
-     while ((getline < tempfile) > 0)
-     {
-         if ($0 == "begin entry")
-             in_entry = 1;
-         else if (in_entry)
-         {
-             if (NF == 0)
-                 break; # blank line ends the entry table
-             else if ((NF == 3) || (NF == 4))
-                 entrytab2[++line] = $0;
-         }
-         # NOTE(review): odometer() keys off NR, which a redirected
-         # getline normally does not advance -- confirm this is intended.
-         odometer();
-     }
-     # The loop usually leaves via "break" before end of file, so close
-     # explicitly; otherwise the temp file is still open when END deletes it.
-     close(tempfile);
-
-     # entry line fields: segment offset ordinal [name]
-     for (x in entrytab)
-     {
-         split(entrytab[x], arr);
-         if (arr[4])
-             func1[arr[3]] = arr[4]; # ord = func
-         else
-             func1[arr[3]] = "(unnamed @" arr[3] ")" ;
-     }
-     for (x in entrytab2)
-     {
-         split(entrytab2[x], arr);
-         if (arr[4])
-             func2[arr[3]] = arr[4]; # ord = func
-         else
-             func2[arr[3]] = "(unnamed @" arr[3] ")" ;
-     }
-
-     # list functions in ne1 that are different or missing in ne2
-     printf("\t%30s\t%s\n", files[1], files[2]);
-     for (i in func1)
-         if (func1[i] != func2[i]) ## difference between versions
-         {
-             printf("%u\t%-30s\t%s\n", i, func1[i], func2[i]);
-             delete func2[i]; ## already reported; skip it in the second pass
-         }
-
-     # list functions in ne2 that are different or missing in ne1
-     printf("\n\t%30s\t%s\n", files[2], files[1]);
-     for (i in func2)
-         if (func2[i] != func1[i])
-             printf("%u\t%-30s\t%s\n", i, func2[i], func1[i]);
-
-     delete func1;
-     delete func2;
-     delete entrytab2;
- }
-
- #######################################################################
- # EXEUTIL -DUPES
-
- # dupes(): EXEUTIL -DUPES report.
- # Groups the entry-table lines in entrytab by segment ($1) and offset
- # ($2). The first line seen for a segment:offset pair is remembered in
- # so[][]; every further line for the same pair is recorded in
- # duplicate[][][]. Each group with duplicates is printed as the first
- # line, then the duplicates, then a blank line.
- function dupes() \
- {
-     local idx, seg, off, entry;
-
-     FS = " ";
-     printf("Multiple entry points for same segment:offset: \n");
-
-     for (idx in entrytab)
-     {
-         split(entrytab[idx], arr);
-         seg = arr[1];
-         off = arr[2];
-         if (!((seg in so) && (off in so[seg])))
-         {
-             # first entry point seen at this address
-             so[seg][off] = entrytab[idx];
-             continue;
-         }
-         duplicate[seg][off][entrytab[idx]] = 1;
-     }
-
-     for (seg in duplicate)
-     {
-         for (off in duplicate[seg])
-         {
-             print so[seg][off];
-             for (entry in duplicate[seg][off])
-                 print entry;
-             print "";
-         }
-     }
-
-     # release the scratch arrays
-     delete arr;
-     delete duplicate;
-     delete so;
- }
-
- #######################################################################
- # EXEUTIL -IMPORTS
-
- # load_winfunc(): load the function-name table named by winfunc_dat
- # into winfunc[], once. Each kept line contributes
- #   winfunc[$1 "_" $3] = $4
- # which imports() looks up as winfunc[module "_" ordinal] to get a name.
- # Any line containing ";" is skipped as a comment. A missing .DAT file
- # is a fatal error.
- function load_winfunc() \
- {
-     if (winfunc)
-         return; # already loaded
-
-     if ((winfunc_dat2 = getpathname(winfunc_dat)) != "")
-     {
-         # "> 0" matters: getline returns -1 on a read error, and a bare
-         # "while (getline < file)" would then never terminate.
-         while ((getline < winfunc_dat2) > 0)
-             if ($0 !~ /;/) # discard comments
-                 winfunc[$1 "_" $3] = $4;
-         close(winfunc_dat2);
-     }
-     else
-         fail("can't find .DAT file -- " winfunc_dat);
- }
-
- # Finish a reference line: append the reference count when it exceeds
- # 10, otherwise just end the line.
- function imports_refcount(count)
- {
-     if (count > 10)
-         printf(" -- %u references\n", count);
-     else
-         print "";
- }
-
- # imports(): EXEUTIL -IMPORTS report.
- # Walks reloc[module][ordinal] (filled by the relocation-table rule).
- # References to other modules are printed immediately, with the function
- # name from winfunc[] when one is known. References to the file's own
- # module (modname) are folded into internal_reloc[] and
- # internal_reloc_name[] and printed afterwards as internal references.
- function imports() \
- {
-     local key, known;
-
-     FS = " ";
-     load_winfunc();
-
-     printf("%s imported references: \n", toupper(files[1]));
-
-     for (mod in reloc)
-     {
-         if (mod != modname)
-         {
-             for (ordnum in reloc[mod])
-             {
-                 key = mod "_" ordnum;
-                 if (winfunc[key])
-                     printf("\t%s (%s.%s)", winfunc[key], mod, ordnum);
-                 else
-                     printf("\t%s.%s", mod, ordnum);
-                 imports_refcount(reloc[mod][ordnum]);
-             }
-             continue;
-         }
-
-         # internal references really, probably unnamed!
-         for (ordnum in reloc[modname])
-         {
-             internal_reloc[ordnum]++;
-             known = winfunc[modname "_" ordnum];
-             internal_reloc_name[ordnum] = known ? known : "?";
-         }
-     }
-
-     if (internal_reloc)
-     {
-         print "";
-         printf("%s internal references: \n", toupper(files[1]));
-         for (f in internal_reloc)
-         {
-             printf("\t%s (%s.%s)", internal_reloc_name[f], modname, f);
-             imports_refcount(internal_reloc[f]);
-         }
-     }
- }
-
- #######################################################################
- # EXEUTIL -UNDOC (imports)
-
- # load_undocwin(): load the undocumented-function table named by
- # uwin_dat into undocwin[], once. Each kept line contributes
- #   undocwin[$1 "_" $3] = $4
- # which getundoc() looks up as undocwin[module "_" ordinal].
- # Any line containing ";" is skipped as a comment. A missing .DAT file
- # is a fatal error.
- function load_undocwin() \
- {
-     if (undocwin)
-         return; # already loaded
-
-     if ((uwin_dat2 = getpathname(uwin_dat)) != "")
-     {
-         # "> 0" matters: getline returns -1 on a read error, and a bare
-         # "while (getline < file)" would then never terminate.
-         while ((getline < uwin_dat2) > 0)
-             if ($0 !~ /;/) # discard comments
-                 undocwin[$1 "_" $3] = $4;
-         close(uwin_dat2);
-     }
-     else
-         fail("can't find .DAT file -- " uwin_dat);
- }
-
- # getundoc(): EXEUTIL -UNDOC report.
- # Prints every imported module.ordinal that has an entry in undocwin[],
- # notes when KERNEL ordinal 50 (GetProcAddress) is imported, and finally
- # lists internal references whose recorded name equals the undocwin[]
- # entry for the file's own module.
- function getundoc() \
- {
-     local key;
-
-     FS = " ";
-     load_undocwin();
-
-     printf("%s undocumented imports: \n", toupper(files[1]));
-
-     for (mod in reloc)
-     {
-         for (ordnum in reloc[mod])
-         {
-             key = mod "_" ordnum;
-             if (!undocwin[key])
-                 continue; # not in the undocumented table
-
-             printf("\t%s (%s.%s)", undocwin[key], mod, ordnum);
-             if (reloc[mod][ordnum] > 10)
-                 printf(" -- %u references\n", reloc[mod][ordnum]);
-             else
-                 print "";
-         }
-     }
-
-     if (reloc["KERNEL"][50]) # GetProcAddress
-         print "\tuses run-time dynamic linking (GetProcAddress)" ;
-
-     if (internal_reloc)
-     {
-         for (f in internal_reloc)
-         {
-             if (undocwin[modname "_" f] != internal_reloc_name[f])
-                 continue;
-
-             printf("\t%s (%s.%s) -- INTERNAL",
-                 internal_reloc_name[f], modname, f);
-             if (internal_reloc[f] > 10)
-                 printf(" -- %u references\n", internal_reloc[f]);
-             else
-                 print "";
-         }
-     }
- }
-
- #######################################################################
- # EXEUTIL -FINDUNDOC (exports)
-
- # Fires once, on the first record that does not come from ARGV[2] (the
- # EXEDUMP temp file): from here on, input is split into identifier-like
- # fields by treating every run of non-identifier characters as the
- # field separator.
- (not_first_file == 0) && (FILENAME != ARGV[2]) \
- {
-     not_first_file = 1;
-     FS = "[^A-Za-z0-9_]+";
-     # A new FS only applies from the next record on; re-assign $0 so the
-     # current record (the first line of this file) is re-split as well
-     # and its identifiers are not lost to the rules that follow.
-     $0 = $0;
- }
-
- # odometer(): progress indicator. On every 100th record (by NR) writes
- # the record number and the current file name to stderr, ending with a
- # carriage return so the next report overwrites it. The temp file
- # "_tmp.tmp" is shown under the name of the first file argument.
- function odometer()
- {
-     local shown;
-
-     if ((NR % 100) != 0)
-         return;
-
-     if (FILENAME == "_tmp.tmp")
-         shown = files[1];
-     else
-         shown = FILENAME;
-     printf("%u %s\t\t\t\r", NR, shown) > stderr;
- }
-
- # Unconditional rule: update the progress indicator for every record.
- { odometer(); }
-
- # For records of every file after the first one: add each field that
- # starts like an identifier (letter or underscore) to the doc[]
- # dictionary, keyed by its upper-case form.
- not_first_file == 1 \
- {
-     i = 1;
-     while (i <= NF)
-     {
-         if ($i ~ /^[A-Za-z_]/)
-             doc[toupper($i)]++;
-         i++;
-     }
- }
-
- # maybe use if (FILENAME ~ "*.H")???
- # /^.*[ \t]+FAR[ \t]+.*(PASCAL)|(cdecl)|(WINAPI)|(API)[ \t]+.*\(.*\)?;?$/ &&
- # $1 != "typedef" {
- # sub(/\(.*;?/, "", $0) ;
- # # resplit and print last field
- # nf = split($0, f) ;
- # doc[toupper(f[nf])]++;
- # }
-
- # findundoc(): EXEUTIL -FINDUNDOC report.
- # Prints, as "modname . ordinal <TAB> name", every named entry-table
- # export of files[1] whose name does not appear in doc[], the identifier
- # dictionary built from the remaining input files (files[2] is named in
- # the report heading). The heading lines start with ";".
- function findundoc()
- {
-     FS = " "; # restore, since changed it
-     print ";" ;
-     print "; Functions in " files[1] " but not in " files[2] ":" ;
-     if (description)
-         print "; " description ;
-     print ";" ;
-     # for every entry in the entry table
-     for (x in entrytab)
-     {
-         split(entrytab[x], arr); # seg, offset, ordinal, [name]
-         if (arr[4])
-         {
-             # doc[] keys are stored upper-cased, so fold the export name
-             # the same way before the lookup; otherwise an export that
-             # contains lower-case letters could never match.
-             if (! (toupper(arr[4]) in doc))
-                 printf("%s . %d\t%s\n", # display it
-                     modname, arr[3], arr[4]); # modname . ord \t funcname
-         }
-     }
- }
-
- #######################################################################
- #
- # Take output from EXEDUMP and put into arrays
- #
-
- # EXEDUMP output for entry table looks like this:
- # begin entry
- # 1 007b 1 TASKMANDLGPROC
- # 1 1234 2
- # AWK assigns the first field to $1, the second to $2, and the number
- # of fields to NF; NR is the record number. Thus, segment is $1,
- # offset is $2, ordinal is $3, and name (optional) is $4.
-
- # Range rule: active from the "begin entry" line up to and including
- # the next empty line. Within that range, lines with three or four
- # fields are entry-table rows and are stored whole, keyed by NR.
- $0 == "begin entry", NF == 0 \
- {
-     if (NF >= 3 && NF <= 4)
-         entrytab[NR] = $0;
- }
-
- # EXEDUMP output for relocation table looks like this:
- # begin reloc
- # 1 031e [m] USER 135 PTR
- # 1 0302 [m] KERNEL 158 PTR
- # 1 311d [n] WMHANDLER_WNDPROC /* callfunc 1 */ OFF
- # The type (such as [m] or [n]) is $3. For [m], the module is $4 and
- # the ordinal is $5; for [n], the ordinal is $7, and the name is $4.
-
- # Range rule: active from the "begin reloc" line up to and including
- # the next empty line. Relocations are only tallied when do_reloc is
- # set (the -IMPORTS and -UNDOC modes).
- $0 == "begin reloc", NF == 0 \
- {
-     if (do_reloc && $3 == "[m]")
-     {
-         # module.ordinal import: count it under reloc[module][ordinal]
-         reloc[$4][$5]++;
-     }
-     else if (do_reloc && $3 == "[n]")
-     {
-         # internal reference: count per ordinal and remember its name
-         internal_reloc[$7]++;
-         internal_reloc_name[$7] = $4;
-     }
- }
-
- # "description" line: keep the text between the first pair of double
- # quotes, when there is any.
- $1 == "description" \
- {
-     if (split($0, arr, "\"") >= 2 && arr[2])
-         description = arr[2];
- }
-
- # "modname" line: the second field is the module name.
- $1 == "modname" { modname = $2; }
-
- # main
- # Prints the banner, parses the mode switch in ARGV[1], validates the
- # file arguments, runs EXEDUMP on the first file into _tmp.tmp and then
- # rewrites ARGV so that the main input loop reads that dump. `okay` is
- # set only when all of this succeeded; the END rule checks it.
- BEGIN \
- {
-     print "Windows New-Executable (NE) Header Utilities version 1.0" > stderr;
-     print "from \"Undocumented Windows\" by Schulman et al. " \
-         "(Addison-Wesley, 1992)" > stderr;
-     print "Copyright (c) 1992 Andrew Schulman. All rights reserved." > stderr;
-     print "";
-
-     winfunc_dat = "WINFUNC.DAT" ; # default
-     uwin_dat = "UNDOCWIN.DAT" ;
-
-     # mode flags, tested with and()
-     do_reloc = 0;
-     do_options = 0;
-     DO_DIFF = 1;
-     DO_DUPES = 2;
-     DO_FINDUNDOC = 4;
-     DO_IMPORTS = 8;
-     DO_UNDOC = 16;
-
-     if ((ARGC < 2) || (ARGV[1] == "/?"))
-         help();
-
-     arg = toupper(ARGV[1]);
-     if (arg == "-FINDUNDOC")
-         do_options = DO_FINDUNDOC;
-     else if (arg == "-DIFF")
-         do_options = DO_DIFF;
-     else if (arg == "-DUPES")
-         do_options = DO_DUPES;
-     else if (arg == "-IMPORTS")
-     {
-         do_reloc++; # relocation table needed
-         do_options = DO_IMPORTS;
-     }
-     else if (arg == "-UNDOC")
-     {
-         do_reloc++; # relocation table needed
-         do_options = DO_UNDOC;
-     }
-
-     # everything after the switch is a file name
-     for (i=2; i<ARGC; i++)
-         files[++num_files] = ARGV[i];
-
-     if (do_options == 0)
-         help();
-     if (num_files < 1)
-         help();
-
-     check_magic(files[1]);
-
-     if (and(do_options, DO_DIFF))
-     {
-         if (num_files != 2)
-             fail("-DIFF requires two NE files");
-         check_magic(files[2]);
-     }
-
-     if (and(do_options, DO_FINDUNDOC))
-     {
-         if (num_files != 2)
-             fail("-FINDUNDOC requires an NE file and a header");
-     }
-
-     if (do_reloc)
-         run("exedump.exe", " -nobanner " files[1] " > _tmp.tmp");
-     else
-         run("exedump.exe", " -nobanner -noreloc " files[1] " > _tmp.tmp");
-
-     # Replace the switch and the first file name by the dump, so the
-     # main loop reads _tmp.tmp instead (empty ARGV elements are skipped).
-     ARGV[1] = "";
-     ARGV[2] = "_tmp.tmp";
-     # In -DIFF mode ARGV[3] is the second NE file. It is dumped and read
-     # by diffhdr() itself; left in ARGV, the raw binary would be fed to
-     # the main input loop as if it were a header file.
-     if (and(do_options, DO_DIFF))
-         ARGV[3] = "";
-     okay = 1;
- }
-
- # END: run the report selected in BEGIN, then remove the temp file.
- # When BEGIN did not complete (`okay` unset: usage help or a fatal
- # error), no report is produced.
- END \
- {
-     if (! okay)
-     {
-         # A failed start-up may already have created the temp file;
-         # remove it instead of leaving it behind. filesize() reports -1
-         # for a file that does not exist.
-         if (filesize("_tmp.tmp") != -1)
-             system("del _tmp.tmp");
-         exit;
-     }
-
-     if (and(do_options, DO_DIFF)) diffhdr();
-     if (and(do_options, DO_DUPES)) dupes();
-     if (and(do_options, DO_FINDUNDOC)) findundoc();
-     if (and(do_options, DO_IMPORTS)) imports();
-     if (and(do_options, DO_UNDOC)) getundoc();
-
-     # Make sure no read handle on the temp file is still open before
-     # asking the shell to delete it.
-     close("_tmp.tmp");
-     system("del _tmp.tmp");
- }
-
-
-