home *** CD-ROM | disk | FTP | other *** search
- name clean
- page 55,132
- title 'CLEAN - Filter text file'
- ;
- ; CLEAN.ASM Filter to turn document files into
- ; normal text files.
- ;
- ; Usage is: C>CLEAN <infile >outfile
- ;
- ; All text characters are passed through with high bit stripped
- ; off. Form feeds, carriage returns, and line feeds are passed
- ; through. Tabs are expanded to spaces. All other control codes
- ; are discarded.
- ;
- ; Copyright (c) 1984, 1985 by Ray Duncan
- ;
- ; To assemble and link into an EXE file for execution:
- ;
- ; C>MASM CLEAN;
- ; C>LINK CLEAN;
- ;
- ; ---- ASCII codes the filter recognizes ----
- tab     equ     9               ; horizontal tab
- lf      equ     10              ; line feed
- ff      equ     12              ; form feed
- cr      equ     13              ; carriage return
- eof     equ     26              ; Ctrl-Z, used as the end-of-file mark
- blank   equ     32              ; space; lowest non-control character
-
- ; ---- tab expansion ----
- tab_wid equ     8               ; columns between tab stops
-
- ; ---- handle numbers passed in BX to the read/write calls ----
- stdin   equ     0               ; standard input
- stdout  equ     1               ; standard output
- stderr  equ     2               ; standard error
-
- cseg segment para public 'CODE'
-
- assume cs:cseg,ds:dseg,es:dseg,ss:sseg
-
- ;
- ; clean: program entry point, entered from MS-DOS.
- ;
- ; Reads standard input one character at a time with getchar and
- ; writes the filtered stream to standard output with putchar:
- ;   * the high bit of every character is cleared;
- ;   * characters >= blank are copied and advance 'column';
- ;   * CR, LF and FF are copied and reset 'column' to 0;
- ;   * a tab is replaced by blanks up to the next multiple of tab_wid;
- ;   * EOF (which getchar also returns at end of input) is copied and
- ;     the program terminates with return code 0;
- ;   * every other control code is dropped.
- ; If putchar reports a failure (carry set), msg2 is written to the
- ; standard error handle and the program terminates with code 1.
- ;
- clean   proc    far
-
-         push    ds              ; build a far return address of
-         xor     ax,ax           ; PSP:0000 for the DOS 1.x exit below
-         push    ax              ; (relies on DS = PSP at EXE entry)
-
-         mov     ax,dseg         ; make our data segment addressable
-         mov     ds,ax
-         mov     es,ax
-
-         mov     ah,30h          ; get MS-DOS version, major in AL
-         int     21h
-         cmp     al,2
-         jae     clean2          ; DOS 2.0 or later: handles available
-         mov     dx,offset msg1  ; DOS 1.x: only the old '$'-terminated
-         mov     ah,9            ; string output call can be used
-         int     21h
-         ret                     ; far return to PSP:0000.  INT 21h
-                                 ; function 0 is not usable here: it
-                                 ; needs CS = PSP, which is not the
-                                 ; case inside an EXE code segment.
-
- clean2: call    getchar         ; AL = next input char (EOF at end)
-         and     al,07fh         ; strip the high bit
-         cmp     al,blank
-         jae     clean4          ; not a control code: copy it
-         cmp     al,eof
-         je      clean9          ; end of file: finish up
-         cmp     al,tab
-         je      clean7          ; tab: expand to blanks
-         cmp     al,cr
-         je      clean3          ; CR, LF and FF are the only control
-         cmp     al,lf           ; codes that are copied through
-         je      clean3
-         cmp     al,ff
-         jne     clean2          ; anything else is discarded
-
- clean3: mov     column,0        ; CR/LF/FF: output position restarts
-         jmp     clean5          ; at column 0
-
- clean4: inc     column          ; ordinary char occupies one column
-
- clean5: call    putchar         ; copy the character in AL
-         jnc     clean2          ; carry clear: get the next one
-
- cleanerr:
-         mov     dx,offset msg2  ; a write to standard output failed
-         mov     cx,msg2_len
-
- clean6: mov     bx,stderr       ; write CX bytes at DS:DX to the
-         mov     ah,40h          ; standard error handle
-         int     21h
-         mov     ax,4c01h        ; terminate with return code 1
-         int     21h
-
- clean7: mov     ax,column       ; CX = tab_wid - (column mod tab_wid)
-         xor     dx,dx           ; column is an unsigned count, so use
-         mov     cx,tab_wid      ; an unsigned divide: a signed one
-         div     cx              ; gives a negative remainder (and too
-         sub     cx,dx           ; many blanks) once column >= 32768
-         add     column,cx       ; account for the blanks to be sent
-
- clean8: push    cx              ; putchar overwrites CX
-         mov     al,blank
-         call    putchar
-         pop     cx              ; POP leaves the carry flag intact
-         jc      cleanerr        ; stop if the blank was not written
-         loop    clean8          ; until the tab stop is reached
-         jmp     clean2
-
- clean9: call    putchar         ; copy the EOF mark to the output.
-                                 ; Result deliberately not checked.
-                                 ; NOTE(review): a character device in
-                                 ; cooked mode is believed to report 0
-                                 ; bytes written for Ctrl-Z, which must
-                                 ; not be shown as "disk is full";
-                                 ; confirm against DOS documentation.
-         mov     ax,4c00h        ; terminate with return code 0
-         int     21h
-
- clean   endp
-
- ;
- ; getchar: read one byte from the standard input handle.
- ;
- ; Returns: AL = the byte read, or the EOF code when the read call
- ;          fails or delivers no bytes.
- ; Changes: AX, BX, CX, DX, flags; the byte is also left in ibuff.
- ;
- getchar proc    near
-         mov     bx,stdin        ; handle to read from
-         mov     cx,1            ; request a single byte
-         mov     dx,offset ibuff ; DS:DX = destination buffer
-         mov     ah,3fh          ; function 3FH = read via handle
-         int     21h
-         jc      getc1           ; carry set: AX is an error code, not
-                                 ; a byte count; treat as end of input
-                                 ; rather than returning a stale ibuff
-         or      ax,ax           ; AX = number of bytes actually read
-         jz      getc1           ; zero bytes: end of input
-         mov     al,ibuff        ; hand the byte back in AL
-         ret
- getc1:  mov     al,eof          ; nothing usable was read: report an
-         ret                     ; End-of-File mark to the caller
- getchar endp
-
- ;
- ; putchar: write the byte in AL to the standard output handle.
- ;
- ; Entry:   AL = byte to write.
- ; Returns: carry clear if exactly one byte was written,
- ;          carry set if the call failed or wrote nothing.
- ; Changes: AX, BX, CX, DX, flags; the byte is also left in obuff.
- ;
- putchar proc    near
-         mov     obuff,al        ; the write call needs a memory buffer
-         mov     bx,stdout       ; handle to write to
-         mov     cx,1            ; write a single byte
-         mov     dx,offset obuff ; DS:DX = source buffer
-         mov     ah,40h          ; function 40H = write via handle
-         int     21h
-         jc      putc1           ; carry set: AX is an error code and
-                                 ; must not be compared as a count
-         cmp     ax,1            ; AX = number of bytes written
-         jne     putc1           ; short write (e.g. device is full)
-         clc                     ; success: return carry = 0
-         ret
- putc1:  stc                     ; failure: return carry = 1
-         ret
- putchar endp
-
- cseg ends
-
- dseg    segment para public 'DATA'
-
- ; single-byte transfer buffers used by getchar and putchar
- ibuff   db      0               ; last byte read from standard input
- obuff   db      0               ; byte being written to standard output
-
- column  dw      0               ; output column, used for tab expansion
-
- ; shown with the '$'-terminated string output call under DOS 1.x
- msg1    db      cr,lf,'clean: need MS-DOS version 2 or greater.',cr,lf,'$'
-
- ; written to standard error when an output write fails
- msg2    db      cr,lf,'clean: disk is full.',cr,lf
- msg2_len equ    $-msg2          ; byte count of msg2 for the write call
-
- dseg    ends
- ; stack segment: 128 uninitialized bytes (same size as 64 words)
- sseg    segment para stack 'STACK'
-
-         db      128 dup (?)
- sseg    ends
-
- end clean