home *** CD-ROM | disk | FTP | other *** search
- title CLEAN - Text file filter
- page 55,132
- ;
- ; CLEAN.ASM Filter to turn document files into
- ; normal text files.
- ;
- ; Copyright 1988 Ray Duncan
- ;
- ; Build: C>MASM CLEAN;
- ; C>LINK CLEAN;
- ;
- ; Usage: C>CLEAN <infile >outfile
- ;
- ; All text characters are passed through with high
- ; bit stripped off. Form feeds, carriage returns,
- ; and line feeds are passed through. Tabs are expanded
- ; to spaces. All other control codes are discarded.
- ;
-
- tab equ 09h ; ASCII tab code
- lf equ 0ah ; ASCII line feed
- ff equ 0ch ; ASCII form feed
- cr equ 0dh ; ASCII carriage return
- blank equ 020h ; ASCII space code
- eof equ 01ah ; Ctrl-Z End-of-file
-
- tabsiz equ 8 ; width of tab stop
-
- bufsiz equ 128 ; size of input and
- ; output buffers
-
- stdin equ 0000 ; standard input handle
- stdout equ 0001 ; standard output handle
- stderr equ 0002 ; standard error handle
-
-
- _TEXT segment word public 'CODE'
-
- assume cs:_TEXT,ds:_DATA,es:_DATA,ss:STACK
-
- clean proc far ; entry point from MS-DOS.
-
- push ds ; save DS:0000 for final
- xor ax,ax ; return to MS-DOS, in case
- push ax ; Function 4CH can't be used.
-
- mov ax,_DATA ; make data segment addressable
- mov ds,ax
- mov es,ax
-
- mov ah,30h ; check version of MS-DOS
- int 21h
- cmp al,2 ; MS-DOS 2.0 or later?
- jae clean1 ; jump if version OK
-
- ; MS-DOS 1.x, display error
- ; message and exit...
- mov dx,offset msg1 ; DS:DX = message address
- mov ah,9 ; Fxn 9 = display string
- int 21h ; transfer to MS-DOS
- ret ; then exit the old way
-
- clean1: call init ; initialize input buffer
-
- clean2: call getc ; get character from input
- jc clean9 ; exit if end of stream
-
- and al,07fh ; strip off high bit
-
- cmp al,blank ; is it a control char?
- jae clean4 ; no, write it
-
- cmp al,eof ; is it end of file?
- je clean8 ; yes, write EOF and exit
-
- cmp al,tab ; is it a tab?
- je clean6 ; yes, expand it to spaces
-
- cmp al,cr ; is it a carriage return?
- je clean3 ; yes, go process it.
-
- cmp al,lf ; is it a line feed?
- je clean3 ; yes, go process it.
-
- cmp al,ff ; is it a form feed?
- jne clean2 ; no, discard it
-
- clean3: mov column,0 ; if CR, LF, or FF
- jmp clean5 ; reset column to zero
-
- clean4: inc column ; if non-control char.
- ; increment column counter
-
- clean5: call putc ; write char. to stdout
- jnc clean2 ; if disk not full,
- ; get another character
-
- ; write failed...
- mov dx,offset msg2 ; DS:DX = error message
- mov cx,msg2_len ; CX = message length
- mov bx,stderr ; BX = standard error handle
- mov ah,40h ; Fxn 40H = write
- int 21h ; transfer to MS-DOS
-
- mov ax,4c01h ; Fxn 4CH = terminate
- ; return code = 1
- int 21h ; transfer to MS-DOS
-
- clean6: mov ax,column ; tab code detected
- cwd ; tabsiz - (column MOD tabsiz)
- mov cx,tabsiz ; is number of spaces needed
- idiv cx ; to move to next tab stop
- sub cx,dx
-
- add column,cx ; also update column counter
-
- clean7: push cx ; save spaces counter
-
- mov al,blank ; write an ASCII space
- call putc
-
- pop cx ; restore spaces counter
- loop clean7 ; loop until tab stop
-
- jmp clean2 ; get another character
-
- clean8: call putc ; write EOF mark
-
- clean9: call flush ; write last output buffer
- mov ax,4c00h ; Fxn 4CH = terminate
- ; return code =0
- int 21h ; transfer to MS-DOS
-
- clean endp
-
-
- getc proc near ; get char. from stdin
- ; returns Carry = 1 if
- ; end of input, else
- ; AL = char, Carry = 0
-
- mov bx,iptr ; get input buffer pointer
- cmp bx,ilen ; end of buffer reached?
- jne getc1 ; not yet, jump
-
- ; more data is needed...
- mov bx,stdin ; BX = standard input handle
- mov cx,bufsiz ; CX = length to read
- mov dx,offset ibuff ; DS:DX = buffer address
- mov ah,3fh ; Fxn 3FH = read
- int 21h ; transfer to MS-DOS
- jc getc2 ; jump if read failed
-
- or ax,ax ; was anything read?
- jz getc2 ; jump if end of input
-
- mov ilen,ax ; save length of data
- xor bx,bx ; reset buffer pointer
-
- getc1: mov al,[ibuff+bx] ; get char. from buffer
- inc bx ; bump buffer pointer
-
- mov iptr,bx ; save updated pointer
- clc ; return char in AL
- ret ; and Carry = 0 (clear)
-
- getc2: stc ; end of input stream
- ret ; return Carry = 1 (set)
-
- getc endp
-
-
- putc proc near ; send char. to stdout
- ; returns Carry = 1 if
- ; error, else Carry = 0
-
- mov bx,optr ; store character into
- mov [obuff+bx],al ; output buffer
-
- inc bx ; bump buffer pointer
- cmp bx,bufsiz ; buffer full?
- jne putc1 ; no, jump
-
- mov bx,stdout ; BX = standard output handle
- mov cx,bufsiz ; CX = length to write
- mov dx,offset obuff ; DS:DX = buffer address
- mov ah,40h ; Fxn 40H = write
- int 21h ; transfer to MS-DOS
- jc putc2 ; jump if write failed
-
- cmp ax,cx ; was write complete?
- jne putc2 ; jump if disk full
-
- xor bx,bx ; reset buffer pointer
-
- putc1: mov optr,bx ; save buffer pointer
- clc ; write successful,
- ret ; return Carry = 0 (clear)
-
- putc2: stc ; write failed or disk full,
- ret ; return Carry = 1 (set)
-
- putc endp
-
-
- init proc near ; initialize input buffer
-
- mov bx,stdin ; BX = standard input handle
- mov cx,bufsiz ; CX = length to read
- mov dx,offset ibuff ; DS:DX = buffer address
- mov ah,3fh ; Fxn 3FH = read
- int 21h ; transfer to MS-DOS
- jc init1 ; jump if read failed
- mov ilen,ax ; save actual bytes read
- init1: ret
-
- init endp
-
-
- flush proc near ; flush output buffer
-
- mov cx,optr ; CX = bytes to write
- jcxz flush1 ; exit if buffer empty
- mov dx,offset obuff ; DS:DX = buffer address
- mov bx,stdout ; BX = standard output handle
- mov ah,40h ; Fxn 40H = write
- int 21h ; transfer to MS-DOS
- flush1: ret
-
- flush endp
-
- _TEXT ends
-
-
- _DATA segment word public 'DATA'
-
- ibuff db bufsiz dup (0) ; input buffer
- obuff db bufsiz dup (0) ; output buffer
-
- iptr dw 0 ; ibuff pointer
- ilen dw 0 ; bytes in ibuff
- optr dw 0 ; obuff pointer
-
- column dw 0 ; current column counter
-
- msg1 db cr,lf
- db 'clean: need MS-DOS version 2 or greater.'
- db cr,lf,'$'
-
- msg2 db cr,lf
- db 'clean: disk is full.'
- db cr,lf
- msg2_len equ $-msg2
-
- _DATA ends
-
-
- STACK segment para stack 'STACK'
-
- dw 64 dup (?)
-
- STACK ends
-
- end clean
-