CLS
InFile$="A SIMPLE STRING TO BE ENCODED USING A MINIMAL NUMBER OF BITS"
CALL Huffman(InFile$,OutFile$,NewFile$)
print:print:print
PRINT "In:  ";LEN(InFile$);InFile$
PRINT "Out: ";LEN(OutFile$)
PRINT "New: ";LEN(NewFile$);NewFile$
input,r
END
'*****************************************************************************
'   Huffman Encoding File Compression Technique
'
'   From: R Sedgwick.  Algorithms.  Reading, MA: Addison-Wesley.
'                      1984.  Second Ed.  pp  286 / 93.
'
'   Converted to Power Basic by M. Rosenberg CI$: [73707,2545]
'
SUB Huffman(InText$,OutText$,NewText$)
	SHARED N%,Heap%(),Count%()
	DIM Count%(1024),Heap%(1024),Dad%(1024),Code%(256),Leng%(256)

' Count the frequency of each character in the message to be encoded (P. 287)

	FOR I%=0 to 255 : Count%(I%)=0 : NEXT I%
	Csr%=0 :  DO : INCR Csr% : X%=ASC(MID$(InText$,Csr%,1)) : INCR Count%(X%)
			LOOP UNTIL Csr%=LEN(InText$)

' Initialize the heap array to point to non-zero frequency counts (P. 290)

	N%=0 : FOR I%=0 to 255 : IF Count%(I%)<>0 THEN INCR N% : Heap%(N%)=I%
		  NEXT I%
' Construct an indirect heap on the frequency values (P. 289)

	FOR K% = N% TO 1 STEP -1 : CALL PqDownHeap(K%) : NEXT K%

' Construct the trie (P. 290)
	DO : T%=Heap%(1) : Heap%(1)=Heap%(N%) : DECR N%
		CALL PqDownHeap(1)
		Count%(255+N%)=Count%(Heap%(1))+Count%(T%)
		Dad%(T%)=255+N% : Dad%(Heap%(1))=-255-N%
		Heap%(1)=255+N% : CALL PqDownHeap(1)
	LOOP UNTIL N%=1
	Dad%(255+N%)=0

' Reconstruct the information from the representation of the coding tree (P.291)
'    computed during the sifting process.

	FOR K% = 0 TO 255
	    IF Count%(K%)=0 THEN
			Code%(K%)=0 : Leng%(K%)=0
	    ELSE
			I%=0 : J&=1 : T%=Dad%(K%) : X%=0
			DO : IF T%<0 THEN X%=X%+J& : T%=0-T%
				T%=Dad%(T%) : J&=J&+J& : INCR I%
			LOOP UNTIL T%=0
			Code%(K%)=X% : Leng%(K%)=I%
	    END IF
	NEXT K%

' Use the computed representations of the code to encode the string (P. 292)

	J%=0 : OutText$="" : Hold$=""
	DO : INCR J%
		Char%=ASC(MID$(InText$,J%,1)) : Compr$=BIN$(Code%(Char%))
		DO WHILE LEN(Compr$)< Leng%(Char%) : Compr$="0"+Compr$ : LOOP
		Hold$=Hold$+Compr$
		IF LEN(Hold$)>8 THEN
			OutText$=OutText$+CHR$(Bin2Int(LEFT$(Hold$,8)))
			Hold$=RIGHT$(Hold$,LEN(Hold$)-8)
		END IF
	LOOP UNTIL J%=LEN(InText$)

' Add a byte at the end that contains any left-over bits

	IF LEN(Hold$)>0 THEN
		Hold$=Hold$+STRING$(8-LEN(Hold$),"0")
		OutText$=OutText$+CHR$(Bin2Int(LEFT$(Hold$,8)))
	END IF
'*****************************************************************************
' Unpack compressed string into character representation of binary

	J%=0 : UnCompr$="" : NewText$=""
	DO : INCR J%
		Hold$=MID$(OutText$,J%,1) : Hold$=BIN$(ASC(Hold$))
		DO WHILE LEN(Hold$)<8 : Hold$="0"+Hold$ : LOOP
		UnCompr$=UnCompr$+Hold$
	LOOP UNTIL J%=LEN(OutText$)

' Decode compressed string

	DO : FOR  K%=1 TO 256
			IF K%=256 THEN EXIT LOOP 		'All done
			IF  Leng%(K%)>0 THEN
				IF Bin2Int(LEFT$(UnCompr$,Leng%(K%)))=Code%(K%) THEN
					UnCompr$=RIGHT$(UnCompr$,LEN(UnCompr$)-Leng%(K%))
					NewText$=NewText$+CHR$(K%) : EXIT FOR
				END IF
			END IF
		NEXT K%
	LOOP UNTIL LEN(UnCompr$) = 0


END SUB 'Huffman

SUB PqDownHeap(K%)
' Build and maintain an indirect heap on the frequency values (P. 139)
'     reversing the inequalities since we want the smallest values first.

	SHARED N%,Heap%(),Count%()
	LOCAL J%,V%,Limit%
	V%=Heap%(K%) : Limit% = N%/2
	DO WHILE K% <= Limit%
	   J%=K%+K%
	   IF J%<N% THEN IF Count%(Heap%(J%)) > Count%(Heap%(J%+1)) THEN INCR J%
	   IF Count%(V%)<=Count%(Heap%(J%)) THEN Heap%(K%)=V% : EXIT SUB
	   Heap%(K%)=Heap%(J%) : Heap%(J%)=V% : K%=J%
	LOOP
END SUB 'PqDownHeap

'*****************************************************************************
FUNCTION Bin2Int(X$)
	X$=RTRIM$(X$) :X$=LTRIM$(X$) : Ll%=LEN(X$) : Ex%=0 : Tot%=0 : I%=Ll%
	DO WHILE I% > 0
	    IF MID$(X$,I%,1)="1" THEN Tot&=Tot&+(2^Ex&)
	    INCR Ex& : DECR I% : WEND
	Bin2Int=Tot&
END FUNCTION 'Bin2Int
