;
; #######################################  Max Reason
; #####  Assembly Language Library  #####  copyright 1988-2000
; #######################################  Windows XBasic assembly language library
;
; subject to LGPL - see COPYING_LIB
;
; maxreason@maxreason.com
;
; for Windows XBasic
;
; PROGRAM "xlib"
; VERSION "0.017"
;
; revision November 2003
; Added _XstTry@12 and _XstGetExceptionInformation@8
; and other modifications to handle exceptions
; code by Ken Minogue
;
; revision September 2005 - GH
; - Converted to GoAsm
; - Deleted extraneous initialization code.
;
; revision April 2006 - GH
;	- went through all the code and deleted extraneous stuff
;	- optimized where possibilities were evident
;	- simplified memory allocation slightly
; - "cleaned up" code and comments
;
; revision 0.018 April 2007 - DS
; - modified code to remove calls to Xit, XstCauseException, and XstSystemExceptionToException
;
;
; Mostly assembly language source code for XBasic language intrinsics
; like ABS(), LEFT$(), MID$(), TRIM$(), etc.
;
; This file contains assembly language routines for several purposes, including:
;   1. Startup initialization - XxxMain is called by xinit.s or app at startup
;   2. Error handling - handle "jmp %eeeErrorName" in XBasic source programs
;   3. Dynamic memory management - malloc, calloc, recalloc, free, etc...
;   4. Array management - DimArray, RedimArray, FreeArray
;   5. Intrinsic functions - ABS(), BIN$(), CHR$(), etc...
;   6. General support routines, especially for program development environment

; To create the program development environment, this file is assembled into
; object file "xlib.o" which is linked to, and becomes part of, the program
; development environment - aka PDE.  The global variables in xlib.s are
; therefore in the PDE executable file, and are read in by the PDE when it
; starts up.  The addresses of all xlib.s routines are therefore available
; to the compiler and calls to xlib.s routines in user programs are resolved
; without difficulty.

; When standalone XBasic programs are created by WindowsNT tools, they
; are linked to "xlib.dll", a DLL version of xlib.  Program references
; to xlib.s routines are thus resolved by the Windows program loader.

; External variables are not shared in the same manner in both cases.
; External variables are shared by all programs linked into a single
; executable (.DLL or .EXE).  External variables in user programs are
; not shared with .DLL libraries.  So function libraries developed in
; the PDE should not contain external variables - at least not external
; variables meant to be shared by programs or other function libraries
; that use the .DLL as a .DLL.  External variables are only shared with
; programs linked into a single .EXE or .DLL.


; #######################
; #####  CONSTANTS  #####  assembly language constants for this file
; #######################

; flAllocationType / flProtect arguments passed to _VirtualAlloc@16 by initialize:
;   MEM_RESERVE + PAGE_NOACCESS  are used for the 512mb reservation
;   MEM_COMMIT  + PAGE_READWRITE are used to commit the first 8mb
#define  PAGE_NOACCESS	  				0x00000001
#define  MEM_RESERVE	  					0x00002000
#define  PAGE_READWRITE   				0x00000004
#define  MEM_COMMIT	  						0x00001000

; Exception codes loaded into eax by the %_eee... / %_... error stubs and passed
; to _RaiseException@16 by %_RuntimeError.
; the following are values for sysException after calling XstExceptionToSystemException using exception values shown on right
#define EXCEPTION_MEMORY_ALLOCATION	0xE000001A  ; 26
#define EXCEPTION_INT_OVERFLOW			0xC0000095  ; 19
#define EXCEPTION_OUT_OF_BOUNDS			0xC000008C  ; 2
#define EXCEPTION_NODE_NOT_EMPTY		0xE000001B  ; 27
#define EXCEPTION_ARRAY_DIMENSION		0xE000001C  ; 28
#define EXCEPTION_INVALID_ARGUMENT	0xE000001D  ; 29

; Six-dword structure (24 bytes), every field initialized to 0.
; In this file it is referenced only by the commented-out Mem_Thread_Lock
; declaration in the data section and the commented-out
; _InitializeCriticalSection@4 call in initialize.
CRITICAL_SECTION STRUCT
	DebugInfo      dd	0
	LockCount      dd	0
	RecursionCount dd	0
	OwningThread   dd	0
	LockSemaphore  dd	0
	Reserved       dd	0
ENDS

; ############################################
; ############################################
; #####  		CODE SECTION  							 #####
; ############################################
; ############################################


; ########################
; #####  XxxMain ()  #####
; ########################

; _XxxMain is not called from this code. A minimal _XxxMain is retained,
; however, for the sake of compatibility with previous EXEs.

.code
align	8
; _XxxMain - compatibility stub: does no work and returns immediately.
; In:   8 dword arguments on the stack (32 bytes), discarded by the ret
; Out:  all registers unchanged
_XxxMain:
	; [ebp+28] = %_StartApplication
	; invoke _Xit@4, [ebp+28]
	; (the two lines above are the retired startup call, kept for reference only)
	ret 32  												; remove 8 entry arguments and return to WinMain()


; ###########################
; #####  initialize ()  #####
; ###########################

; initialize / _initialize
;   Reserve 512mb of address space for dynamic ("dyno") memory, commit the
;   first 8mb, build the first and last block headers, then allocate one
;   permanent 16-byte block at the bottom of the area and copy %pdeString into it.
; In:    nothing
; Out:   eax = 0 if either _VirtualAlloc@16 call failed (the routine returns at once);
;        otherwise eax = 0x80130001 (the info word loaded before the final copy)
; Uses:  eax, ebx, ecx, edx, esi, edi - none of them are saved here
; NOTE(review): if the commit fails, Dyn_Base and Dyn_Headers_Start are already
;        set, %initialized is still 0, and the reservation is not released.
initialize:
_initialize:

; allocate 512mb dynamic memory
	invoke _VirtualAlloc@16, 0, 0x20000000, MEM_RESERVE, PAGE_NOACCESS
	test	eax,eax										; did it fail?
	jnz	> allocOK										; 0 = failure
	ret															; Can't allocate memory, return with error

; dynamic memory successfully reserved
; now we commit the first 8mb
allocOK:
	mov	[Dyn_Base],eax							; store the base of Dyn_Headers_Start area
	mov	[Dyn_Headers_Start],eax			; ditto
	invoke _VirtualAlloc@16, eax, 0x00800000, MEM_COMMIT, PAGE_READWRITE
	test	eax,eax										; did it fail?
	jnz	>	allocFinished							; 0 = failure
	ret															; Can't allocate memory, return with error

allocFinished:
	mov	eax,[Dyn_Base]							; base of Dyn_Headers_Start area
	add	eax,0x00800000							; after Dyn_Headers_Start area (8mb)
	mov	[Dyn_Page_End],eax					; after committed area
	sub	eax,16											; eax = last header addr
	mov	[Dyn_Headers_End],eax
; the flag is set before the headers are built and before %____calloc is called below
	mov d[%initialized],-1        	; dynamic memory has been initialized


; Build low header and high header to allocate stretchy space.
; To start off, all of dynamic memory is in one big free block.
; Each header is 4 dwords: +0 addr-uplink, +4 addr-downlink, +8 size-uplink, +12 size-downlink
	mov	eax,[Dyn_Headers_Start]			; eax -> first dyno header
	xor	ecx,ecx											; ready to zero some stuff later
	mov	[%pointers+0x40],eax				; first (and only) dyno block is a big one
	mov	ebx,[Dyn_Headers_End]				; ebx -> last dyno header
	mov	edx,ebx											; edx -> last dyno header
	sub	ebx,eax											; ebx = size of the one block
	mov	[eax+0],ebx									; addr-uplink(first) = size(first)
	mov	[eax+4],ecx									; addr-downlink(first) = 0 (none)
	mov	[eax+8],ecx									; size-uplink(first) = 0 (none)
	mov	[eax+12],ecx								; size-downlink(first) = 0 (none)
	mov	[edx+0],ecx									; addr-uplink(last) = 0
	mov	[edx+4],ebx									; addr-downlink(last) = size(first)
	mov	[edx+8],ecx									; size-uplink(last) = 0 (none)
	mov	[edx+12],ecx								; size-downlink(last) = 0 (none)   11/04/93
	or	ebx,0x80000000							; mark allocated  11/04/93
	mov	[edx+4],ebx									; mark allocated  11/04/93 (overwrites the store made 4 lines up)

; setup for Thread Safety
;	invoke _InitializeCriticalSection@4, addr Mem_Thread_Lock

; allocation routines blow up unless there's a permanent allocated
; memory block at the bottom of the dyno memory area, so make one!
; NOTE(review): the result of %____calloc is not checked; the code below assumes
; it returns with esi -> the new block - confirm against %____calloc.
	mov	esi,16											; esi = 16 bytes
	call	%____calloc								; allocate 16 byte chunk
	mov eax,0x80130001							; info word = allocated string
	mov	[esi-4],eax									; save info word
	mov d[esi-8],14									; save length
	mov	edi,esi											; destination
	mov	esi,ADDR %pdeString					; source
	mov	ecx,14											; count (length of %pdeString)
	rep movsb												; copies forward only if the direction flag is clear (no cld here)
	ret


; ############################
; #####  XxxTerminate@0  #####
; ############################

; _XxxTerminate@0
;   Free the loaded libraries, decommit the dyno memory pages and end the process.
; In:    eax = value handed to _ExitProcess@4 (see the push below)
; Out:   not expected to return; the trailing ret is only reached if
;        _ExitProcess@4 comes back
; Uses:  eax, ebx
_XxxTerminate@0:
	push	eax												; stays on the stack across the two calls below
																	; (assuming each removes its own arguments, as the
																	; @8 / @12 suffixes suggest) and so becomes the
																	; argument of _ExitProcess@4
	invoke _XxxXstFreeLibrary@8, 0, -1	; free all libraries

	mov	ebx,[Dyn_Page_End]					; ebx = end of the committed area
	mov	eax,[Dyn_Base]							; eax = base of dyno memory
	sub	ebx,eax											; ebx = # of bytes from base to Dyn_Page_End
	; 0x4000 is MEM_DECOMMIT.
	; NOTE(review): this decommits the pages but does not release the reservation
	; made in initialize - harmless only because the process exits next.
	invoke _VirtualFree@12, eax, ebx, 0x4000

	call	_ExitProcess@4						; argument = the eax pushed on entry
	ret


; *********************
; *****  %_error  *****  errorNumber = ERROR (arg)
; *********************

; %_error - read, and optionally replace, the ERROR variable.
; In:    eax = new error number, or -1 to read ERROR without changing it
; Out:   eax = value ERROR held on entry
;        ebx = entry eax + 1 (scratch; 0 when the argument was -1)
%_error:
	mov	ebx,eax											; ebx = arg
	inc	ebx													; arg = -1 gives ebx = 0
	jnz	> setError									; any other arg replaces ERROR

getError:
	mov	eax,[ERROR]								; query only: eax = ERROR, ERROR unchanged
	ret

setError:
	xchg eax,[ERROR]								; swap: eax = old ERROR, ERROR = arg
	ret


; ******************************
; *****  SUPPORT ROUTINES  *****
; ******************************

; ##########################
; #####  %_ZeroMemory  #####
; ##########################

; optimized 24 November 2005, Greg Heller
; (should add MMX instructions for really big blocks someday)

; %_ZeroMemory / _XxxZeroMemory
;   Zero the whole dwords lying between two addresses.
; In:    esi, edi = the two ends of the block, in either order; the lower address
;                   is the first byte zeroed, the higher is the byte after the last
; Out:   eax = 0, ecx = 0
;        esi -> start of the block (esi and edi are swapped if they arrived reversed)
;        edi -> first byte after the zeroed dwords
; Note:  the byte count is truncated to whole dwords (shr ecx,2), so up to 3
;        trailing bytes are left untouched when the length is not a multiple of 4.
;
; The inner labels %_ZeroMem0 / %_ZeroMem1 / %_ZeroMem2 expect
;        edi -> destination, ecx = # of dwords to zero.
%_ZeroMemory:
_XxxZeroMemory:
	mov	ecx,edi											; ecx = byte after last
	sub	ecx,esi											; ecx = # of bytes to zero
	jnb	> zmpos											; no borrow: edi >= esi, count is valid

	xchg	esi,edi										; make esi < edi
	mov	ecx,edi											; ecx = byte after last
	sub	ecx,esi											; ecx = # of bytes to zero

zmpos:
	shr	ecx,2												; ecx = # of dwords to zero (at most 0x3FFFFFFF)
	mov	edi,esi											; edi -> beginning of block to zero

	; On PPro, P2 and P3, REP MOVS and REP STOS can perform fast by moving an entire
	; cache line at a time. This happens only when the following conditions are met:
	;  both source and destination must be aligned by 8
	;  direction must be forward (direction flag cleared)
	;  the count (ECX) must be greater than or equal to 64
	;  the difference between EDI and ESI must be numerically greater than or equal to 32
	;  the memory type for both source and destination must be either write-back or writecombining
	; (you can normally assume this).
	; Under these conditions, the number of uops issued is approximately 215+2*ECX for REP
	; MOVSD and 185+1.5*ECX for REP STOSD, giving a speed of approximately 5 bytes per clock
	; cycle for both instructions, which is almost 3 times as fast as when the above conditions are
	; not met.

	; FROM How to optimize for the Pentium family of microprocessors
	; By Agner Fog, Ph.D.
	; Copyright 1996 - 2004

%_ZeroMem0:												; This routine is used for large blocks
	xor	eax,eax											; ready to write some zeros
	cmp	ecx,64											; fewer than 64 dwords? (rep stosd needs >= 64 to go fast)
	jl	> %_ZeroMem1								; if so go to the medium routine
	test	edi,7											; is edi 8 byte aligned?
	jz	> Z0												; if so, go straight to the block store
	mov	d[edi],eax									; zero one dword by hand ...
	add	edi,4												; ... which brings a dword-aligned edi up to 8-byte alignment
	dec	ecx													; one dword less to write (ecx was >= 64, so still > 0)
Z0:																; edi is 8 byte aligned here whenever it was dword aligned on entry
	cld															; make sure it is going the right way
	rep stosd												; write them!
	ret															; go home again

%_ZeroMem1:												; This routine is used for medium-sized blocks
	cmp	ecx,8												; fewer than 8 dwords (32 bytes)?
	jl	> %_ZeroMem2								; if so go to the short routine
	push	edx												; preserve edx

	mov	edx,ecx
	and	edx,7												; edx = dwords left over after the 32-byte groups
	shr	ecx,3												; ecx = # of 32-byte groups (>= 1 here)
	xor	eax,eax											; ready to write some zeros

Z1:
	mov	[edi+ 0],eax								; write 32 bytes at a time
	mov	[edi+ 4],eax								;
	mov	[edi+ 8],eax								;
	mov	[edi+12],eax								;
	mov	[edi+16],eax								;
	mov	[edi+20],eax								;
	mov	[edi+24],eax								;
	mov	[edi+28],eax								;
	add	edi,32											; move the pointer
	dec	ecx													; decrement the loop count
	jnz	< Z1												; are we there yet?

	mov	ecx,edx											; ecx = leftover dwords (0..7) for the short routine
	pop	edx													; restore edx, since we used it

%_ZeroMem2:												; This routine is used for small blocks
	jecxz	> zm_exit									; skip if no dwords to zero
	xor	eax,eax											; ready to write some zeros
Z2:
	mov	[edi],eax										; write 1 dword at a time
	add	edi,4												; move the pointer
	dec	ecx													; decrement the loop count
	jnz	< Z2

zm_exit:													; done, we're gone!
	ret

; Runtime error stubs.
; Each stub loads the matching exception code into eax and jumps to
; %_RuntimeError, which raises it.  Stubs that raise the same code share one body.

; %_eeeErrorNT    : operating system cannot allocate memory
; %_eeeAllocation : usually due to attempt to allocate a block already allocated
%_eeeErrorNT:
%_eeeAllocation:
	mov	eax,EXCEPTION_MEMORY_ALLOCATION
	jmp	%_RuntimeError

; various integer overflow errors (type conversion, etc)
%_eeeOverflow:
	mov	eax,EXCEPTION_INT_OVERFLOW
	jmp	%_RuntimeError

; array index < 0 or > UBOUND()  (only if compiled with -bc switch)
%_OutOfBounds:
	mov	eax,EXCEPTION_OUT_OF_BOUNDS
	jmp	%_RuntimeError

; attempt to ATTACH to non-null node
%_NeedNullNode:
	mov	eax,EXCEPTION_NODE_NOT_EMPTY
	jmp	%_RuntimeError

; error in #dimensions  (only if compiled with -bc switch)
; %_UnexpectedLowestDim : eg. DIM a[5]: a[3,2] = 25
; %_UnexpectedHigherDim : eg. DIM a[5,5]: a[3] = 25
%_UnexpectedLowestDim:
%_UnexpectedHigherDim:
	mov	eax,EXCEPTION_ARRAY_DIMENSION
	jmp	%_RuntimeError

; invalid argument in a function call
%_InvalidFunctionCall:
	mov	eax,EXCEPTION_INVALID_ARGUMENT
	jmp	%_RuntimeError

; %_RuntimeError
; In:    eax = exception code
; Calls _RaiseException@16 with that code and zero for the three remaining
; arguments; the ret is reached only if _RaiseException@16 returns.
%_RuntimeError:
	invoke _RaiseException@16, eax, 0, 0, 0
	ret

; ############################################
; ############################################
; #####  DATA  #####  DATA  #####  DATA  #####
; ############################################
; ############################################

.data
align	4
%initialized 			dd 0						; set to -1 by initialize once the first 8mb is committed

; The four Dyn_ values below are written by initialize.
Dyn_Base					dd 0						; Dyno page base
Dyn_Headers_Start	dd 0						; Dyno headers start here
Dyn_Headers_End		dd 0						; Dyno headers end here
Dyn_Page_End			dd 0						; Dyno page ends here

ERROR							dd 0						; ERROR	(XBASIC error number), read and replaced by %_error


;align	4
;Mem_Thread_Lock CRITICAL_SECTION	; for memory Critical Section

; 14 bytes, no terminator; initialize copies it into the permanent bottom block
; using a hard-coded length of 14, so keep the two in step.
align	4
%pdeString db "XBLite xbl.dll"

; Table of 48 dwords, all 0 at load time; the trailing comments give the size
; associated with each entry.  initialize stores the address of the first dyno
; header at %pointers+0x40 (entry 16).
align	4
%dbase:
%pointers:
xxxPointers:
_XxxPointers:
	dd	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0	;  16, 32, 48, 64  ... 240, 256
	dd	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0	;  512, 1K, 2K, 4K, 8K, 16K, 32K, 64K,  128K, 256K, 512K, 1M, 2M, 4M, 8M, 16M
	dd	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0	;  32M, 64M, 128M, 256M, 512M, 1G, 2G, 4G,  8G, 16G, 32G, 64G, 128G, 256G, 512G
; 	dd	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0	;  ...
; 	dd	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; 	dd	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; 	dd	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; 	dd	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0


; #################
; #####  END  #####
; #################
