;$Id$
;###############################################################################
;
; NAME:
;  GA_FIT
;
; PURPOSE:
;  Implementation of a genetic algorithm for surface fitting.
;
; CATEGORY:
;  DAVE, Data Analysis, RAINS, surface fitting
;
; AUTHOR:
;   Robert M. Dimeo, Ph.D.
;   NIST Center for Neutron Research
;   100 Bureau Drive
;   Gaithersburg, MD 20899
;   Phone: (301) 975-8135
;   E-mail: robert.dimeo@nist.gov
;   http://www.ncnr.nist.gov/staff/dimeo
;
; LICENSE:
;  The software in this file is written by an employee of
;  National Institute of Standards and Technology
;  as part of the DAVE software project.
;
;  The DAVE software package is not subject to copyright protection
;  and is in the public domain. It should be considered as an
;  experimental neutron scattering data reduction, visualization, and
;  analysis system. As such, the authors assume no responsibility
;  whatsoever for its use, and make no guarantees, expressed or
;  implied, about its quality, reliability, or any other
;  characteristic. The use of certain trade names or commercial
;  products does not imply any endorsement of a particular product,
;  nor does it imply that the named product is necessarily the best
;  product for the stated purpose. We would appreciate acknowledgment
;  if the DAVE software is used of if the code in this file is
;  included in another product.
;
;###############################################################################
;+
; NAME:
;       GA_FIT
;
; PURPOSE:
;
;		Implementation of a genetic algorithm/least-squares hybrid minimization for
;		use in parameter estimation and model fitting.  Function returns 1 if
;		successfully executed, 0 if there was an input error, or -1 if no
;		satisfactory solution was found.
;
; AUTHOR:
;
;       Robert M. Dimeo, Ph.D.
;		NIST Center for Neutron Research
;       100 Bureau Drive
;		Gaithersburg, MD 20899
;       Phone: (301) 975-8135
;       E-mail: robert.dimeo@nist.gov
;       http://www.ncnr.nist.gov/staff/dimeo
;
; CATEGORY:
;
;       Mathematics, parameter estimation
;
; CALLING SEQUENCE:
;		RET_VAL = GA_FIT(	FUNCNAME,								$
;							DATA = data,							$
;							PARAM_BOUNDS = param_bounds,			$
;							NGEN = ngen,							$
;							NITER = niter,							$
;							KEEP_BEST = keep_best,					$
;							NPOP = npop,							$
;							PCROSS = pcross,						$
;							PMUT = pmut,							$
;							AMP_MUT = amp_mut,						$
;							ERRMSG = ermsg,							$
;							PARMS = parms,							$
;							AVE_FIT = ave_fit,						$
;							BEST_FIT = best_fit,					$
;							GA_BEST_FIT = ga_best_fit,				$
;							LS_FIT = ls_fit,						$
;							SIGMA = sigma,							$
;							_EXTRA = extra)
;
;
; INPUT PARAMETERS (REQUIRED)
;
;		FUNCNAME:	string variable naming the function to be minimized.  In
;					keeping with the IDL "function" named in CURVEFIT, this
;					"function" is really written as a procedure as follows:
;
;						PRO myFunction, X, P, YOUT
;							YOUT = P[0] + X*P[1]
;						END
;					Note that you do not need to specify the partial derivatives.
;
; INPUT KEYWORDS (REQUIRED)
;
;		DATA:			3xN array of (x,y,yerr) values.  This is the data to which
;						the model function (FUNCNAME) is to be fit.  N denotes the
;						number of data points.
;		PARAM_BOUNDS:	2xM array of parameter bounds where M denotes the number of
;						parameters.
;						The k-th parameter bounds are given, for instance, by
;						PARAM_BOUNDS[0,k] = lower_parm_bound[k]
;						PARAM_BOUNDS[1,k] = upper_parm_bound[k]
;
; INPUT KEYWORDS (OPTIONAL)
;
;		NGEN:		number of generations over which the algorithm will "evolve"
;					(Default: 10)
;		NPOP:		number of solutions in the "gene pool".
;					(Default: 20M, where M=#parameters)
;		NITER:		number of times to perform the GA search (Default: 1)
;		LS_FIT:		keyword which, when set, executes a least-squares search upon
;					completion of the genetic search. (Default: 0, not set)
;		KEEP_BEST:	keyword which, when set, keeps the best fit "gene" in the "pool"
;					from one generation to the next (Default: 0, not set)
;		PCROSS:		Probability of "genes" swapping chromosomes at each reproductive
;					stage (generation). (Default: 0.9)
;		PMUT:		Probability of an individual parameter changing by AMP_MUT times the
;					standard deviation of the range (determined by PARAM_BOUNDS) of that
;					parameter.(Default: 0.01)
;		AMP_MUT:	Scale factor for the amount by which a parameter can change by
;					mutation. (Default: 0.1)
;
; OUTPUT KEYWORDS:
;
;		ERRMSG:		Error message providing information should the return value of
;					the function be different from 1.
;		PARMS:		The model parameters after successful refinement using the GA
;					and/or least-squares fit.
;		SIGMA:		The standard deviation of the model parameters if the least-squares
;					option was selected (i.e. by setting the LS_FIT keyword).
;		AVE_FIT:	Average fitness for the GA search as a function of generation.
;		BEST_FIT:	Best fitness in the population as a function of generation.
;		GA_BEST_FIT:	The best fit parameters as determined from the GA search only.
;		EXTRA:		Not currently used.
;
; REQUIREMENTS:
;
;	If you wish to use the least-squares minimization option (i.e. by setting the
;	LS_FIT keyword) then you must have the following routines from Craig Markwardt's
;	library: http://cow.physics.wisc.edu/~craigm/idl/idl.html
;
;		MPFIT.PRO
;		MPCURVEFIT.PRO
;
; COMMON BLOCKS:
;
;		None
;
; DISCLAIMER
;
;		This software is provided as is without any warranty whatsoever.
;		Permission to use, copy, modify, and distribute modified or
;		unmodified copies is granted, provided this disclaimer
;		is included unchanged.
;
; MODIFICATION HISTORY:
;
;       Written by Rob Dimeo, March 27, 2003
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
pro MP_FIT_FUN,x,parms,yfit,_Extra = extra
funcname = extra.funcname
yfit = call_function(funcname,x,parms)
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function ga_create_pop,npop,param_bounds
psize = size(param_bounds)
nparms = psize[2]
upop = 1+bytarr(npop)
pop = upop#param_bounds[0,*] + $
	upop#(param_bounds[1,*]-param_bounds[0,*])*randomu(s,npop,nparms)
return,pop
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function ga_evaluate_fitness,pinfo
pop = *(*pinfo).popptr
fitness = dblarr((*pinfo).npop)
x = (*(*pinfo).dataptr)[0,*]
ydat = (*(*pinfo).dataptr)[1,*]
err = (*(*pinfo).dataptr)[2,*]
dof = n_elements(x)-(*pinfo).nparms

for i = 0,(*pinfo).npop-1 do begin
	eval = call_function((*pinfo).funcname,x,pop[i,*])
	diff = ydat - eval
	chisq = (total((diff/err)^2))/dof
	fitness[i] = chisq
endfor
; This fitness function is entirely ad-hoc!!!!
fitness = 1.0/(1.0+4.0*fitness)
best_fit = where(fitness eq max(fitness))
*(*pinfo).best_ptr = (*(*pinfo).popptr)[best_fit[0],*]
*(*pinfo).fitptr = fitness
return,1
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function ga_reproduce,pinfo
pop = *(*pinfo).popptr
fitness = *(*pinfo).fitptr
sFitness = fitness/total(fitness)
regions = [0.0,total(sFitness,/cumulative)]
spin = randomu(s,(*pinfo).npop)
idx = value_locate(regions,spin)
newpop = pop[idx,*]
*(*pinfo).popptr = newpop
return,1
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function ga_crossover,pinfo
; Swap parameter values between solutions.
pop = *(*pinfo).popptr
pcross = (*pinfo).pcross

list_1 = indgen((*pinfo).npop)
list_2 = list_1[sort(randomu(s,n_elements(list_1)))]
rnd = randomu(s,(*pinfo).npop/2)
newpop = pop

for i = 0,((*pinfo).npop/2)-1 do begin
	parent_one = pop[i,*]
	parent_two = pop[list_2[i],*]
	if rnd[i] le (*pinfo).pcross then begin
	; swap a parameter
		parm_index = fix((*pinfo).nparms*randomu(s,1))
		newpop[2*i,parm_index] = parent_two[parm_index]
		newpop[2*i+1,parm_index] = parent_one[parm_index]
	endif
endfor
*(*pinfo).popptr = newpop
return,1
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function ga_mutate,pinfo
; Add a normal deviate to a parameter depending on the mutation probability,
; pmut, and the mutation amplitude, mut_amp.
pop = *(*pinfo).popptr
pcross = (*pinfo).pcross
npop = (*pinfo).npop
amp_mut = (*pinfo).amp_mut
pmut = (*pinfo).pmut
param_bounds = *(*pinfo).parmbndptr

for i = 0,(*pinfo).npop-1 do begin
	rnd = randomu(s,(*pinfo).nparms)
	for j = 0,(*pinfo).nparms-1 do begin
		if rnd[j] le (*pinfo).pmut then begin
			plo = param_bounds[0,j] & phi = param_bounds[1,j]
			dev = ((*pinfo).amp_mut)*(phi-plo)*randomn(s,1)
			pop[i,j] = pop[i,j]+dev
;			if pop[i,j] lt plo then (*(*pinfo).parmbndptr)[0,j] = pop[i,j]
;			if pop[i,j] gt phi then (*(*pinfo).parmbndptr)[1,j] = pop[i,j]
		endif
	endfor
endfor

*(*pinfo).popptr = pop
return,1
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function ga_evolve,pinfo
for i = 0,(*pinfo).ngen-1 do begin
	ret_val = ga_evaluate_fitness(pinfo)
	if i eq 0 then ave_fit = (moment(*(*pinfo).fitptr))[0] else $
		ave_fit = [ave_fit,(moment(*(*pinfo).fitptr))[0]]
	if i eq 0 then best_fit = max(*(*pinfo).fitptr) else $
		best_fit = [best_fit,max(*(*pinfo).fitptr)]
	ret_val = ga_reproduce(pinfo)
	ret_val = ga_crossover(pinfo)
	ret_val = ga_mutate(pinfo)
	if (*pinfo).keep_best eq 1 then (*(*pinfo).popptr)[0,*] = *(*pinfo).best_ptr
endfor
*(*pinfo).ave_fit_ptr = ave_fit
*(*pinfo).best_fit_ptr = best_fit
return,1
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function ga_lm_refine,pinfo
if (*pinfo).ls_fit eq 1 then begin
	x = (*(*pinfo).dataptr)[0,*]
	y = (*(*pinfo).dataptr)[1,*]
	yerr = (*(*pinfo).dataptr)[2,*]
	w = 1D/yerr^2
	bestfit = max(*(*pinfo).fitptr,best_index)
	parms = (*(*pinfo).popptr)[best_index,*]
    yf = mpcurvefit(x, y, w, parms, sigma, $
                    FUNCTION_NAME='MP_FIT_FUN', $
                    /quiet, /autoderivative,$
                    functargs = {funcname:(*pinfo).funcname}, $
                    chisq = chisq)
	(*(*pinfo).popptr)[best_index,*] = parms
	*(*pinfo).sigma_ptr = sigma
	(*pinfo).chi_sq = chisq
endif else begin
	ret_val = max(*(*pinfo).fitptr,bestindex)
	parms = (*(*pinfo).popptr)[bestindex,*]
endelse
*(*pinfo).parms_ptr = parms
return,1
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
FUNCTION GA_FIT,	FUNCNAME,								$
					NGEN = ngen,							$
					NITER = niter,							$
					DATA = data,							$
					KEEP_BEST = keep_best,					$
					NPOP = npop,							$
					PCROSS = pcross,						$
					PMUT = pmut,							$
					AMP_MUT = amp_mut,						$
					PARAM_BOUNDS = param_bounds,			$
					ERRMSG = ermsg,							$
					PARMS = parms,							$
					AVE_FIT = ave_fit,						$
					BEST_FIT = best_fit,					$
					GA_BEST_FIT = ga_best_fit,				$
					LS_FIT = ls_fit,						$
					CHI_SQ = chi_sq,						$
					SIGMA = sigma,							$
					_EXTRA = extra

if n_params() eq 0 then begin
	ermsg = 'FUNCNAME is a required parameter'
	return,0
endif
ermsg = 'No errors generated'
if n_elements(data) eq 0 then begin
	ermsg = 'DATA keyword is a required keyword'
	return,0
endif

if n_elements(param_bounds) eq 0 then begin
	ermsg = 'PARAM_BOUNDS is a required keyword'
	return,0
endif
if n_elements(niter) eq 0 then niter = 1
psize = size(param_bounds)
nparms = psize[2]
orig_param_bounds = param_bounds
if n_elements(npop) eq 0 then npop = 20*nparms
if npop lt 2 then begin
	ermsg = 'NPOP must be at least 2'
	return,0
endif
if npop mod 2 ne 0 then npop = npop + 1
if n_elements(ngen) eq 0 then ngen = 10
if ngen eq 0 then begin
	ermsg = 'NGEN must be at least 1'
	return,0
endif
if n_elements(ls_fit) eq 0 then ls_fit = 0
if n_elements(pcross) eq 0 then pcross = 0.9
if n_elements(pmut) eq 0 then pmut = 0.01
if n_elements(amp_mut) eq 0 then amp_mut = 0.1
if n_elements(keep_best) eq 0 then keep_best = 0

; Create the initial population
;nparms = call_function(FUNCNAME)
; POP is NPOP X NPARMS
pop = ga_create_pop(npop,param_bounds)
; Create a pointer to an anonymous structure containing the relevant
; information to be passed among the various GA functions.
dataptr = ptr_new(data,/no_copy)
parmbndptr = ptr_new(param_bounds,/no_copy)
popptr = ptr_new(pop,/no_copy)

info = 	{	dataptr:dataptr,					$
			parmbndptr:parmbndptr,				$
			ngen:ngen,							$
			popptr:popptr,						$
			npop:npop,							$
			nparms:nparms,						$
			fitptr:ptr_new(/allocate_heap),		$
			ave_fit_ptr:ptr_new(/allocate_heap),	$
			best_fit_ptr:ptr_new(/allocate_heap),	$
			parms_ptr:ptr_new(/allocate_heap),	$
			sigma_ptr:ptr_new(/allocate_heap),	$
			pcross:pcross,						$
			pmut:pmut,							$
			amp_mut:amp_mut,					$
			ls_fit:ls_fit,						$
			keep_best:keep_best,				$
			chi_sq:0.0,							$
			best_ptr:ptr_new(/allocate_heap),	$
			funcname:funcname					}

pinfo = ptr_new(info)
bfit = 0.0
iter = 0
while (bfit lt 0.12) and (iter lt niter) do begin
	ret_val = ga_evolve(pinfo)
	best_fit = *(*pinfo).best_fit_ptr
	ave_fit = *(*pinfo).ave_fit_ptr
	; Does the user want to use a least-squares fit at this stage?
	ret_val = ga_lm_refine(pinfo)
	ret_val = ga_evaluate_fitness(pinfo)
	max_fitness = max(*(*pinfo).fitptr,index)
	bfit = (*(*pinfo).fitptr)[index]
	iter = iter + 1
endwhile

parms = *(*pinfo).parms_ptr

ga_best_fit = *(*pinfo).best_ptr
ret_val = ga_evaluate_fitness(pinfo)
max_fitness = max(*(*pinfo).fitptr,index)
bfit = (*(*pinfo).fitptr)[index]
print,'Best fitness: ',bfit
if (*pinfo).ls_fit then	sigma = *(*pinfo).sigma_ptr else sigma = -1L
data = *(*pinfo).dataptr
param_bounds = orig_param_bounds
chi_sq = (*pinfo).chi_sq
; Clean up all of the heap memory
heap_free,pinfo
if bfit lt 0.12 then begin
	ermsg = 'No solution found'
	return,-1
endif
return,1
end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;