# Adjust DET densities into kudzu "heights"; divide by the integral
# tops and bottoms and densities are from stored_tree, if desired, tops and bottoms have
#   already been delta-translated by this stage
# sigma is the usual scalar
# phi, scalar, is phi in the math formulas
kudzu_heights <- function(stored_tree, sigma, phi){
	# extract parts of stored_tree
	tops <- stored_tree$tops
	bottoms <- stored_tree$bottoms
	densities <- stored_tree$densities
	L <- NROW(tops)
	p <- NCOL(tops)
	eph <- exp(phi)

	leaf_dim_integrals <- matrix(NA, nrow=L, ncol=p)
	leaf_integrals <- rep(NA, L)

	# loop over leaves
	for(l in 1:L) {

		# leaf width in units of sigma:
		widths <- tops[l,] - bottoms[l,]
		u <- widths / sigma
		eu <- exp(u)

		# get integral component for each dimension (not including DET density):
		leaf_dim_integrals[l, ] <- sigma * eu / (eu-1) * (log((eu*eph + (1/(eu*eph)) + 2)/(eph + 1/eph + 2)))

		# combine into integral for leaf l
		leaf_integrals[l] <- densities[l] * prod(leaf_dim_integrals[l, ])
	}

	# combine into total integral (denominator normalising kudzu into a PDF)
	kudzu_integral <- sum(leaf_integrals)

	return(list(heights = densities / kudzu_integral,
				leaf_dim_integrals = leaf_dim_integrals,
		        leaf_integrals = leaf_integrals,
		        kudzu_integral = kudzu_integral))
}


#' Adjust DET leaf height so a kudzu density function will integrate to one given a bandwidth, and export a kudzu list
#' 
#' @param stored_tree a tree list, such as kudzu::tree() returns
#' @param sigma a positive real number, the bandwidth of the inverse logistic ramp, larger sigma means more smoothing
#' @param phi a multiple of sigma, beyond which we regard the inverse logistic ramps as having negligible area under the curve; we use 6
#' 
#' @return a list specifying the kudzu density, containing a "tops" matrix, a "bottoms" matrix, a "heights" vector, "sigma", an "eb" matrix of exponentials of scaled bottoms, "et" likewise for tops, and "kudzu_version", a natural number for future back-compatibility
#' @export
#' 
#' @examples
#' irisdet <- tree(data=iris,
#' 	             goal="density",
#' 	             grow_parameters=list(min_n_l=5, max_nodes=100),
#' 	             prune_parameters=NA_integer_,
#' 	             dep_var=NA_integer_)
#' iriskudzu <- tree2kudzu(stored_tree=irisdet,
#' 	             sigma=0.1,
#' 	             phi=6)
tree2kudzu <- function(stored_tree, sigma, phi) {
	# normalise leaf heights
	kudzu_heights_output <- kudzu_heights(stored_tree=stored_tree,
			                              sigma=sigma,
			                              phi=phi)

	return(list(tops=stored_tree$tops,
		        bottoms=stored_tree$bottoms,
		        heights=kudzu_heights_output$heights,
		        det_densities=stored_tree$densities,
		        leaf_dim_integrals = kudzu_heights_output$leaf_dim_integrals,
		        leaf_integrals = kudzu_heights_output$leaf_integrals,
		        kudzu_integral = kudzu_heights_output$kudzu_integral,
		        sigma=sigma,
		        eb=exp(stored_tree$bottoms / sigma),
		        et=exp(stored_tree$tops / sigma),
		        kudzu_version=1))
}




######################################### subfunctions for ise_kudzu() start here ######################################

# functions for the case where a pair of leaves in some dimension have 2 unique edge values
#   - returns an integral of the part of the square density inside the sum and product over [-infinity, x_j]
ise_kudzu_2edges_1d <- function(x, kudzu_list, leaf1, leaf2, j) {
	ez <- exp(x / kudzu_list$sigma)
	eb <- kudzu_list$eb[leaf1,j] 
	et <- kudzu_list$et[leaf1,j]

	term1 <-  (et+eb) * log((ez+eb)/(ez+et)) / ((et-eb)^3)
	term2 <- ((et*ez) + (eb*ez) + 2*(et*eb))/(((et-eb)^2) * (ez+eb) * (ez+et))
	return(kudzu_list$sigma * et * et * (term1 - 2*term2))
}
# calls ise_kudzu_2edges_1d and evaluates [bottom - phi*sigma, top + phi*sigma]
ise_kudzu_2edges_1d_defint <- function(kudzu_list, leaf1, leaf2, j, phi) {
	return(ise_kudzu_2edges_1d(x=kudzu_list$tops[leaf1,j]+(phi*kudzu_list$sigma),
				               kudzu_list=kudzu_list,
				               leaf1=leaf1,
				               leaf2=leaf2,
				               j=j) - 
		   ise_kudzu_2edges_1d(x=kudzu_list$bottoms[leaf1,j]-(phi*kudzu_list$sigma),
				               kudzu_list=kudzu_list,
				               leaf1=leaf1,
				               leaf2=leaf2,
				               j=j))
}


# functions for the case where a pair of leaves in some dimension have 3 unique edge values
#   sharing a top
ise_kudzu_3edges_shared_top_1d <- function(x, kudzu_list, leaf1, leaf2, j) {
	ez <- exp(x / kudzu_list$sigma)
	eb1 <- kudzu_list$eb[leaf1,j] 
	et <- kudzu_list$et[leaf1,j]
	eb2 <- kudzu_list$eb[leaf2,j] 
	# there are three unique eb1, eb2, et

	term1 <- ((eb1*eb2 - et*et) * log(ez+et)) / (((et-eb1)^2) * ((et-eb2)^2))
	term2 <- (eb2 * log(ez+eb2)) / ((eb2-eb1) * ((eb2-et)^2))
	term3 <- (eb1 * log(ez+eb1)) / (((eb1-et)^2) * (eb1-eb2))
	term4 <- (et) / ((et-eb1)*(et-eb2)*(ez+et))
	return(kudzu_list$sigma * et * et * (term1 + term2 + term3 + term4))
}
ise_kudzu_3edges_1d_shared_top_defint <- function(kudzu_list, leaf1, leaf2, j, phi) {
	return(ise_kudzu_3edges_shared_top_1d(x=kudzu_list$tops[leaf1,j]+(phi*kudzu_list$sigma),
				                          kudzu_list=kudzu_list,
				                          leaf1=leaf1,
				                          leaf2=leaf2,
				                          j=j) - 
		   ise_kudzu_3edges_shared_top_1d(x=min(kudzu_list$bottoms[leaf1,j],
		   	                                    kudzu_list$bottoms[leaf2,j])-(phi*kudzu_list$sigma),
				                          kudzu_list=kudzu_list,
				                          leaf1=leaf1,
				                          leaf2=leaf2,
				                          j=j))
}

# likewise, but sharing a bottom
ise_kudzu_3edges_shared_bottom_1d <- function(x, kudzu_list, leaf1, leaf2, j) {
	ez <- exp(x / kudzu_list$sigma)
	eb <- kudzu_list$eb[leaf1,j] 
	et1 <- kudzu_list$et[leaf1,j]
	et2 <- kudzu_list$et[leaf2,j] 
	# there are three unique eb, et1, et2

	term1 <- ((et1*et2 - eb*eb) * log(ez+eb)) / (((eb-et1)^2) * ((eb-et2)^2))
	term2 <- (et2 * log(ez+et2)) / ((et2-et1) * ((et2-eb)^2))
	term3 <- (et1 * log(ez+et1)) / (((et1-eb)^2) * (et1-et2))
	term4 <- (eb*(et1 + et2)) / (((et1-eb)^2)*((et2-eb)^2)*(et2-et1)*(ez+eb))
	return(kudzu_list$sigma * et1 * et2 * (term1 + term2 + term3 + term4))
}
ise_kudzu_3edges_1d_shared_bottom_defint <- function(kudzu_list, leaf1, leaf2, j, phi) {
	return(ise_kudzu_3edges_shared_bottom_1d(x=max(kudzu_list$tops[leaf1,j],
		                                           kudzu_list$tops[leaf2,j])+(phi*kudzu_list$sigma),
				                             kudzu_list=kudzu_list,
				                             leaf1=leaf1,
				                             leaf2=leaf2,
				                             j=j) - 
		   ise_kudzu_3edges_shared_bottom_1d(x=kudzu_list$bottoms[leaf1,j]-(phi*kudzu_list$sigma),
				                             kudzu_list=kudzu_list,
				                             leaf1=leaf1,
				                             leaf2=leaf2,
				                             j=j))
}

# functions for the case where a pair of leaves in some dimension have3 unique edge values
#   and one leaf's top = the other's bottom
ise_kudzu_3edges_shared_top_bottom_1d <- function(x, kudzu_list, leaf1, leaf2, j) {
	ez <- exp(x / kudzu_list$sigma)
	# find which is the equal pair
	edges <- c(kudzu_list$eb[leaf1,j],
	           kudzu_list$eb[leaf2,j],
	           kudzu_list$et[leaf1,j],
	           kudzu_list$et[leaf2,j])
	eb <- min(edges) # unique bottom
	et <- max(edges) # unique top
	ej <- sort(unique(edges))[2] # junction

	term1 <- ((eb*et - ej*ej) * log(ez+ej)) / (((ej-eb)^2) * ((ej-et)^2))
	term2 <- (et * log(ez+et)) / ((et-ej)^2 * (et-eb))
	term3 <- (eb * log(ez+eb)) / (((eb-ej)^2) * (eb-et))
	term4 <- ((eb * ej * et) + (ej^3) - (ej*ej*(et-eb))) / (((ej-eb)^2)*((et-ej)^2)*(ez+ej))
	return(kudzu_list$sigma * ej * et * (term1 + term2 + term3 + term4))
}
ise_kudzu_3edges_1d_shared_top_bottom_defint <- function(kudzu_list, leaf1, leaf2, j, phi) {
	return(ise_kudzu_3edges_shared_top_bottom_1d(x=max(kudzu_list$tops[leaf1,j],
		                                           kudzu_list$tops[leaf2,j])+(phi*kudzu_list$sigma),
				                             kudzu_list=kudzu_list,
				                             leaf1=leaf1,
				                             leaf2=leaf2,
				                             j=j) - 
		   ise_kudzu_3edges_shared_top_bottom_1d(x=min(kudzu_list$bottoms[leaf1,j],
		   	                                       kudzu_list$bottoms[leaf2,j])-(phi*kudzu_list$sigma),
				                             kudzu_list=kudzu_list,
				                             leaf1=leaf1,
				                             leaf2=leaf2,
				                             j=j))
}



# functions for the case where a pair of leaves in some dimension have 4 unique edge values
ise_kudzu_4edges_1d <- function(x, kudzu_list, leaf1, leaf2, j) {
	ez <- exp(x / kudzu_list$sigma)
	edges <- c(kudzu_list$eb[leaf1,j],
	           kudzu_list$eb[leaf2,j],
	           kudzu_list$et[leaf1,j],
	           kudzu_list$et[leaf2,j] )
	
	int_terms <- rep(NA,4)
	for(i in 1:4) {
		int_terms[i] <- (edges[i] * log((edges[i]/ez)+1)) / prod(edges[i]-edges[-i])
	}
	return(kudzu_list$sigma * edges[3] * edges[4] * sum(int_terms))
}
ise_kudzu_4edges_1d_defint <- function(kudzu_list, leaf1, leaf2, j, phi) {
	return(ise_kudzu_4edges_1d(x=max(kudzu_list$tops[leaf1,j],
		                             kudzu_list$tops[leaf2,j])+(phi*kudzu_list$sigma),
				                             kudzu_list=kudzu_list,
				                             leaf1=leaf1,
				                             leaf2=leaf2,
				                             j=j) - 
		   ise_kudzu_4edges_1d(x=min(kudzu_list$bottoms[leaf1,j],
		   	                         kudzu_list$bottoms[leaf2,j])-(phi*kudzu_list$sigma),
				                             kudzu_list=kudzu_list,
				                             leaf1=leaf1,
				                             leaf2=leaf2,
				                             j=j))
}







ise_switch_1d <- function(kudzu_list, leaf1, leaf2, j, phi) {
	edge_values <- c(kudzu_list$bottoms[leaf1,j], kudzu_list$tops[leaf1,j],
		             kudzu_list$bottoms[leaf2,j], kudzu_list$tops[leaf2,j])
	n_unique_edge_values <- length(unique(edge_values))
	if(n_unique_edge_values==2) func_1d <- ise_kudzu_2edges_1d_defint
	else if(n_unique_edge_values==4) func_1d <- ise_kudzu_4edges_1d_defint
	else if(n_unique_edge_values==3 & edge_values[1]==edge_values[3]) func_1d <- ise_kudzu_3edges_1d_shared_bottom_defint
	else if(n_unique_edge_values==3 & edge_values[2]==edge_values[4]) func_1d <- ise_kudzu_3edges_1d_shared_top_defint
	else if(n_unique_edge_values==3 & (edge_values[2]==edge_values[3] | edge_values[1]==edge_values[4])) func_1d <- ise_kudzu_3edges_1d_shared_top_bottom_defint
	       	        
	return(func_1d(kudzu_list=kudzu_list,
				   leaf1=leaf1,
				   leaf2=leaf2,
				   j=j,
				   phi=phi))
}

######################################### subfunctions for ise_kudzu() end here ######################################


# Calculate ISE given kudzu list and data
ise_kudzu <- function(data, kudzu_list, phi) {
	L <- NROW(kudzu_list$tops)
	p <- NCOL(kudzu_list$tops)
	# array of definite integrals, LxLxp
	ise_def_ints <- array(NA, dim=c(L,L,p))

	# populate diagonals with squares
	for(l in 1:L) {
		for(j in 1:p) {
			ise_def_ints[l,l,j] <- ise_kudzu_2edges_1d_defint(kudzu_list=kudzu_list,
				                                              leaf1=l,
				                                              leaf2=l,
				                                              j=j,
				                                              phi=phi)
		}

	}

	# populate upper triangles and copy to lower triangles
	for(l in 1:(L-1)) {
		for(ll in (l+1):L) {
			for(j in 1:p) {
				ise_def_ints[l,ll,j] <- (ise_switch_1d(kudzu_list=kudzu_list,
					                                   leaf1=l,
					                                   leaf2=ll,
					                                   j=j,
					                                   phi=phi))
				ise_def_ints[ll,l,j] <- ise_def_ints[l,ll,j]
			}
		}
	}

	# product of p dimensions is the integral of squared kudzu density function for that pair of leaves
	pairs_ise_matrix <- apply(ise_def_ints,c(1,2),prod)
	
	# include 1/d^2 normalising denominator and f_DET
	for(l in 1:L) {
		for(ll in 1:L) {
			pairs_ise_matrix[l,ll] <- pairs_ise_matrix[l,ll] * kudzu_list$heights[l] * kudzu_list$heights[ll]
		}
	}

	# sum of L pairs is the ISE contribution from that leaf

	# get predicted densities
# TO DO: check this vectorises properly:
	pred_densities <- apply(data, 1, predict_joint_kudzu, kudzu_list=kudzu_list)

	# get log likelihood as a by-product
	loglik <- sum(log(pred_densities))

	return(list(ise=sum(pairs_ise_matrix) - 2*mean(pred_densities),
		        pairs_ise_matrix=pairs_ise_matrix,
		        mean_pred_densities=mean(pred_densities),
		        loglik=loglik))
}
