library(mosaic)

# Problem 1
cheese = read.csv('../data/cheese.csv')


# Clearly volume is correlated with displays
boxplot(vol~disp, data=cheese)

# Part A: % increase in sales volume under displays

# One way: bootstrap the means
mu = mean(vol~disp, data=cheese)
mu[2]/mu[1]

# Bootstrap this ratio
boot_mean = do(1000)*{
	muboot = mean(vol~disp, data=resample(cheese))
	muboot[2]/muboot[1]	
}
head(boot_mean)
hist(boot_mean$X1)
confint(boot_mean$X1, level=0.95)
confint(boot_mean$X1)  # 95% by default


# However, this analysis misses something... look store by store
boxplot(log(vol)~disp, data=subset(cheese, store=='BALTI/WASH - SAFEWAY'))
boxplot(log(vol)~disp, data=subset(cheese, store=='SYRACUSE - PRICE CHOPPER'))

# Different stores have very different sales volumes,
# and different patterns of display activity.
# This might be a confounder.
# We can account for this using a model:
#  - put in dummy variables store by store
#  - work on the log scale (since log scale gives
#    us multiplicative change)
lm2 = lm(log(vol)~disp + store, data=cheese)
coef(lm2)


# Bootstrap this model
boot2 = do(1000)*{
	lm(log(vol)~disp + store, data=resample(cheese))
}
confint(boot2)

# Displays now have a lower estimated effect.
# Therefore important to account for store-level variability.
exp(confint(boot2$disp))
# This is the best answer for Part A: 35-43%


# Part B
# Now look at price

# lower prices during display weeks
# this is certainly a confounder: people bought more
# cheese at least partially due to lower prices.
boxplot(price ~ disp, data=cheese)

# Fit a model that accounts for price:
# this means fitting a demand curve, like in the milk data set...
# Except now we have dummies for store and display that shift the curve up/down
lm3 = lm(log(vol)~log(price) + store + disp, data=cheese)

# Notice the large negative coefficient on price: highly elastic
coef(lm3)

# Spot check the fit for a single store
# Specifically looking at Kroger DFW
dfwkroger = subset(cheese, store=='DALLAS/FT. WORTH - KROGER CO')
plot(vol~price, data=dfwkroger, las=1)
points(vol~price, data=subset(dfwkroger, disp==1), col='blue', pch=19)
points(vol~price, data=subset(dfwkroger, disp==0), col='red', pch=19)

# Go back and make the curves from the fitted coefficients
# No displays
curve(exp(9.37579 + 1.43461)*x^(-2.53159), add=TRUE, col='red')
# With displays
curve(exp(9.37579 + 1.43461 + 0.18540)*x^(-2.53159), add=TRUE, col='blue')


# Get a confidence interval for the display coefficient
boot3 = do(1000)*{
	lm(log(vol)~log(price) + store + disp, data=resample(cheese))
}

# Confidence interval for the % change due to display
exp(confint(boot3$disp))


# Part C: does the elasticity, i.e. the slope on log(price), change
# in the presence of an in-store display?

# Try a model with an interaction
lm4 = lm(log(vol)~log(price) + store + disp + disp:log(price), data=cheese)
coef(lm4)

# Get a confidence interval for the interaction coefficient
boot4 = do(1000)*{
	lm(log(vol)~log(price) + store + disp + disp:log(price), data=resample(cheese))
}
names(boot4)

# Is zero a plausible value for the interaction coefficient?
# Maybe, not probably not.
confint(boot4$log.price..disp)


plot(vol~price, data=dfwkroger, las=1)
points(vol~price, data=subset(dfwkroger, disp==1), col='blue', pch=19)
points(vol~price, data=subset(dfwkroger, disp==0), col='red', pch=19)

# Go back and make the curves from the fitted coefficients
# No displays
curve(exp(9.2609507 + 1.4292030)*x^(-2.4258328), add=TRUE, col='red')
# With displays
curve(exp(9.2609507 + 1.4292030 + 0.3415304)*x^(-2.4258328 - 0.1476102), add=TRUE, col='blue')


######
# Advanced optional stuff:
# defining and optimizing your own functions
######

# Profit equation when cost = 1.50
# blue = display weeks
# red = no-display weeks
my_cost = 1.5

# Define function for the expected profit under different possible conditions
profit = function(price, store, disp, cost=1.5, model=lm4) {
	predictors = data.frame(price= price, store=rep(store, length(price)), disp=disp)
	log_yhat = predict(model, newdata=predictors)
	exp(log_yhat)*(price-cost)
	# exp(9.2609507 + 1.4292030 + 0.3415304)*x^(-2.4258328 - 0.1476102) * (x-cost)
}


# Show the profit curves for DFW Kroger
curve(profit(x, store='DALLAS/FT. WORTH - KROGER CO', disp=1, cost=my_cost), col='blue', from=1.5, to=4)
curve(profit(x, store='DALLAS/FT. WORTH - KROGER CO', disp=0, cost=my_cost), col='red', add=TRUE)

out1 = optimize(profit, interval=c(0, 10), cost=my_cost, disp=1, store='DALLAS/FT. WORTH - KROGER CO', maximum=TRUE)
out1$maximum

out2 = optimize(profit, interval=c(0, 10), cost=my_cost, disp=0, store='DALLAS/FT. WORTH - KROGER CO', maximum=TRUE)
out2$maximum


# Show the profit curves with optimal prices
abline(v=out1$maximum, col='blue')
abline(v=out2$maximum, col='red')