Weekly Sales Analysis

Loading data

#Loading sales from the downloaded file
sales <- vroom::vroom(here::here("data","sales.csv")) %>% 
  janitor::clean_names()


#Loading details from the downloaded file
details <- vroom::vroom(here::here("data","details.csv")) %>% 
  janitor::clean_names()


#Loading stores from the downloaded file
stores <- vroom::vroom(here::here("data","stores.csv")) %>% 
  janitor::clean_names()

Merging datasets

#Merging sales and details
sales_details <- left_join(sales, details, by = c("store" = "store", "date" = "date", "is_holiday" = "is_holiday"))

# Merging sales_details and stores
sales1 <- left_join(sales_details, stores, by = "store")

glimpse(sales1) 
## Rows: 421,570
## Columns: 16
## $ store        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dept         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ date         <chr> "05/02/2010", "12/02/2010", "19/02/2010", "26/02/2010", "…
## $ is_holiday   <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ weekly_sales <dbl> 25355, 45621, 43678, 19192, 21564, 21474, 22396, 26049, 5…
## $ temperature  <dbl> 42.3, 38.5, 39.9, 46.6, 46.5, 57.8, 54.6, 51.5, 62.3, 65.…
## $ fuel_price   <dbl> 2.57, 2.55, 2.51, 2.56, 2.62, 2.67, 2.72, 2.73, 2.72, 2.7…
## $ mark_down1   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ mark_down2   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ mark_down3   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ mark_down4   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ mark_down5   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ cpi          <dbl> 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 210, 21…
## $ unemployment <dbl> 8.11, 8.11, 8.11, 8.11, 8.11, 8.11, 8.11, 8.11, 7.81, 7.8…
## $ type         <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A…
## $ size         <dbl> 151315, 151315, 151315, 151315, 151315, 151315, 151315, 1…
skim(sales1) #check for NAs - exist in mark_down variables
(#tab:merging & inspecting)Data summary
Name sales1
Number of rows 421570
Number of columns 16
_______________________
Column type frequency:
character 2
logical 1
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
date 0 1 10 10 0 143 0
type 0 1 1 1 0 3 0

Variable type: logical

skim_variable n_missing complete_rate mean count
is_holiday 0 1 0.07 FAL: 391909, TRU: 29661

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
store 0 1.00 2.22e+01 12.79 1.00 11.00 2.20e+01 3.30e+01 4.50e+01 ▇▇▇▇▇
dept 0 1.00 4.43e+01 30.49 1.00 18.00 3.70e+01 7.40e+01 9.90e+01 ▇▇▅▂▆
weekly_sales 0 1.00 1.61e+04 22873.10 -4990.98 2095.37 7.67e+03 2.03e+04 6.72e+05 ▇▁▁▁▁
temperature 0 1.00 6.01e+01 18.45 -2.06 46.68 6.21e+01 7.43e+01 1.00e+02 ▁▃▆▇▃
fuel_price 0 1.00 3.36e+00 0.46 2.47 2.93 3.45e+00 3.74e+00 4.47e+00 ▆▆▇▇▁
mark_down1 270889 0.36 7.25e+03 8291.22 0.27 2240.27 5.35e+03 9.21e+03 8.86e+04 ▇▁▁▁▁
mark_down2 310322 0.26 3.33e+03 9475.36 -265.76 41.60 1.92e+02 1.93e+03 1.05e+05 ▇▁▁▁▁
mark_down3 284479 0.33 1.44e+03 9623.08 -29.10 5.08 2.46e+01 1.04e+02 1.42e+05 ▇▁▁▁▁
mark_down4 286603 0.32 3.38e+03 6292.38 0.22 504.22 1.48e+03 3.60e+03 6.75e+04 ▇▁▁▁▁
mark_down5 270138 0.36 4.63e+03 5962.89 135.16 1878.44 3.36e+03 5.56e+03 1.09e+05 ▇▁▁▁▁
cpi 0 1.00 1.71e+02 39.16 126.06 132.02 1.82e+02 2.12e+02 2.27e+02 ▇▁▁▂▆
unemployment 0 1.00 7.96e+00 1.86 3.88 6.89 7.87e+00 8.57e+00 1.43e+01 ▂▇▆▁▁
size 0 1.00 1.37e+05 60980.58 34875.00 93638.00 1.40e+05 2.03e+05 2.20e+05 ▅▂▅▂▇
# treating NAs in markdowns as 0 due to the interpretation of 0 promotional markdown
sales1 <- sales1 %>%
  mutate(mark_down1 = ifelse(is.na(mark_down1),0,mark_down1),
         mark_down2 = ifelse(is.na(mark_down2),0,mark_down2),
         mark_down3 = ifelse(is.na(mark_down3),0,mark_down3),
         mark_down4 = ifelse(is.na(mark_down4),0,mark_down4),
         mark_down5 = ifelse(is.na(mark_down5),0,mark_down5)) %>%
  mutate(date = dmy(date))

# negative weekly_sales values
check <- sales1 %>%
    filter(weekly_sales < 0) # check for negatives - 1200+


# There are some negative weekly sales observations. We have proceeded to interpret them as negative weekly net sales, indicating that these stores made losses in those weeks. 

Exploratory Data Analysis

Inspecting dataset

glimpse(sales1) 
## Rows: 421,570
## Columns: 16
## $ store        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dept         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ date         <date> 2010-02-05, 2010-02-12, 2010-02-19, 2010-02-26, 2010-03-…
## $ is_holiday   <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ weekly_sales <dbl> 25355, 45621, 43678, 19192, 21564, 21474, 22396, 26049, 5…
## $ temperature  <dbl> 42.3, 38.5, 39.9, 46.6, 46.5, 57.8, 54.6, 51.5, 62.3, 65.…
## $ fuel_price   <dbl> 2.57, 2.55, 2.51, 2.56, 2.62, 2.67, 2.72, 2.73, 2.72, 2.7…
## $ mark_down1   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ mark_down2   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ mark_down3   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ mark_down4   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ mark_down5   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cpi          <dbl> 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 210, 21…
## $ unemployment <dbl> 8.11, 8.11, 8.11, 8.11, 8.11, 8.11, 8.11, 8.11, 7.81, 7.8…
## $ type         <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A…
## $ size         <dbl> 151315, 151315, 151315, 151315, 151315, 151315, 151315, 1…
skim(sales1) #check for NAs 
Table 1: Data summary
Name sales1
Number of rows 421570
Number of columns 16
_______________________
Column type frequency:
character 1
Date 1
logical 1
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
type 0 1 1 1 0 3 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date 0 1 2010-02-05 2012-10-26 2011-06-17 143

Variable type: logical

skim_variable n_missing complete_rate mean count
is_holiday 0 1 0.07 FAL: 391909, TRU: 29661

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
store 0 1 2.22e+01 12.79 1.00 11.00 2.20e+01 3.30e+01 4.50e+01 ▇▇▇▇▇
dept 0 1 4.43e+01 30.49 1.00 18.00 3.70e+01 7.40e+01 9.90e+01 ▇▇▅▂▆
weekly_sales 0 1 1.61e+04 22873.10 -4990.98 2095.37 7.67e+03 2.03e+04 6.72e+05 ▇▁▁▁▁
temperature 0 1 6.01e+01 18.45 -2.06 46.68 6.21e+01 7.43e+01 1.00e+02 ▁▃▆▇▃
fuel_price 0 1 3.36e+00 0.46 2.47 2.93 3.45e+00 3.74e+00 4.47e+00 ▆▆▇▇▁
mark_down1 0 1 2.59e+03 6052.39 0.00 0.00 0.00e+00 2.81e+03 8.86e+04 ▇▁▁▁▁
mark_down2 0 1 8.80e+02 5084.54 -265.76 0.00 0.00e+00 2.20e+00 1.05e+05 ▇▁▁▁▁
mark_down3 0 1 4.68e+02 5528.87 -29.10 0.00 0.00e+00 4.54e+00 1.42e+05 ▇▁▁▁▁
mark_down4 0 1 1.08e+03 3894.53 0.00 0.00 0.00e+00 4.25e+02 6.75e+04 ▇▁▁▁▁
mark_down5 0 1 1.66e+03 4207.63 0.00 0.00 0.00e+00 2.17e+03 1.09e+05 ▇▁▁▁▁
cpi 0 1 1.71e+02 39.16 126.06 132.02 1.82e+02 2.12e+02 2.27e+02 ▇▁▁▂▆
unemployment 0 1 7.96e+00 1.86 3.88 6.89 7.87e+00 8.57e+00 1.43e+01 ▂▇▆▁▁
size 0 1 1.37e+05 60980.58 34875.00 93638.00 1.40e+05 2.03e+05 2.20e+05 ▅▂▅▂▇
# we found that weekly_sales, all of the markdowns (1 to 5), size, unemployment, cpi, temperature and fuel_price 

Summary details:

“Sales1” (created by merging sales, details and stores) contains 16 variables, only four of which are categorical: type, is_holiday, store, dept. The data contains 13 numerical variables, 1 chatacter variable, 1 date and 1 logical variable. “is_holiday” is specified as the logical variable.

The NAs from the markdown files have been replaced with 0s.

Within “sales1”, the variable “weekly_sales” there are some values which are negative. Whereas this should not be possible considering a strict definition of the term sales/revenues, whe interpret the variable as comprising also of depreciated products/lost merchandise. As such, we will keep the negative values as well.

Important Variables

weekly_sales: a density plot shows that most stores have a similar amount of weekly sales, with the density plot presenting a stark peak and being skewed to the right.The median measures around 7667 with the maximum amount being 671916. On a histogram, this value and other like it appear as outliers. We also found clear peaks in sales in the period of late December and late November of both 2011 and 2012.

favstats(sales1$weekly_sales)
minQ1medianQ3maxmeansdnmissing
-4.99e+032.1e+037.67e+032.03e+046.72e+051.61e+042.29e+044215700
sales1 %>%
  ggplot(aes(x= weekly_sales)) +
  geom_histogram(binwidth=5000) +
  scale_x_continuous(labels = scales::comma, limits = c(0,200000)) +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Histogram of Right Skewed distribution of weekly_sales variable",x="Weekly Sales",y=NULL) +
  theme_minimal()

all of the markdowns (1 to 5) The various markdown types represent the kinds of discounts which can be brought on products. We analysed these together to try and see which type seems to be more successful using boxplots. At a glance, it is clear that markdown_1 and markdown_5’s distributions are more skewed to the right, whereas the other markdowns are skewed very little. Furthermore, markdown_3 generates relatively few outliers compared to the other ones.

m1 <- sales1 %>%
  ggplot(aes(x= mark_down1)) +
  geom_boxplot() +
  scale_x_continuous(labels = scales::comma, limits = c(-10000,112000)) +
  scale_y_continuous(labels = scales::comma) +
  labs(x="Markdown 1",y=NULL) +
  theme_minimal() +
  theme(axis.text.y = element_blank())

m2 <- sales1 %>%
  ggplot(aes(x= mark_down2)) +
  geom_boxplot() +
  scale_x_continuous(labels = scales::comma, limits = c(-10000,112000)) +
  scale_y_continuous(labels = scales::comma) +
  labs(x="Markdown 2",y=NULL) +
  theme_minimal() +
  theme(axis.text.y = element_blank())

m3 <- sales1 %>%
  ggplot(aes(x= mark_down3)) +
  geom_boxplot() +
  scale_x_continuous(labels = scales::comma, limits = c(-10000,112000)) +
  scale_y_continuous(labels = scales::comma) +
  labs(x="Markdown 3",y=NULL) +
  theme_minimal() +
  theme(axis.text.y = element_blank())

m4 <- sales1 %>%
  ggplot(aes(x= mark_down4)) +
  geom_boxplot() +
  scale_x_continuous(labels = scales::comma, limits = c(-10000,112000)) +
  scale_y_continuous(labels = scales::comma) +
  labs(x="Markdown 4",y=NULL) +
  theme_minimal() +
  theme(axis.text.y = element_blank())

m5 <- sales1 %>%
  ggplot(aes(x= mark_down5)) +
  geom_boxplot() +
  scale_x_continuous(labels = scales::comma, limits = c(-10000,112000)) +
  scale_y_continuous(labels = scales::comma) +
  labs(x="Markdown 5",y=NULL) +
  theme_minimal() +
  theme(axis.text.y = element_blank())

ggarrange(m1,m2,m3,m4,m5,nrow=5)

Correlation Exploration between Variables

To Explore the correlation between the various variables, we analysed the correlations between weekly_sales, temperature, fuel_price, markdown_1, markdown_2, markdown_3, markdown_4, markdown_5, cpi, unemployment and size. There is quite a high correlation amongst the markdowns, with 1 and 3 presentin an correlation factor of 0.83, but it is insignificant as it doesn’t make much sense.

cor(sales1[,c(1,2,4:14,16)])
##                  store      dept is_holiday weekly_sales temperature fuel_price
## store         1.000000  0.024004  -0.000548    -0.085257    -0.05010   0.065290
## dept          0.024004  1.000000   0.000916     0.148003     0.00444   0.003572
## is_holiday   -0.000548  0.000916   1.000000     0.012758    -0.15595  -0.078281
## weekly_sales -0.085257  0.148003   0.012758     1.000000    -0.00230  -0.000116
## temperature  -0.050097  0.004437  -0.155949    -0.002299     1.00000   0.143859
## fuel_price    0.065290  0.003572  -0.078281    -0.000116     0.14386   1.000000
## mark_down1   -0.059844  0.001494  -0.003521     0.047231    -0.02642   0.297056
## mark_down2   -0.033829  0.000587   0.207604     0.020738    -0.17967   0.029153
## mark_down3   -0.020331  0.001475   0.266471     0.038528    -0.05603   0.018615
## mark_down4   -0.042724  0.001937   0.011565     0.037473    -0.05028   0.166622
## mark_down5   -0.012452  0.002668  -0.015235     0.050438    -0.01475   0.215420
## cpi          -0.211088 -0.007477  -0.001944    -0.020941     0.18211  -0.164210
## unemployment  0.208552  0.007837   0.010460    -0.025909     0.09673  -0.033853
## size         -0.182881 -0.002966   0.000593     0.243812    -0.05831   0.003361
##              mark_down1 mark_down2 mark_down3 mark_down4 mark_down5      cpi
## store          -0.05984  -0.033829   -0.02033   -0.04272   -0.01245 -0.21109
## dept            0.00149   0.000587    0.00148    0.00194    0.00267 -0.00748
## is_holiday     -0.00352   0.207604    0.26647    0.01157   -0.01524 -0.00194
## weekly_sales    0.04723   0.020738    0.03853    0.03747    0.05044 -0.02094
## temperature    -0.02642  -0.179672   -0.05603   -0.05028   -0.01475  0.18211
## fuel_price      0.29706   0.029153    0.01861    0.16662    0.21542 -0.16421
## mark_down1      1.00000   0.174868   -0.01441    0.83890    0.41505  0.01091
## mark_down2      0.17487   1.000000   -0.00608    0.11325    0.13174 -0.00355
## mark_down3     -0.01441  -0.006080    1.00000   -0.01202    0.04247 -0.00584
## mark_down4      0.83890   0.113250   -0.01202    1.00000    0.30337 -0.00205
## mark_down5      0.41505   0.131735    0.04247    0.30337    1.00000  0.06791
## cpi             0.01091  -0.003554   -0.00584   -0.00205    0.06791  1.00000
## unemployment   -0.10517  -0.041427   -0.01808   -0.07651   -0.12041 -0.29995
## size            0.16979   0.078372    0.03364    0.12733    0.15301 -0.00331
##              unemployment      size
## store             0.20855 -0.182881
## dept              0.00784 -0.002966
## is_holiday        0.01046  0.000593
## weekly_sales     -0.02591  0.243812
## temperature       0.09673 -0.058313
## fuel_price       -0.03385  0.003361
## mark_down1       -0.10517  0.169788
## mark_down2       -0.04143  0.078372
## mark_down3       -0.01808  0.033641
## mark_down4       -0.07651  0.127334
## mark_down5       -0.12041  0.153011
## cpi              -0.29995 -0.003314
## unemployment      1.00000 -0.068238
## size             -0.06824  1.000000

Q1

#number of distinct date, i.e weeks of data in details table
a <- details %>% summarise(n_distinct(date))

#number of distinct date, i.e weeks of data in sales table
b <- sales %>% summarise(n_distinct(date))

#Difference between the number of weeks in details & sales table
a-b
n_distinct(date)
39

In details.csv there are 39 more weeks of data compared to sales.csv

Q2

Before answering, we had to reformat the dates in the sales dataset. We then proceeded to analyse how many days and weeks have passed between the first day of sales and the last one. We found out that 142 weeks had passed. This amount is consistent with the number of years in the data set and previous analyses.

date_min <- min(dmy(sales$date))

date_max <- max(dmy(sales$date))

#Range of date in sales data in days
as.numeric(difftime(date_max, date_min))/7
## [1] 142

Q3

The maximum department count per store is 79, with stores 15, 13 and 19 having this amount of departments in some cases.

sales1 %>%
  group_by(store) %>%
  summarise(dept_count = n_distinct(dept)) %>%
  arrange(desc(dept_count))
storedept_count
1379
1579
1979
278
478
1178
1878
2078
2778
3478
177
677
1077
1477
2177
2377
2477
2577
2877
2977
3277
4077
4177
776
876
1676
1776
2276
2676
3176
1275
3975
3574
4574
973
372
572
3064
3363
3663
3863
3762
4262
4462
4361

Q4

From Question 3, It is noticeable how most stores have a similar amount of maximum departments, except for those stores with codes 30,33,36, 38, 37, 42 and 43. These stores have less departments as they measure in their sixties.

#Look at mean, median & standard deviation of sales across stores
sales1 %>%
  group_by(date,store) %>%
  summarise(base_weekly_sales=sum(weekly_sales)) %>%
  group_by(store) %>%
  summarise(median_weekly_sales=median(base_weekly_sales),mean_weekly_sales=mean(base_weekly_sales),sd_weekly_sales=sd(base_weekly_sales))
storemedian_weekly_salesmean_weekly_salessd_weekly_sales
11.54e+061.57e+061.58e+05
21.89e+061.94e+062.39e+05
33.99e+054.06e+054.7e+04 
42.09e+062.11e+062.69e+05
53.13e+053.2e+05 3.81e+04
61.54e+061.58e+062.15e+05
75.61e+055.75e+051.13e+05
88.98e+059.15e+051.08e+05
95.41e+055.48e+056.96e+04
101.84e+061.91e+063.03e+05
111.33e+061.37e+061.67e+05
129.89e+051.02e+061.4e+05 
131.98e+062.02e+062.67e+05
142.02e+062.03e+063.19e+05
156.07e+056.28e+051.22e+05
165.13e+055.23e+058.63e+04
178.78e+059e+05       1.13e+05
181.07e+061.09e+061.77e+05
191.42e+061.46e+061.93e+05
202.07e+062.12e+062.78e+05
217.43e+057.61e+051.29e+05
221e+06       1.04e+061.63e+05
231.37e+061.4e+06 2.52e+05
241.35e+061.37e+061.69e+05
256.91e+057.12e+051.14e+05
261e+06       1.01e+061.11e+05
271.75e+061.79e+062.42e+05
281.27e+061.33e+061.83e+05
295.23e+055.43e+051e+05       
304.43e+054.42e+052.31e+04
311.39e+061.41e+061.28e+05
321.15e+061.18e+061.39e+05
332.6e+05 2.62e+052.44e+04
349.56e+059.73e+051.05e+05
358.57e+059.26e+052.12e+05
363.75e+053.76e+056.11e+04
375.22e+055.22e+052.22e+04
383.83e+053.88e+054.32e+04
391.42e+061.46e+062.19e+05
409.61e+059.71e+051.2e+05 
411.25e+061.28e+061.9e+05 
425.61e+055.6e+05 5.03e+04
436.37e+056.38e+054.1e+04 
443.01e+053.05e+052.5e+04 
457.68e+057.91e+051.3e+05 

Q5

Store number 4 has the highest number of sales in this timeframe.

#Filtering for the mentioned dates, grouping by store and ordering based on sum of sales
sales1 %>%
  filter(between(date,as.Date('2011-11-07'),as.Date('2011-12-30'))) %>%
  group_by(store) %>%
  summarise(sum_sales = sum(weekly_sales)) %>%
  arrange(desc(sum_sales))
storesum_sales
42.07e+07
202.07e+07
131.99e+07
101.95e+07
141.93e+07
21.83e+07
271.66e+07
61.53e+07
391.49e+07
231.43e+07
11.43e+07
191.39e+07
111.3e+07 
311.29e+07
241.26e+07
411.26e+07
281.2e+07 
321.13e+07
181.08e+07
221.04e+07
121e+07       
269.2e+06 
409.16e+06
349.13e+06
358.91e+06
88.6e+06 
178.14e+06
458.09e+06
217.66e+06
257.38e+06
156.44e+06
75.92e+06
295.5e+06 
95.39e+06
165.14e+06
435e+06       
424.56e+06
374.22e+06
33.81e+06
303.49e+06
383.24e+06
53.08e+06
362.58e+06
442.48e+06
332.01e+06

Q6

Store number 20 has the highest level of weekly median sales in this timeframe.

median_weekly <- sales1 %>%
  group_by(store) %>%
  summarise(median_weekly_sales=median(weekly_sales))

median_weekly %>%
  ggplot(aes(x=median_weekly_sales,y=fct_reorder(factor(store),median_weekly_sales))) +
  geom_col() +
  labs(title = "Store sales ranked by weekly median sales", x = "Weekly median sales",y = "Store") +
  theme_bw()

Q7

The variable which has the highest correlation with weekly_sales is the size variable, which represents the size of the store.

details2<-sales1 %>% 
  select(-c(dept))

cor(details2[,c(4:13,1,15)])
##              weekly_sales temperature fuel_price mark_down1 mark_down2
## weekly_sales     1.000000     -0.0023  -0.000116     0.0472    0.02074
## temperature     -0.002299      1.0000   0.143859    -0.0264   -0.17967
## fuel_price      -0.000116      0.1439   1.000000     0.2971    0.02915
## mark_down1       0.047231     -0.0264   0.297056     1.0000    0.17487
## mark_down2       0.020738     -0.1797   0.029153     0.1749    1.00000
## mark_down3       0.038528     -0.0560   0.018615    -0.0144   -0.00608
## mark_down4       0.037473     -0.0503   0.166622     0.8389    0.11325
## mark_down5       0.050438     -0.0148   0.215420     0.4151    0.13174
## cpi             -0.020941      0.1821  -0.164210     0.0109   -0.00355
## unemployment    -0.025909      0.0967  -0.033853    -0.1052   -0.04143
## store           -0.085257     -0.0501   0.065290    -0.0598   -0.03383
## size             0.243812     -0.0583   0.003361     0.1698    0.07837
##              mark_down3 mark_down4 mark_down5      cpi unemployment   store
## weekly_sales    0.03853    0.03747     0.0504 -0.02094      -0.0259 -0.0853
## temperature    -0.05603   -0.05028    -0.0148  0.18211       0.0967 -0.0501
## fuel_price      0.01861    0.16662     0.2154 -0.16421      -0.0339  0.0653
## mark_down1     -0.01441    0.83890     0.4151  0.01091      -0.1052 -0.0598
## mark_down2     -0.00608    0.11325     0.1317 -0.00355      -0.0414 -0.0338
## mark_down3      1.00000   -0.01202     0.0425 -0.00584      -0.0181 -0.0203
## mark_down4     -0.01202    1.00000     0.3034 -0.00205      -0.0765 -0.0427
## mark_down5      0.04247    0.30337     1.0000  0.06791      -0.1204 -0.0125
## cpi            -0.00584   -0.00205     0.0679  1.00000      -0.3000 -0.2111
## unemployment   -0.01808   -0.07651    -0.1204 -0.29995       1.0000  0.2086
## store          -0.02033   -0.04272    -0.0125 -0.21109       0.2086  1.0000
## size            0.03364    0.12733     0.1530 -0.00331      -0.0682 -0.1829
##                  size
## weekly_sales  0.24381
## temperature  -0.05831
## fuel_price    0.00336
## mark_down1    0.16979
## mark_down2    0.07837
## mark_down3    0.03364
## mark_down4    0.12733
## mark_down5    0.15301
## cpi          -0.00331
## unemployment -0.06824
## store        -0.18288
## size          1.00000

Q8

Stores which have higher mean sales out of the holidays include 30, 36, 37, 38, 44.

#Finding weekly means by store divided by is_holiday variable, compare to find which stores have anomalous holiday sales lower than normal sales
anomaly_mean <- sales1 %>%
  group_by(date,store,is_holiday) %>%
  summarise(base_weekly_sales=sum(weekly_sales)) %>%
  group_by(store,is_holiday) %>%
  summarise(mean_weekly_sales = mean(base_weekly_sales)) %>%
  pivot_wider(names_from = "is_holiday",
              values_from = "mean_weekly_sales")

colnames(anomaly_mean) <- c("store","noholiday","yesholiday")
  
anomaly_mean %>%
    group_by(store) %>%
    summarise(anomaly_flag = ifelse(noholiday>yesholiday,1,0)) %>%
    arrange(desc(anomaly_flag))
storeanomaly_flag
301
361
371
381
441
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
310
320
330
340
350
390
400
410
420
430
450

Q9

Stores which have higher median sales out of the holidays include 25, 32, 36, 38.

#Finding weekly medians by store divided by is_holiday variable, compare to find which stores have anomalous holiday sales lower than normal sales
anomaly_median <- sales1 %>%
  group_by(date,store,is_holiday) %>%
  summarise(base_weekly_sales=sum(weekly_sales)) %>%
  group_by(store,is_holiday) %>%
  summarise(median_weekly_sales = median(base_weekly_sales)) %>%
  pivot_wider(names_from = "is_holiday",
              values_from = "median_weekly_sales")

colnames(anomaly_median) <- c("store","noholiday","yesholiday")
  
  
anomaly_median %>%
    group_by(store) %>%
    summarise(anomaly_flag = ifelse(noholiday>yesholiday,1,0)) %>%
    arrange(desc(anomaly_flag))
storeanomaly_flag
251
321
361
381
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
260
270
280
290
300
310
330
340
350
370
390
400
410
420
430
440
450

Inferential Statistics

Average weekly sales in holidays

Sales are expected to spike during holiday periods as the demand for gifts increase. There are 4 major holiday periods in the US: the Super Bowl in February, Labour Day in September, Thanksgiving in November, and Christmas in December. Between February 2010 and October 2012, each of the holidays have generated around a hundred million dollars of net sales. In this dataset, the Thanksgiving festival in November generates the highest average weekly sales of 22k out of all four holidays.

# Use sales & details dataset holiday
sales_holiday <- sales1 %>%
  filter(is_holiday == TRUE) %>% # filter for holiday periods only
  mutate(month = strftime(date, "%m")) %>% # create new variable as month component of date
  group_by(month) %>% # group by month to show which holiday it is, of all sales of all stores
  summarise(average_sales = mean(weekly_sales), sum_sales = sum(weekly_sales)) %>% # summarise by holiday 
  arrange(desc(average_sales))

# Display summary table
sales_holiday
monthaverage_salessum_sales
112.24e+041.33e+08
021.65e+041.47e+08
091.6e+04 1.42e+08
121.46e+048.71e+07

Labour day, on average, has just above 1.8k fewer weekly net sales relative to other major holiday periods.

sales_holiday_vs_labour <- sales1 %>%
  filter(is_holiday == TRUE) %>% # filter for holiday periods only
  mutate(month = strftime(date, "%m")) %>% # extract month component of date as new variable
  mutate(month = as.numeric(month)) %>% # change class of month to numeric
  group_by(month) %>% # group by holiday (which is by month)
  summarise(average_sales = mean(weekly_sales), sum_sales = sum(weekly_sales)) %>% # summarise sales by holiday 
  mutate(is_labour_day = ifelse(month == 9, TRUE, FALSE)) %>% # generate logical variable to separate holidays - labour day and others
  group_by(is_labour_day) %>% # calculate summary statistics by holiday average sales
  summarise(average_holiday = mean(average_sales))

# Display summary table
sales_holiday_vs_labour
is_labour_dayaverage_holiday
FALSE1.78e+04
TRUE1.6e+04 
# Calculate difference in average weekly sales
15995 - 17834
## [1] -1839

Difference in means of weekly net sales between holiday and non-holiday periods

With Thanksgiving

One question that may be asked is whether holiday weekly net sales differ statistically significantly to non-holiday weekly net sales. Graphically, the 95% confidence intervals do not overlap at all, and that holiday average weekly net sales significantly differ from those in non-holiday periods. Running a t-test with the null hypothesis of average weekly net sales is the same between holiday and non-holiday periods, and alternative hypothesis of average weekly net sales differ between the two periods. The t test returned a very low p-value, suggesting that there is enough evidence to reject the null hypothesis of same mean weekly net sales between holiday and non-holiday periods.

t_test_holiday <- sales1 %>%
  group_by(is_holiday) %>%
  summarise(mean = mean(weekly_sales),
            count = n(),
            t = qt(0.975, count - 1),
            sd = sd(weekly_sales),
            se = sd/sqrt(count),
            margin = t * se,
            l_ci = mean - margin,
            h_ci = mean + margin)

ggplot(data = t_test_holiday, aes(x = mean, y = is_holiday, colour = is_holiday)) +
  geom_point() +
  geom_errorbarh(aes(xmax = h_ci, xmin = l_ci, height = .1)) +
  labs(title = "95% Confidence Interval comparison between holiday and non-holiday periods",
       x = "Mean Weekly Sales",
       y = "Holiday Period") +
  theme_bw() +
  theme (legend.position = "none") +
  NULL

t.test(weekly_sales ~ is_holiday,
       alternative = "two.sided",
       conf.level = 0.95,
       data = sales1)
## 
##  Welch Two Sample t-test
## 
## data:  weekly_sales by is_holiday
## t = -7, df = 32761, p-value = 3e-12
## alternative hypothesis: true difference in means between group FALSE and group TRUE is not equal to 0
## 95 percent confidence interval:
##  -1460  -822
## sample estimates:
## mean in group FALSE  mean in group TRUE 
##               16012               17153
# p-value = 3e-12

Without Thanksgiving

Thanksgiving, as previously determined, has the highest weekly net sales. Keeping in mind that this holiday period may be an outlier, an analysis of the difference between holiday and non-holiday weekly sales excluding thanksgiving is due.

# Excluding thanksgiving
sales_thanksgiving <- sales1 %>%
  mutate(month = strftime(date, "%m")) %>% # extract month component of date as new variable
  mutate(month = as.numeric(month)) %>% # change class of month to numeric
  mutate(is_thanksgiving = ifelse(is_holiday == TRUE & month == 11, TRUE, FALSE)) %>% # generate thanksgiving indicator
  filter(is_thanksgiving == FALSE) # exclude thanksgiving

Firstly, confidence intervals of these subgroups are compared. It seems like the 95% confidence interval of non-holiday periods is completely contained within those of the holiday periods.

t_test_holiday <- sales_thanksgiving %>%
  group_by(is_holiday) %>%
  summarise(mean = mean(weekly_sales),
            count = n(),
            t = qt(0.975, count - 1),
            sd = sd(weekly_sales),
            se = sd/sqrt(count),
            margin = t * se,
            l_ci = mean - margin,
            h_ci = mean + margin)

# ggplot
ggplot(data = t_test_holiday, aes(x = mean, y = is_holiday, colour = is_holiday)) +
  geom_point() +
  geom_errorbarh(aes(xmax = h_ci, xmin = l_ci, height = .1)) +
  labs(title = "95% Confidence Interval comparison between holiday and non-holiday periods",
       x = "Mean Weekly Sales",
       y = "Holiday Period") +
  theme_bw() +
  theme (legend.position = "none") +
  NULL

Running t-test of difference in means of weekly net sales returned not enough evidence to reject the null hypothesis, and that holiday average weekly sales do not differ at 95% from non-holiday periods when thanksgiving is excluded. We can safely conclude our analysis of holiday vs non-holiday weekly net sales, concluding that Thanksgiving is indeed an outlier of the holiday periods with significantly higher weekly sales relative to both other holiday and non-holiday periods.

t.test(weekly_sales ~ is_holiday,
       alternative = "two.sided",
       conf.level = 0.95,
       data = sales_thanksgiving)
## 
##  Welch Two Sample t-test
## 
## data:  weekly_sales by is_holiday
## t = 1, df = 26693, p-value = 0.3
## alternative hypothesis: true difference in means between group FALSE and group TRUE is not equal to 0
## 95 percent confidence interval:
##  -124  462
## sample estimates:
## mean in group FALSE  mean in group TRUE 
##               16012               15844
# p-value = 0.3

Regression Models

The purpose of this exercise is to explain the variability of the weekly sales in the retailer’s stores.

Running regression with all variables

In the first attempt to explain weekly sales, we are using all available variables in their current format.

model1 <- lm(weekly_sales ~ ., data = sales1)

get_regression_table(model1)
termestimatestd_errorstatisticp_valuelower_ciupper_ci
intercept2.64e+043.28e+038.05 0    2e+04       3.28e+04
store-142       3.1     -45.8  0    -148       -136       
dept111       1.1     101    0    109       113       
date-1.38    0.237   -5.81 0    -1.84    -0.912   
is_holidayTRUE652       142       4.6  0    374       929       
temperature15.9     2.01    7.89 0    11.9     19.8     
fuel_price50.5     130       0.3880.698-205       306       
mark_down10.028   0.012   2.41 0.0160.005   0.051   
mark_down20       0.007   -0.0480.962-0.014   0.013   
mark_down30.117   0.006   18.4  0    0.105   0.13    
mark_down4-0.002   0.016   -0.1270.899-0.034   0.03    
mark_down50.106   0.009   11.2  0    0.087   0.124   
cpi-25.9     0.991   -26.1  0    -27.8     -23.9     
unemployment-263       20.6     -12.7  0    -303       -223       
type: B-347       108       -3.2  0.001-560       -135       
type: C5.81e+03185       31.4  0    5.45e+036.17e+03
size0.098   0.001   100    0    0.096   0.1     
get_regression_summaries(model1)
r_squaredadj_r_squaredmsermsesigmastatisticp_valuedfnobs
0.090.094.76e+082.18e+042.18e+042.6e+030164.22e+05
mosaic::msummary(model1)
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.64e+04   3.28e+03    8.05  8.2e-16 ***
## store          -1.42e+02   3.10e+00  -45.82  < 2e-16 ***
## dept            1.11e+02   1.10e+00  100.67  < 2e-16 ***
## date           -1.38e+00   2.37e-01   -5.81  6.3e-09 ***
## is_holidayTRUE  6.52e+02   1.42e+02    4.60  4.2e-06 ***
## temperature     1.59e+01   2.01e+00    7.89  3.0e-15 ***
## fuel_price      5.05e+01   1.30e+02    0.39   0.6981    
## mark_down1      2.83e-02   1.17e-02    2.41   0.0158 *  
## mark_down2     -3.39e-04   7.05e-03   -0.05   0.9617    
## mark_down3      1.17e-01   6.36e-03   18.43  < 2e-16 ***
## mark_down4     -2.07e-03   1.63e-02   -0.13   0.8990    
## mark_down5      1.06e-01   9.42e-03   11.21  < 2e-16 ***
## cpi            -2.59e+01   9.91e-01  -26.13  < 2e-16 ***
## unemployment   -2.63e+02   2.06e+01  -12.75  < 2e-16 ***
## typeB          -3.47e+02   1.08e+02   -3.20   0.0014 ** 
## typeC           5.81e+03   1.85e+02   31.42  < 2e-16 ***
## size            9.78e-02   9.77e-04  100.12  < 2e-16 ***
## 
## Residual standard error: 21800 on 421553 degrees of freedom
## Multiple R-squared:  0.0899, Adjusted R-squared:  0.0899 
## F-statistic: 2.6e+03 on 16 and 421553 DF,  p-value: <2e-16

When regression is run on dataset as given, then our model explains only 9% of variability of the data. Similarly, ‘date’ variable, even though significant, is not interpretable. Therefore, we decided to split this variable into year, and seasons.

sales_reg <- sales1 %>% 
  mutate(year= factor(year(date)),
         month = month(date),
         season_name = case_when(
           month %in%  c("12", "1", "2")  ~ "Winter",
           month %in%  c("3", "4", "5")  ~ "Spring",
           month %in%  c("6", "7", "8")  ~ "Summer",
           month %in%  c("9", "10", "11")  ~ "Autumn",
    ),
    season_name = factor(season_name, 
                         levels = c("Winter", "Spring", "Summer", "Autumn")))

Running model 2

We run the next model after removing the date and month variables and keeping seasons and year as a factor instead.

model2 <- lm(weekly_sales~ . - date - month, data = sales_reg)

get_regression_table(model2)
termestimatestd_errorstatisticp_valuelower_ciupper_ci
intercept261       596    0.4390.661-906       1.43e+03
store-137       3.14 -43.6  0    -143       -131       
dept111       1.1  101    0    109       113       
is_holidayTRUE229       146    1.57 0.117-57.2     515       
temperature44.8     3.17 14.1  0    38.6     51       
fuel_price2.55e+03181    14.1  0    2.2e+03 2.91e+03
mark_down10.071   0.0125.84 0    0.047   0.095   
mark_down2-0.012   0.007-1.64 0.1  -0.026   0.002   
mark_down30.134   0.00620.8  0    0.121   0.147   
mark_down4-0.037   0.016-2.23 0.026-0.069   -0.004   
mark_down50.104   0.00911.1  0    0.085   0.122   
cpi-22.8     1.07 -21.3  0    -24.9     -20.7     
unemployment-411       21.1  -19.5  0    -452       -370       
type: B-314       110    -2.86 0.004-529       -98.7     
type: C5.71e+03185    30.9  0    5.35e+036.08e+03
size0.097   0.00199.3  0    0.096   0.099   
year: 2011-2.66e+03163    -16.3  0    -2.98e+03-2.34e+03
year: 2012-4.1e+03 200    -20.5  0    -4.49e+03-3.7e+03 
season_name: Spring-2.47e+03129    -19.1  0    -2.72e+03-2.21e+03
season_name: Summer-2.69e+03161    -16.7  0    -3.01e+03-2.38e+03
season_name: Autumn-2.76e+03133    -20.7  0    -3.02e+03-2.5e+03 
get_regression_summaries(model2)
r_squaredadj_r_squaredmsermsesigmastatisticp_valuedfnobs
0.0910.0914.75e+082.18e+042.18e+042.12e+030204.22e+05
mosaic::msummary(model2)
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.61e+02   5.96e+02    0.44   0.6609    
## store             -1.37e+02   3.14e+00  -43.64  < 2e-16 ***
## dept               1.11e+02   1.10e+00  100.75  < 2e-16 ***
## is_holidayTRUE     2.29e+02   1.46e+02    1.57   0.1169    
## temperature        4.48e+01   3.17e+00   14.11  < 2e-16 ***
## fuel_price         2.55e+03   1.81e+02   14.08  < 2e-16 ***
## mark_down1         7.08e-02   1.21e-02    5.85  5.1e-09 ***
## mark_down2        -1.17e-02   7.13e-03   -1.64   0.1000    
## mark_down3         1.34e-01   6.44e-03   20.83  < 2e-16 ***
## mark_down4        -3.66e-02   1.64e-02   -2.23   0.0260 *  
## mark_down5         1.04e-01   9.32e-03   11.11  < 2e-16 ***
## cpi               -2.28e+01   1.07e+00  -21.35  < 2e-16 ***
## unemployment      -4.11e+02   2.11e+01  -19.45  < 2e-16 ***
## typeB             -3.14e+02   1.10e+02   -2.86   0.0042 ** 
## typeC              5.71e+03   1.85e+02   30.86  < 2e-16 ***
## size               9.75e-02   9.82e-04   99.29  < 2e-16 ***
## year2011          -2.66e+03   1.63e+02  -16.34  < 2e-16 ***
## year2012          -4.10e+03   2.00e+02  -20.46  < 2e-16 ***
## season_nameSpring -2.47e+03   1.29e+02  -19.13  < 2e-16 ***
## season_nameSummer -2.69e+03   1.61e+02  -16.72  < 2e-16 ***
## season_nameAutumn -2.76e+03   1.33e+02  -20.75  < 2e-16 ***
## 
## Residual standard error: 21800 on 421549 degrees of freedom
## Multiple R-squared:  0.0914, Adjusted R-squared:  0.0913 
## F-statistic: 2.12e+03 on 20 and 421549 DF,  p-value: <2e-16

We create a variable for each of the holiday weeks (Christmas, Thanksgiving, Labour day and Superbowl) to compare weekly sales for diffreent holidays.

sales_reg<-sales_reg %>% 
  mutate(superbowl = ifelse(month == "2" & is_holiday =="TRUE", TRUE, FALSE),
         christmas = ifelse(month == "12" & is_holiday =="TRUE", TRUE, FALSE),
         labor = ifelse(month == "9" & is_holiday =="TRUE", TRUE, FALSE),
         thanksgiving = ifelse(month == "11" & is_holiday =="TRUE", TRUE, FALSE))

Additionally, variables ‘store’ and ‘dept’ are interpreted as numerical variables, when in fact they are factors. Therefore, this will be accounted for in our next model.

Running model 3

model3 <- lm(weekly_sales ~ factor(dept) + factor(store) + temperature + cpi + unemployment + type + size  + season_name + fuel_price + superbowl + christmas + labor + thanksgiving + mark_down1  + mark_down2  + mark_down3  + mark_down4  + mark_down5, data = sales_reg)

get_regression_table(model3)
termestimatestd_errorstatisticp_valuelower_ciupper_ci
intercept3.01e+042.61e+0311.5  0    2.5e+04 3.52e+04
factor(dept): 22.46e+04236       104    0    2.41e+042.5e+04 
factor(dept): 3-7.46e+03236       -31.7  0    -7.93e+03-7e+03       
factor(dept): 46.81e+03236       28.9  0    6.35e+037.27e+03
factor(dept): 52.04e+03236       8.64 0    1.58e+032.51e+03
factor(dept): 6-1.54e+04240       -64    0    -1.58e+04-1.49e+04
factor(dept): 74.97e+03236       21.1  0    4.5e+03 5.43e+03
factor(dept): 81.1e+04 236       46.9  0    1.06e+041.15e+04
factor(dept): 9837       236       3.54 0    373       1.3e+03 
factor(dept): 10-902       236       -3.83 0    -1.36e+03-440       
factor(dept): 11-4.75e+03236       -20.1  0    -5.21e+03-4.28e+03
factor(dept): 12-1.52e+04236       -64.4  0    -1.57e+04-1.47e+04
factor(dept): 131.15e+04236       48.9  0    1.11e+041.2e+04 
factor(dept): 14-4.37e+03236       -18.5  0    -4.83e+03-3.91e+03
factor(dept): 16-5e+03       236       -21.2  0    -5.47e+03-4.54e+03
factor(dept): 17-9.13e+03236       -38.7  0    -9.59e+03-8.67e+03
factor(dept): 18-1.27e+04252       -50.3  0    -1.31e+04-1.22e+04
factor(dept): 19-2.06e+04267       -77    0    -2.11e+04-2e+04       
factor(dept): 20-1.44e+04240       -60.2  0    -1.49e+04-1.4e+04 
factor(dept): 21-1.43e+04236       -60.8  0    -1.48e+04-1.39e+04
factor(dept): 22-1.02e+04245       -41.5  0    -1.07e+04-9.7e+03 
factor(dept): 233.56e+03242       14.7  0    3.08e+034.03e+03
factor(dept): 24-1.49e+04247       -60.4  0    -1.54e+04-1.44e+04
factor(dept): 25-1.05e+04236       -44.2  0    -1.09e+04-1e+04       
factor(dept): 26-1.28e+04243       -52.6  0    -1.33e+04-1.23e+04
factor(dept): 27-1.93e+04244       -79.1  0    -1.98e+04-1.88e+04
factor(dept): 28-1.94e+04239       -81.1  0    -1.98e+04-1.89e+04
factor(dept): 29-1.59e+04248       -64    0    -1.64e+04-1.54e+04
factor(dept): 30-1.75e+04248       -70.3  0    -1.8e+04 -1.7e+04 
factor(dept): 31-1.77e+04239       -74.1  0    -1.82e+04-1.72e+04
factor(dept): 32-1.33e+04240       -55.4  0    -1.38e+04-1.29e+04
factor(dept): 33-1.48e+04247       -60.1  0    -1.53e+04-1.44e+04
factor(dept): 34-5.79e+03248       -23.4  0    -6.27e+03-5.3e+03 
factor(dept): 35-1.87e+04248       -75.1  0    -1.91e+04-1.82e+04
factor(dept): 36-1.96e+04248       -78.8  0    -2e+04       -1.91e+04
factor(dept): 37-2.26e+04313       -72.2  0    -2.32e+04-2.2e+04 
factor(dept): 384.22e+04236       179    0    4.17e+044.26e+04
factor(dept): 39-2.73e+043.35e+03-8.16 0    -3.39e+04-2.08e+04
factor(dept): 402.59e+04236       110    0    2.54e+042.63e+04
factor(dept): 41-1.93e+04247       -78.2  0    -1.98e+04-1.88e+04
factor(dept): 42-1.42e+04236       -60.1  0    -1.47e+04-1.37e+04
factor(dept): 43-1.85e+043.86e+03-4.79 0    -2.61e+04-1.09e+04
factor(dept): 44-1.65e+04246       -67.1  0    -1.7e+04 -1.6e+04 
factor(dept): 45-2.24e+04347       -64.5  0    -2.31e+04-2.17e+04
factor(dept): 46743       236       3.15 0.002281       1.21e+03
factor(dept): 47-2.27e+04552       -41.1  0    -2.38e+04-2.16e+04
factor(dept): 48-2.39e+04362       -66    0    -2.46e+04-2.32e+04
factor(dept): 49-1.44e+04260       -55.4  0    -1.49e+04-1.39e+04
factor(dept): 50-2.34e+04379       -61.9  0    -2.42e+04-2.27e+04
factor(dept): 51-2.15e+04396       -54.3  0    -2.22e+04-2.07e+04
factor(dept): 52-1.77e+04237       -74.8  0    -1.82e+04-1.73e+04
factor(dept): 54-2.2e+04 256       -86    0    -2.25e+04-2.15e+04
factor(dept): 55-1e+04       244       -40.9  0    -1.05e+04-9.53e+03
factor(dept): 56-1.65e+04241       -68.5  0    -1.7e+04 -1.61e+04
factor(dept): 58-1.93e+04262       -73.8  0    -1.99e+04-1.88e+04
factor(dept): 59-1.92e+04239       -80.3  0    -1.97e+04-1.87e+04
factor(dept): 60-1.89e+04242       -77.9  0    -1.93e+04-1.84e+04
factor(dept): 652.77e+041.14e+0324.3  0    2.54e+042.99e+04
factor(dept): 67-1.19e+04236       -50.3  0    -1.23e+04-1.14e+04
factor(dept): 71-1.58e+04248       -63.7  0    -1.63e+04-1.53e+04
factor(dept): 723.08e+04239       129    0    3.04e+043.13e+04
factor(dept): 74-5.45e+03236       -23.1  0    -5.91e+03-4.99e+03
factor(dept): 77-2.46e+041.11e+03-22.3  0    -2.68e+04-2.25e+04
factor(dept): 78-2.32e+04888       -26.2  0    -2.5e+04 -2.15e+04
factor(dept): 792.78e+03236       11.8  0    2.31e+033.24e+03
factor(dept): 80-7.73e+03241       -32.1  0    -8.21e+03-7.26e+03
factor(dept): 81-3.77e+03236       -16    0    -4.23e+03-3.31e+03
factor(dept): 82-3.77e+03236       -16    0    -4.24e+03-3.31e+03
factor(dept): 83-1.67e+04241       -69.2  0    -1.71e+04-1.62e+04
factor(dept): 85-1.78e+04239       -74.3  0    -1.83e+04-1.73e+04
factor(dept): 87-5.9e+03 236       -25    0    -6.36e+03-5.43e+03
factor(dept): 902.62e+04236       111    0    2.57e+042.66e+04
factor(dept): 911.46e+04236       61.9  0    1.41e+041.5e+04 
factor(dept): 925.64e+04236       239    0    5.59e+045.68e+04
factor(dept): 937.13e+03241       29.6  0    6.65e+037.6e+03 
factor(dept): 941.38e+04243       56.7  0    1.33e+041.43e+04
factor(dept): 955.1e+04 236       216    0    5.05e+045.14e+04
factor(dept): 96-3.8e+03 254       -14.9  0    -4.3e+03 -3.3e+03 
factor(dept): 97-5.17e+03237       -21.8  0    -5.64e+03-4.71e+03
factor(dept): 98-1.31e+04242       -54    0    -1.35e+04-1.26e+04
factor(dept): 99-2.51e+04486       -51.8  0    -2.61e+04-2.42e+04
factor(store): 25.19e+03187       27.8  0    4.82e+035.56e+03
factor(store): 3-1.67e+04199       -83.9  0    -1.71e+04-1.63e+04
factor(store): 47.62e+031.1e+03 6.9  0    5.45e+039.78e+03
factor(store): 5-1.84e+04203       -90.8  0    -1.88e+04-1.8e+04 
factor(store): 6-255       192       -1.33 0.184-631       121       
factor(store): 7-1.35e+04334       -40.3  0    -1.41e+04-1.28e+04
factor(store): 8-9.65e+03202       -47.8  0    -1e+04       -9.25e+03
factor(store): 9-1.48e+04208       -71.3  0    -1.52e+04-1.44e+04
factor(store): 106.04e+031.09e+035.54 0    3.9e+03 8.17e+03
factor(store): 11-3.01e+03192       -15.7  0    -3.39e+03-2.64e+03
factor(store): 12-4.56e+031.06e+03-4.3  0    -6.64e+03-2.48e+03
factor(store): 136.6e+03 1.09e+036.03 0    4.45e+038.74e+03
factor(store): 147.7e+03 412       18.7  0    6.89e+038.51e+03
factor(store): 15-1.22e+041.02e+03-12    0    -1.42e+04-1.02e+04
factor(store): 16-1.49e+04353       -42.2  0    -1.56e+04-1.42e+04
factor(store): 17-8.63e+031.1e+03 -7.86 0    -1.08e+04-6.48e+03
factor(store): 18-5.13e+031e+03       -5.12 0    -7.09e+03-3.16e+03
factor(store): 19-506       1.02e+03-0.4960.62 -2.5e+03 1.49e+03
factor(store): 208.13e+03216       37.6  0    7.71e+038.55e+03
factor(store): 21-1.12e+04190       -58.9  0    -1.16e+04-1.08e+04
factor(store): 22-6.26e+03964       -6.5  0    -8.15e+03-4.37e+03
factor(store): 23-2.31e+031.06e+03-2.18 0.029-4.38e+03-238       
factor(store): 24-1.64e+031.01e+03-1.62 0.105-3.63e+03345       
factor(store): 25-1.18e+04218       -53.9  0    -1.22e+04-1.13e+04
factor(store): 26-6.96e+031.01e+03-6.87 0    -8.94e+03-4.97e+03
factor(store): 274.03e+03973       4.14 0    2.12e+035.93e+03
factor(store): 28-434       1.06e+03-0.4090.683-2.51e+031.65e+03
factor(store): 29-1.29e+04991       -13    0    -1.49e+04-1.1e+04 
factor(store): 30-1.79e+04207       -86.6  0    -1.83e+04-1.75e+04
factor(store): 31-2.31e+03188       -12.3  0    -2.68e+03-1.94e+03
factor(store): 32-4.9e+03 327       -15    0    -5.54e+03-4.26e+03
factor(store): 33-2.06e+041.09e+03-19    0    -2.28e+04-1.85e+04
factor(store): 34-7.41e+031.05e+03-7.04 0    -9.48e+03-5.35e+03
factor(store): 35-7.58e+03955       -7.94 0    -9.45e+03-5.71e+03
factor(store): 36-1.94e+04216       -89.7  0    -1.98e+04-1.9e+04 
factor(store): 37-1.63e+04207       -78.6  0    -1.67e+04-1.58e+04
factor(store): 38-1.61e+041.06e+03-15.2  0    -1.82e+04-1.4e+04 
factor(store): 39-1.5e+03 193       -7.77 0    -1.87e+03-1.12e+03
factor(store): 40-8.57e+031.06e+03-8.1  0    -1.06e+04-6.5e+03 
factor(store): 41-4.01e+03344       -11.7  0    -4.69e+03-3.34e+03
factor(store): 42-1.39e+041.09e+03-12.8  0    -1.6e+04 -1.18e+04
factor(store): 43-1.31e+04241       -54.2  0    -1.35e+04-1.26e+04
factor(store): 44-2.03e+041.09e+03-18.6  0    -2.25e+04-1.82e+04
factor(store): 45-1.01e+04410       -24.7  0    -1.09e+04-9.32e+03
temperature4.36    2.88    1.52 0.13 -1.28    10       
cpi7.97    12.2     0.6550.513-15.9     31.8     
unemployment-371       45.6     -8.15 0    -460       -282       
type: B                                    
type: C                                    
size                                    
season_name: Spring-1.25e+0384       -14.8  0    -1.41e+03-1.08e+03
season_name: Summer-1.03e+03128       -8.05 0    -1.28e+03-778       
season_name: Autumn-1.81e+0395.6     -18.9  0    -1.99e+03-1.62e+03
fuel_price-434       80.9     -5.36 0    -592       -275       
superbowlTRUE-396       151       -2.63 0.009-692       -101       
christmasTRUE-1.71e+03203       -8.43 0    -2.1e+03 -1.31e+03
laborTRUE674       153       4.4  0    374       975       
thanksgivingTRUE6.07e+03229       26.5  0    5.62e+036.52e+03
mark_down10.005   0.007   0.74 0.459-0.009   0.019   
mark_down2-0.029   0.005   -5.92 0    -0.038   -0.019   
mark_down30.043   0.005   9.01 0    0.034   0.052   
mark_down4-0.038   0.01    -3.72 0    -0.057   -0.018   
mark_down50.034   0.006   5.6  0    0.022   0.045   
mosaic::msummary(model3)
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.01e+04   2.61e+03   11.54  < 2e-16 ***
## factor(dept)2      2.46e+04   2.36e+02  104.18  < 2e-16 ***
## factor(dept)3     -7.46e+03   2.36e+02  -31.68  < 2e-16 ***
## factor(dept)4      6.81e+03   2.36e+02   28.89  < 2e-16 ***
## factor(dept)5      2.04e+03   2.36e+02    8.64  < 2e-16 ***
## factor(dept)6     -1.54e+04   2.40e+02  -64.01  < 2e-16 ***
## factor(dept)7      4.97e+03   2.36e+02   21.07  < 2e-16 ***
## factor(dept)8      1.10e+04   2.36e+02   46.88  < 2e-16 ***
## factor(dept)9      8.37e+02   2.36e+02    3.54  0.00040 ***
## factor(dept)10    -9.02e+02   2.36e+02   -3.83  0.00013 ***
## factor(dept)11    -4.75e+03   2.36e+02  -20.14  < 2e-16 ***
## factor(dept)12    -1.52e+04   2.36e+02  -64.43  < 2e-16 ***
## factor(dept)13     1.15e+04   2.36e+02   48.92  < 2e-16 ***
## factor(dept)14    -4.37e+03   2.36e+02  -18.54  < 2e-16 ***
## factor(dept)16    -5.00e+03   2.36e+02  -21.23  < 2e-16 ***
## factor(dept)17    -9.13e+03   2.36e+02  -38.71  < 2e-16 ***
## factor(dept)18    -1.27e+04   2.52e+02  -50.26  < 2e-16 ***
## factor(dept)19    -2.06e+04   2.67e+02  -76.95  < 2e-16 ***
## factor(dept)20    -1.44e+04   2.40e+02  -60.20  < 2e-16 ***
## factor(dept)21    -1.43e+04   2.36e+02  -60.81  < 2e-16 ***
## factor(dept)22    -1.02e+04   2.45e+02  -41.52  < 2e-16 ***
## factor(dept)23     3.56e+03   2.42e+02   14.67  < 2e-16 ***
## factor(dept)24    -1.49e+04   2.47e+02  -60.44  < 2e-16 ***
## factor(dept)25    -1.05e+04   2.36e+02  -44.24  < 2e-16 ***
## factor(dept)26    -1.28e+04   2.43e+02  -52.65  < 2e-16 ***
## factor(dept)27    -1.93e+04   2.44e+02  -79.08  < 2e-16 ***
## factor(dept)28    -1.94e+04   2.39e+02  -81.12  < 2e-16 ***
## factor(dept)29    -1.59e+04   2.48e+02  -63.99  < 2e-16 ***
## factor(dept)30    -1.75e+04   2.48e+02  -70.33  < 2e-16 ***
## factor(dept)31    -1.77e+04   2.39e+02  -74.05  < 2e-16 ***
## factor(dept)32    -1.33e+04   2.40e+02  -55.44  < 2e-16 ***
## factor(dept)33    -1.48e+04   2.47e+02  -60.08  < 2e-16 ***
## factor(dept)34    -5.79e+03   2.48e+02  -23.35  < 2e-16 ***
## factor(dept)35    -1.87e+04   2.48e+02  -75.13  < 2e-16 ***
## factor(dept)36    -1.96e+04   2.48e+02  -78.80  < 2e-16 ***
## factor(dept)37    -2.26e+04   3.13e+02  -72.22  < 2e-16 ***
## factor(dept)38     4.22e+04   2.36e+02  178.97  < 2e-16 ***
## factor(dept)39    -2.73e+04   3.35e+03   -8.16  3.3e-16 ***
## factor(dept)40     2.59e+04   2.36e+02  109.85  < 2e-16 ***
## factor(dept)41    -1.93e+04   2.47e+02  -78.22  < 2e-16 ***
## factor(dept)42    -1.42e+04   2.36e+02  -60.14  < 2e-16 ***
## factor(dept)43    -1.85e+04   3.86e+03   -4.79  1.7e-06 ***
## factor(dept)44    -1.65e+04   2.46e+02  -67.12  < 2e-16 ***
## factor(dept)45    -2.24e+04   3.47e+02  -64.54  < 2e-16 ***
## factor(dept)46     7.43e+02   2.36e+02    3.15  0.00161 ** 
## factor(dept)47    -2.27e+04   5.52e+02  -41.12  < 2e-16 ***
## factor(dept)48    -2.39e+04   3.62e+02  -65.97  < 2e-16 ***
## factor(dept)49    -1.44e+04   2.60e+02  -55.42  < 2e-16 ***
## factor(dept)50    -2.34e+04   3.79e+02  -61.91  < 2e-16 ***
## factor(dept)51    -2.15e+04   3.96e+02  -54.27  < 2e-16 ***
## factor(dept)52    -1.77e+04   2.37e+02  -74.79  < 2e-16 ***
## factor(dept)54    -2.20e+04   2.56e+02  -86.05  < 2e-16 ***
## factor(dept)55    -1.00e+04   2.44e+02  -40.92  < 2e-16 ***
## factor(dept)56    -1.65e+04   2.41e+02  -68.54  < 2e-16 ***
## factor(dept)58    -1.93e+04   2.62e+02  -73.81  < 2e-16 ***
## factor(dept)59    -1.92e+04   2.39e+02  -80.33  < 2e-16 ***
## factor(dept)60    -1.89e+04   2.42e+02  -77.87  < 2e-16 ***
## factor(dept)65     2.77e+04   1.14e+03   24.32  < 2e-16 ***
## factor(dept)67    -1.19e+04   2.36e+02  -50.30  < 2e-16 ***
## factor(dept)71    -1.58e+04   2.48e+02  -63.71  < 2e-16 ***
## factor(dept)72     3.08e+04   2.39e+02  128.77  < 2e-16 ***
## factor(dept)74    -5.45e+03   2.36e+02  -23.11  < 2e-16 ***
## factor(dept)77    -2.46e+04   1.11e+03  -22.28  < 2e-16 ***
## factor(dept)78    -2.32e+04   8.88e+02  -26.17  < 2e-16 ***
## factor(dept)79     2.78e+03   2.36e+02   11.78  < 2e-16 ***
## factor(dept)80    -7.73e+03   2.41e+02  -32.08  < 2e-16 ***
## factor(dept)81    -3.77e+03   2.36e+02  -16.00  < 2e-16 ***
## factor(dept)82    -3.77e+03   2.36e+02  -16.02  < 2e-16 ***
## factor(dept)83    -1.67e+04   2.41e+02  -69.20  < 2e-16 ***
## factor(dept)85    -1.78e+04   2.39e+02  -74.33  < 2e-16 ***
## factor(dept)87    -5.90e+03   2.36e+02  -24.97  < 2e-16 ***
## factor(dept)90     2.62e+04   2.36e+02  111.08  < 2e-16 ***
## factor(dept)91     1.46e+04   2.36e+02   61.86  < 2e-16 ***
## factor(dept)92     5.64e+04   2.36e+02  239.17  < 2e-16 ***
## factor(dept)93     7.13e+03   2.41e+02   29.58  < 2e-16 ***
## factor(dept)94     1.38e+04   2.43e+02   56.72  < 2e-16 ***
## factor(dept)95     5.10e+04   2.36e+02  216.29  < 2e-16 ***
## factor(dept)96    -3.80e+03   2.54e+02  -14.93  < 2e-16 ***
## factor(dept)97    -5.17e+03   2.37e+02  -21.82  < 2e-16 ***
## factor(dept)98    -1.31e+04   2.42e+02  -54.02  < 2e-16 ***
## factor(dept)99    -2.51e+04   4.86e+02  -51.76  < 2e-16 ***
## factor(store)2     5.19e+03   1.87e+02   27.76  < 2e-16 ***
## factor(store)3    -1.67e+04   1.99e+02  -83.89  < 2e-16 ***
## factor(store)4     7.62e+03   1.10e+03    6.90  5.2e-12 ***
## factor(store)5    -1.84e+04   2.03e+02  -90.83  < 2e-16 ***
## factor(store)6    -2.55e+02   1.92e+02   -1.33  0.18437    
## factor(store)7    -1.35e+04   3.34e+02  -40.31  < 2e-16 ***
## factor(store)8    -9.65e+03   2.02e+02  -47.83  < 2e-16 ***
## factor(store)9    -1.48e+04   2.08e+02  -71.32  < 2e-16 ***
## factor(store)10    6.04e+03   1.09e+03    5.54  3.0e-08 ***
## factor(store)11   -3.01e+03   1.92e+02  -15.69  < 2e-16 ***
## factor(store)12   -4.56e+03   1.06e+03   -4.30  1.7e-05 ***
## factor(store)13    6.60e+03   1.09e+03    6.03  1.7e-09 ***
## factor(store)14    7.70e+03   4.12e+02   18.69  < 2e-16 ***
## factor(store)15   -1.22e+04   1.02e+03  -12.02  < 2e-16 ***
## factor(store)16   -1.49e+04   3.53e+02  -42.24  < 2e-16 ***
## factor(store)17   -8.63e+03   1.10e+03   -7.86  3.7e-15 ***
## factor(store)18   -5.13e+03   1.00e+03   -5.12  3.0e-07 ***
## factor(store)19   -5.06e+02   1.02e+03   -0.50  0.61954    
## factor(store)20    8.13e+03   2.16e+02   37.64  < 2e-16 ***
## factor(store)21   -1.12e+04   1.90e+02  -58.94  < 2e-16 ***
## factor(store)22   -6.26e+03   9.64e+02   -6.50  8.3e-11 ***
## factor(store)23   -2.31e+03   1.06e+03   -2.19  0.02889 *  
## factor(store)24   -1.64e+03   1.01e+03   -1.62  0.10529    
## factor(store)25   -1.18e+04   2.18e+02  -53.90  < 2e-16 ***
## factor(store)26   -6.96e+03   1.01e+03   -6.87  6.5e-12 ***
## factor(store)27    4.03e+03   9.73e+02    4.14  3.4e-05 ***
## factor(store)28   -4.34e+02   1.06e+03   -0.41  0.68290    
## factor(store)29   -1.29e+04   9.91e+02  -13.04  < 2e-16 ***
## factor(store)30   -1.79e+04   2.07e+02  -86.60  < 2e-16 ***
## factor(store)31   -2.31e+03   1.88e+02  -12.33  < 2e-16 ***
## factor(store)32   -4.90e+03   3.27e+02  -14.98  < 2e-16 ***
## factor(store)33   -2.06e+04   1.09e+03  -18.98  < 2e-16 ***
## factor(store)34   -7.41e+03   1.05e+03   -7.03  2.0e-12 ***
## factor(store)35   -7.58e+03   9.55e+02   -7.94  2.1e-15 ***
## factor(store)36   -1.94e+04   2.16e+02  -89.69  < 2e-16 ***
## factor(store)37   -1.63e+04   2.07e+02  -78.64  < 2e-16 ***
## factor(store)38   -1.61e+04   1.06e+03  -15.22  < 2e-16 ***
## factor(store)39   -1.50e+03   1.93e+02   -7.77  8.1e-15 ***
## factor(store)40   -8.57e+03   1.06e+03   -8.10  5.3e-16 ***
## factor(store)41   -4.01e+03   3.44e+02  -11.67  < 2e-16 ***
## factor(store)42   -1.39e+04   1.09e+03  -12.78  < 2e-16 ***
## factor(store)43   -1.31e+04   2.41e+02  -54.21  < 2e-16 ***
## factor(store)44   -2.03e+04   1.09e+03  -18.59  < 2e-16 ***
## factor(store)45   -1.01e+04   4.10e+02  -24.69  < 2e-16 ***
## temperature        4.36e+00   2.88e+00    1.52  0.12954    
## cpi                7.97e+00   1.22e+01    0.65  0.51267    
## unemployment      -3.71e+02   4.56e+01   -8.15  3.8e-16 ***
## season_nameSpring -1.25e+03   8.40e+01  -14.84  < 2e-16 ***
## season_nameSummer -1.03e+03   1.28e+02   -8.06  7.9e-16 ***
## season_nameAutumn -1.81e+03   9.56e+01  -18.91  < 2e-16 ***
## fuel_price        -4.34e+02   8.09e+01   -5.36  8.4e-08 ***
## superbowlTRUE     -3.96e+02   1.51e+02   -2.63  0.00864 ** 
## christmasTRUE     -1.71e+03   2.03e+02   -8.43  < 2e-16 ***
## laborTRUE          6.74e+02   1.53e+02    4.40  1.1e-05 ***
## thanksgivingTRUE   6.07e+03   2.29e+02   26.46  < 2e-16 ***
## mark_down1         5.34e-03   7.22e-03    0.74  0.45925    
## mark_down2        -2.86e-02   4.83e-03   -5.92  3.3e-09 ***
## mark_down3         4.28e-02   4.76e-03    9.01  < 2e-16 ***
## mark_down4        -3.76e-02   1.01e-02   -3.72  0.00020 ***
## mark_down5         3.36e-02   5.99e-03    5.60  2.1e-08 ***
## 
## Residual standard error: 13400 on 421429 degrees of freedom
## Multiple R-squared:  0.659,  Adjusted R-squared:  0.658 
## F-statistic: 5.81e+03 on 140 and 421429 DF,  p-value: <2e-16
# vif(model3) problem of collinearity arise

There is problem of collinearity between variables in the model. Hence we remove store as a variable. By doing this,although we arrive at a model with less model explainability, the problem of collinearity is resolved.

Running final model

model_final <- lm(weekly_sales ~ factor(dept)  + temperature + cpi + unemployment + type + size  + season_name + fuel_price + superbowl + christmas + labor + thanksgiving + mark_down1  + mark_down2  + mark_down3  + mark_down4  + mark_down5, data = sales_reg)

get_regression_table(model_final)
termestimatestd_errorstatisticp_valuelower_ciupper_ci
intercept1e+04       328       30.5  0    9.37e+031.07e+04
factor(dept): 22.46e+04246       99.7  0    2.41e+042.5e+04 
factor(dept): 3-7.46e+03246       -30.3  0    -7.95e+03-6.98e+03
factor(dept): 46.81e+03246       27.6  0    6.33e+037.29e+03
factor(dept): 52e+03       247       8.09 0    1.52e+032.49e+03
factor(dept): 6-1.54e+04251       -61.2  0    -1.59e+04-1.49e+04
factor(dept): 74.97e+03246       20.2  0    4.48e+035.45e+03
factor(dept): 81.1e+04 246       44.8  0    1.06e+041.15e+04
factor(dept): 9841       247       3.4  0.001356       1.33e+03
factor(dept): 10-902       246       -3.66 0    -1.38e+03-419       
factor(dept): 11-4.75e+03246       -19.3  0    -5.23e+03-4.26e+03
factor(dept): 12-1.52e+04247       -61.6  0    -1.57e+04-1.47e+04
factor(dept): 131.15e+04246       46.8  0    1.1e+04 1.2e+04 
factor(dept): 14-4.37e+03246       -17.7  0    -4.85e+03-3.89e+03
factor(dept): 16-5e+03       246       -20.3  0    -5.49e+03-4.52e+03
factor(dept): 17-9.13e+03247       -37    0    -9.61e+03-8.64e+03
factor(dept): 18-1.26e+04263       -47.9  0    -1.31e+04-1.21e+04
factor(dept): 19-2.02e+04279       -72.4  0    -2.08e+04-1.97e+04
factor(dept): 20-1.44e+04250       -57.6  0    -1.49e+04-1.39e+04
factor(dept): 21-1.43e+04246       -58.2  0    -1.48e+04-1.38e+04
factor(dept): 22-1.01e+04256       -39.6  0    -1.07e+04-9.65e+03
factor(dept): 233.54e+03253       14    0    3.04e+034.04e+03
factor(dept): 24-1.49e+04258       -57.8  0    -1.54e+04-1.44e+04
factor(dept): 25-1.04e+04247       -42.2  0    -1.09e+04-9.96e+03
factor(dept): 26-1.28e+04254       -50.5  0    -1.33e+04-1.23e+04
factor(dept): 27-1.93e+04255       -75.6  0    -1.98e+04-1.88e+04
factor(dept): 28-1.93e+04250       -77.5  0    -1.98e+04-1.88e+04
factor(dept): 29-1.58e+04259       -61    0    -1.63e+04-1.53e+04
factor(dept): 30-1.74e+04260       -67.1  0    -1.79e+04-1.69e+04
factor(dept): 31-1.77e+04250       -70.6  0    -1.81e+04-1.72e+04
factor(dept): 32-1.34e+04251       -53.2  0    -1.39e+04-1.29e+04
factor(dept): 33-1.48e+04258       -57.4  0    -1.53e+04-1.43e+04
factor(dept): 34-5.75e+03259       -22.2  0    -6.26e+03-5.24e+03
factor(dept): 35-1.86e+04259       -71.7  0    -1.91e+04-1.81e+04
factor(dept): 36-1.95e+04260       -75.2  0    -2e+04       -1.9e+04 
factor(dept): 37-2.23e+04326       -68.4  0    -2.3e+04 -2.17e+04
factor(dept): 384.22e+04246       171    0    4.17e+044.27e+04
factor(dept): 39-2.66e+043.5e+03 -7.62 0    -3.35e+04-1.98e+04
factor(dept): 402.59e+04246       105    0    2.54e+042.64e+04
factor(dept): 41-1.93e+04258       -74.8  0    -1.98e+04-1.88e+04
factor(dept): 42-1.42e+04247       -57.5  0    -1.47e+04-1.37e+04
factor(dept): 43-2.07e+044.04e+03-5.14 0    -2.87e+04-1.28e+04
factor(dept): 44-1.65e+04257       -64    0    -1.7e+04 -1.6e+04 
factor(dept): 45-2.16e+04362       -59.7  0    -2.24e+04-2.09e+04
factor(dept): 46743       246       3.02 0.003260       1.23e+03
factor(dept): 47-2.2e+04 577       -38.2  0    -2.32e+04-2.09e+04
factor(dept): 48-2.18e+04378       -57.8  0    -2.26e+04-2.11e+04
factor(dept): 49-1.43e+04272       -52.5  0    -1.48e+04-1.38e+04
factor(dept): 50-2.15e+04395       -54.5  0    -2.23e+04-2.07e+04
factor(dept): 51-2.12e+04413       -51.2  0    -2.2e+04 -2.04e+04
factor(dept): 52-1.77e+04248       -71.5  0    -1.82e+04-1.73e+04
factor(dept): 54-2.18e+04267       -81.5  0    -2.23e+04-2.13e+04
factor(dept): 55-1e+04       256       -39.2  0    -1.05e+04-9.52e+03
factor(dept): 56-1.65e+04252       -65.4  0    -1.7e+04 -1.6e+04 
factor(dept): 58-1.89e+04274       -69    0    -1.94e+04-1.84e+04
factor(dept): 59-1.92e+04250       -76.8  0    -1.97e+04-1.87e+04
factor(dept): 60-1.87e+04253       -73.9  0    -1.92e+04-1.82e+04
factor(dept): 652.4e+04 1.18e+0320.3  0    2.17e+042.63e+04
factor(dept): 67-1.19e+04246       -48.1  0    -1.23e+04-1.14e+04
factor(dept): 71-1.58e+04259       -60.8  0    -1.63e+04-1.53e+04
factor(dept): 723.09e+04250       123    0    3.04e+043.14e+04
factor(dept): 74-5.45e+03246       -22.1  0    -5.93e+03-4.96e+03
factor(dept): 77-2.48e+041.16e+03-21.5  0    -2.71e+04-2.25e+04
factor(dept): 78-2.31e+04928       -24.9  0    -2.49e+04-2.13e+04
factor(dept): 792.78e+03246       11.3  0    2.29e+033.26e+03
factor(dept): 80-7.57e+03252       -30    0    -8.06e+03-7.07e+03
factor(dept): 81-3.77e+03246       -15.3  0    -4.25e+03-3.29e+03
factor(dept): 82-3.77e+03246       -15.3  0    -4.26e+03-3.29e+03
factor(dept): 83-1.66e+04252       -65.8  0    -1.71e+04-1.61e+04
factor(dept): 85-1.78e+04250       -71.1  0    -1.83e+04-1.73e+04
factor(dept): 87-5.91e+03247       -24    0    -6.4e+03 -5.43e+03
factor(dept): 902.62e+04246       106    0    2.57e+042.67e+04
factor(dept): 911.46e+04246       59.2  0    1.41e+041.51e+04
factor(dept): 925.64e+04246       229    0    5.59e+045.68e+04
factor(dept): 937.22e+03252       28.7  0    6.73e+037.71e+03
factor(dept): 941.38e+04254       54.2  0    1.33e+041.43e+04
factor(dept): 955.1e+04 246       207    0    5.05e+045.15e+04
factor(dept): 96-3.9e+03 266       -14.7  0    -4.42e+03-3.38e+03
factor(dept): 97-5.02e+03248       -20.2  0    -5.5e+03 -4.53e+03
factor(dept): 98-1.3e+04 253       -51.4  0    -1.35e+04-1.25e+04
factor(dept): 99-2.52e+04508       -49.6  0    -2.62e+04-2.42e+04
temperature72.9     2.01    36.2  0    69       76.9     
cpi-25.8     0.634   -40.6  0    -27       -24.5     
unemployment-505       13       -38.9  0    -530       -479       
type: B2.36e+0368.9     34.2  0    2.22e+032.49e+03
type: C3.04e+03116       26.2  0    2.81e+033.27e+03
size0.126   0.001   202    0    0.125   0.128   
season_name: Spring-2.25e+0377.9     -28.9  0    -2.4e+03 -2.1e+03 
season_name: Summer-3.5e+03 103       -34    0    -3.7e+03 -3.29e+03
season_name: Autumn-3.19e+0385.9     -37.2  0    -3.36e+03-3.03e+03
fuel_price-665       54       -12.3  0    -771       -560       
superbowlTRUE-118       158       -0.7520.452-427       190       
christmasTRUE-2.18e+03212       -10.3  0    -2.6e+03 -1.77e+03
laborTRUE-138       158       -0.8750.382-448       171       
thanksgivingTRUE6.36e+03238       26.7  0    5.89e+036.82e+03
mark_down10.024   0.007   3.29 0.0010.01    0.038   
mark_down20.009   0.005   1.83 0.068-0.001   0.019   
mark_down30.067   0.005   13.4  0    0.057   0.076   
mark_down4-0.017   0.01    -1.58 0.114-0.037   0.004   
mark_down50.049   0.006   8.48 0    0.038   0.061   
mosaic::msummary(model_final)
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.00e+04   3.29e+02   30.47  < 2e-16 ***
## factor(dept)2      2.46e+04   2.46e+02   99.65  < 2e-16 ***
## factor(dept)3     -7.46e+03   2.46e+02  -30.30  < 2e-16 ***
## factor(dept)4      6.81e+03   2.46e+02   27.63  < 2e-16 ***
## factor(dept)5      2.00e+03   2.47e+02    8.09  5.8e-16 ***
## factor(dept)6     -1.54e+04   2.51e+02  -61.20  < 2e-16 ***
## factor(dept)7      4.97e+03   2.46e+02   20.16  < 2e-16 ***
## factor(dept)8      1.10e+04   2.46e+02   44.85  < 2e-16 ***
## factor(dept)9      8.41e+02   2.47e+02    3.40  0.00067 ***
## factor(dept)10    -9.02e+02   2.46e+02   -3.66  0.00025 ***
## factor(dept)11    -4.75e+03   2.46e+02  -19.27  < 2e-16 ***
## factor(dept)12    -1.52e+04   2.47e+02  -61.61  < 2e-16 ***
## factor(dept)13     1.15e+04   2.46e+02   46.79  < 2e-16 ***
## factor(dept)14    -4.37e+03   2.46e+02  -17.74  < 2e-16 ***
## factor(dept)16    -5.00e+03   2.46e+02  -20.31  < 2e-16 ***
## factor(dept)17    -9.13e+03   2.47e+02  -37.02  < 2e-16 ***
## factor(dept)18    -1.26e+04   2.63e+02  -47.86  < 2e-16 ***
## factor(dept)19    -2.02e+04   2.79e+02  -72.39  < 2e-16 ***
## factor(dept)20    -1.44e+04   2.50e+02  -57.57  < 2e-16 ***
## factor(dept)21    -1.43e+04   2.46e+02  -58.17  < 2e-16 ***
## factor(dept)22    -1.01e+04   2.56e+02  -39.61  < 2e-16 ***
## factor(dept)23     3.54e+03   2.53e+02   13.97  < 2e-16 ***
## factor(dept)24    -1.49e+04   2.58e+02  -57.77  < 2e-16 ***
## factor(dept)25    -1.04e+04   2.47e+02  -42.25  < 2e-16 ***
## factor(dept)26    -1.28e+04   2.54e+02  -50.49  < 2e-16 ***
## factor(dept)27    -1.93e+04   2.55e+02  -75.58  < 2e-16 ***
## factor(dept)28    -1.93e+04   2.50e+02  -77.48  < 2e-16 ***
## factor(dept)29    -1.58e+04   2.59e+02  -61.04  < 2e-16 ***
## factor(dept)30    -1.74e+04   2.60e+02  -67.10  < 2e-16 ***
## factor(dept)31    -1.77e+04   2.50e+02  -70.60  < 2e-16 ***
## factor(dept)32    -1.34e+04   2.51e+02  -53.21  < 2e-16 ***
## factor(dept)33    -1.48e+04   2.58e+02  -57.40  < 2e-16 ***
## factor(dept)34    -5.75e+03   2.59e+02  -22.19  < 2e-16 ***
## factor(dept)35    -1.86e+04   2.59e+02  -71.71  < 2e-16 ***
## factor(dept)36    -1.95e+04   2.60e+02  -75.21  < 2e-16 ***
## factor(dept)37    -2.23e+04   3.26e+02  -68.44  < 2e-16 ***
## factor(dept)38     4.22e+04   2.46e+02  171.20  < 2e-16 ***
## factor(dept)39    -2.66e+04   3.50e+03   -7.62  2.6e-14 ***
## factor(dept)40     2.59e+04   2.46e+02  105.07  < 2e-16 ***
## factor(dept)41    -1.93e+04   2.58e+02  -74.82  < 2e-16 ***
## factor(dept)42    -1.42e+04   2.47e+02  -57.49  < 2e-16 ***
## factor(dept)43    -2.07e+04   4.04e+03   -5.14  2.8e-07 ***
## factor(dept)44    -1.65e+04   2.57e+02  -64.05  < 2e-16 ***
## factor(dept)45    -2.16e+04   3.62e+02  -59.71  < 2e-16 ***
## factor(dept)46     7.43e+02   2.46e+02    3.02  0.00256 ** 
## factor(dept)47    -2.20e+04   5.77e+02  -38.18  < 2e-16 ***
## factor(dept)48    -2.18e+04   3.78e+02  -57.79  < 2e-16 ***
## factor(dept)49    -1.43e+04   2.72e+02  -52.50  < 2e-16 ***
## factor(dept)50    -2.15e+04   3.95e+02  -54.46  < 2e-16 ***
## factor(dept)51    -2.12e+04   4.13e+02  -51.23  < 2e-16 ***
## factor(dept)52    -1.77e+04   2.48e+02  -71.51  < 2e-16 ***
## factor(dept)54    -2.18e+04   2.67e+02  -81.52  < 2e-16 ***
## factor(dept)55    -1.00e+04   2.56e+02  -39.19  < 2e-16 ***
## factor(dept)56    -1.65e+04   2.52e+02  -65.40  < 2e-16 ***
## factor(dept)58    -1.89e+04   2.74e+02  -69.05  < 2e-16 ***
## factor(dept)59    -1.92e+04   2.50e+02  -76.81  < 2e-16 ***
## factor(dept)60    -1.87e+04   2.53e+02  -73.95  < 2e-16 ***
## factor(dept)65     2.40e+04   1.18e+03   20.29  < 2e-16 ***
## factor(dept)67    -1.19e+04   2.46e+02  -48.11  < 2e-16 ***
## factor(dept)71    -1.58e+04   2.59e+02  -60.77  < 2e-16 ***
## factor(dept)72     3.09e+04   2.50e+02  123.32  < 2e-16 ***
## factor(dept)74    -5.45e+03   2.46e+02  -22.11  < 2e-16 ***
## factor(dept)77    -2.48e+04   1.16e+03  -21.45  < 2e-16 ***
## factor(dept)78    -2.31e+04   9.28e+02  -24.86  < 2e-16 ***
## factor(dept)79     2.78e+03   2.46e+02   11.27  < 2e-16 ***
## factor(dept)80    -7.57e+03   2.52e+02  -30.03  < 2e-16 ***
## factor(dept)81    -3.77e+03   2.46e+02  -15.30  < 2e-16 ***
## factor(dept)82    -3.77e+03   2.46e+02  -15.32  < 2e-16 ***
## factor(dept)83    -1.66e+04   2.52e+02  -65.82  < 2e-16 ***
## factor(dept)85    -1.78e+04   2.50e+02  -71.14  < 2e-16 ***
## factor(dept)87    -5.91e+03   2.47e+02  -23.95  < 2e-16 ***
## factor(dept)90     2.62e+04   2.46e+02  106.26  < 2e-16 ***
## factor(dept)91     1.46e+04   2.46e+02   59.17  < 2e-16 ***
## factor(dept)92     5.64e+04   2.46e+02  228.78  < 2e-16 ***
## factor(dept)93     7.22e+03   2.52e+02   28.68  < 2e-16 ***
## factor(dept)94     1.38e+04   2.54e+02   54.24  < 2e-16 ***
## factor(dept)95     5.10e+04   2.46e+02  206.90  < 2e-16 ***
## factor(dept)96    -3.90e+03   2.66e+02  -14.68  < 2e-16 ***
## factor(dept)97    -5.02e+03   2.48e+02  -20.24  < 2e-16 ***
## factor(dept)98    -1.30e+04   2.53e+02  -51.42  < 2e-16 ***
## factor(dept)99    -2.52e+04   5.08e+02  -49.59  < 2e-16 ***
## temperature        7.29e+01   2.01e+00   36.20  < 2e-16 ***
## cpi               -2.58e+01   6.34e-01  -40.60  < 2e-16 ***
## unemployment      -5.05e+02   1.30e+01  -38.88  < 2e-16 ***
## typeB              2.36e+03   6.89e+01   34.25  < 2e-16 ***
## typeC              3.04e+03   1.16e+02   26.17  < 2e-16 ***
## size               1.26e-01   6.25e-04  201.95  < 2e-16 ***
## season_nameSpring -2.25e+03   7.79e+01  -28.86  < 2e-16 ***
## season_nameSummer -3.50e+03   1.03e+02  -34.02  < 2e-16 ***
## season_nameAutumn -3.19e+03   8.59e+01  -37.21  < 2e-16 ***
## fuel_price        -6.65e+02   5.40e+01  -12.32  < 2e-16 ***
## superbowlTRUE     -1.18e+02   1.58e+02   -0.75  0.45224    
## christmasTRUE     -2.18e+03   2.12e+02  -10.30  < 2e-16 ***
## laborTRUE         -1.38e+02   1.58e+02   -0.88  0.38155    
## thanksgivingTRUE   6.36e+03   2.38e+02   26.74  < 2e-16 ***
## mark_down1         2.39e-02   7.26e-03    3.29  0.00101 ** 
## mark_down2         9.18e-03   5.02e-03    1.83  0.06761 .  
## mark_down3         6.67e-02   4.96e-03   13.44  < 2e-16 ***
## mark_down4        -1.65e-02   1.05e-02   -1.58  0.11383    
## mark_down5         4.95e-02   5.83e-03    8.48  < 2e-16 ***
## 
## Residual standard error: 14000 on 421470 degrees of freedom
## Multiple R-squared:  0.627,  Adjusted R-squared:  0.627 
## F-statistic: 7.15e+03 on 99 and 421470 DF,  p-value: <2e-16
vif(model_final) 
##              GVIF Df GVIF^(1/(2*Df))
## factor(dept) 1.07 80            1.00
## temperature  2.98  1            1.73
## cpi          1.33  1            1.15
## unemployment 1.26  1            1.12
## type         3.38  2            1.36
## size         3.14  1            1.77
## season_name  3.60  3            1.24
## fuel_price   1.32  1            1.15
## superbowl    1.11  1            1.05
## christmas    1.34  1            1.16
## labor        1.11  1            1.05
## thanksgiving 1.70  1            1.30
## mark_down1   4.16  1            2.04
## mark_down2   1.41  1            1.19
## mark_down3   1.63  1            1.28
## mark_down4   3.58  1            1.89
## mark_down5   1.30  1            1.14

We arrive at our final model with an adjusted R square of 62.7%, i.e around 63% of the variability in weekly sales is explained by the explanatory variables incorporated in the final model. Moreover, the colinearity problem has been resolved as VIF coefficient for all variables is less than 5.

Checking for assumption of Linear Regression

library(ggfortify)
autoplot(model_final) +
theme_bw()

Residuals vs. Fitted: Residuals seem no to follow any patterns

Normal Q-Q: Apart from the two ends of the line residuals seem no to deviate from straight line

Scale-Location: The clear trend is not recognizable.

Residuals vs. Leverage: There are some outliers that could lead to biased estimated of the model.

In conclusion, given more time, more analytics should be done in order to assess whether the model meets the assumptions of the linear regression.

Comparing all models

huxreg(model1, model2, 
       model3, model_final,
       statistics = c('#observations' = 'nobs', 
                      'R squared' = 'r.squared', 
                      'Adj. R Squared' = 'adj.r.squared', 
                      'Residual SE' = 'sigma'), 
       bold_signif = 0.05, 
       stars = NULL
) %>% 
  set_caption('Comparison of models')
Comparison of models
(1)(2)(3)(4)
(Intercept)26402.925 261.270 30095.562 10009.413 
(3279.520)(595.617)(2608.255)(328.500)
store-142.187 -137.237           
(3.103)(3.145)          
dept111.041 111.046           
(1.103)(1.102)          
date-1.376                
(0.237)               
is_holidayTRUE651.533 228.658           
(141.668)(145.850)          
temperature15.872 44.780 4.364 72.914 
(2.012)(3.174)(2.879)(2.014)
fuel_price50.519 2550.268 -433.504 -665.377 
(130.229)(181.123)(80.912)(54.001)
mark_down10.028 0.071 0.005 0.024 
(0.012)(0.012)(0.007)(0.007)
mark_down2-0.000 -0.012 -0.029 0.009 
(0.007)(0.007)(0.005)(0.005)
mark_down30.117 0.134 0.043 0.067 
(0.006)(0.006)(0.005)(0.005)
mark_down4-0.002 -0.037 -0.038 -0.017 
(0.016)(0.016)(0.010)(0.010)
mark_down50.106 0.104 0.034 0.049 
(0.009)(0.009)(0.006)(0.006)
cpi-25.887 -22.842 7.969 -25.752 
(0.991)(1.070)(12.172)(0.634)
unemployment-262.948 -410.937 -371.165 -504.709 
(20.625)(21.125)(45.561)(12.981)
typeB-347.400 -313.618      2359.473 
(108.415)(109.668)     (68.899)
typeC5812.229 5712.684      3039.403 
(185.008)(185.120)     (116.126)
size0.098 0.097      0.126 
(0.001)(0.001)     (0.001)
year2011     -2664.309           
     (163.037)          
year2012     -4095.732           
     (200.194)          
season_nameSpring     -2467.574 -1247.136 -2249.424 
     (128.969)(84.012)(77.933)
season_nameSummer     -2693.031 -1028.243 -3495.199 
     (161.091)(127.646)(102.752)
season_nameAutumn     -2762.360 -1806.772 -3194.682 
     (133.147)(95.553)(85.862)
factor(dept)2          24550.460 24550.460 
          (235.662)(246.364)
factor(dept)3          -7464.951 -7464.951 
          (235.662)(246.364)
factor(dept)4          6807.887 6807.887 
          (235.662)(246.364)
factor(dept)5          2044.060 2000.986 
          (236.483)(247.218)
factor(dept)6          -15369.094 -15360.873 
          (240.112)(250.977)
factor(dept)7          4966.376 4966.376 
          (235.662)(246.364)
factor(dept)8          11048.673 11048.673 
          (235.662)(246.364)
factor(dept)9          836.523 840.894 
          (236.417)(247.151)
factor(dept)10          -901.882 -901.882 
          (235.662)(246.364)
factor(dept)11          -4746.484 -4746.689 
          (235.671)(246.374)
factor(dept)12          -15198.006 -15195.109 
          (235.901)(246.614)
factor(dept)13          11528.060 11528.060 
          (235.662)(246.364)
factor(dept)14          -4369.432 -4369.432 
          (235.662)(246.364)
factor(dept)16          -5003.584 -5003.584 
          (235.662)(246.364)
factor(dept)17          -9129.862 -9127.402 
          (235.874)(246.585)
factor(dept)18          -12651.841 -12594.189 
          (251.705)(263.127)
factor(dept)19          -20556.063 -20204.112 
          (267.134)(279.083)
factor(dept)20          -14423.773 -14419.751 
          (239.601)(250.481)
factor(dept)21          -14330.502 -14330.502 
          (235.662)(246.364)
factor(dept)22          -10176.644 -10148.775 
          (245.105)(256.215)
factor(dept)23          3556.142 3540.483 
          (242.405)(253.380)
factor(dept)24          -14917.200 -14904.947 
          (246.817)(257.996)
factor(dept)25          -10459.859 -10441.416 
          (236.416)(247.151)
factor(dept)26          -12783.636 -12815.385 
          (242.814)(253.838)
factor(dept)27          -19310.808 -19292.940 
          (244.203)(255.251)
factor(dept)28          -19363.452 -19332.807 
          (238.697)(249.508)
factor(dept)29          -15874.748 -15830.505 
          (248.095)(259.327)
factor(dept)30          -17464.132 -17417.897 
          (248.321)(259.564)
factor(dept)31          -17716.658 -17657.853 
          (239.249)(250.095)
factor(dept)32          -13329.371 -13371.186 
          (240.422)(251.304)
factor(dept)33          -14840.893 -14821.944 
          (247.023)(258.209)
factor(dept)34          -5788.604 -5748.264 
          (247.856)(259.080)
factor(dept)35          -18651.178 -18607.912 
          (248.241)(259.481)
factor(dept)36          -19562.499 -19516.895 
          (248.267)(259.508)
factor(dept)37          -22578.155 -22339.107 
          (312.644)(326.403)
factor(dept)38          42176.789 42176.789 
          (235.662)(246.364)
factor(dept)39          -27324.214 -26638.886 
          (3347.119)(3498.144)
factor(dept)40          25886.272 25886.272 
          (235.662)(246.364)
factor(dept)41          -19319.282 -19316.990 
          (246.980)(258.172)
factor(dept)42          -14191.136 -14181.353 
          (235.975)(246.691)
factor(dept)43          -18503.328 -20749.800 
          (3863.258)(4038.031)
factor(dept)44          -16510.076 -16469.636 
          (245.993)(257.136)
factor(dept)45          -22393.035 -21643.719 
          (346.975)(362.485)
factor(dept)46          743.113 743.113 
          (235.662)(246.364)
factor(dept)47          -22697.506 -22028.384 
          (551.932)(576.924)
factor(dept)48          -23895.165 -21828.941 
          (362.239)(377.759)
factor(dept)49          -14429.437 -14285.038 
          (260.346)(272.100)
factor(dept)50          -23445.327 -21499.487 
          (378.698)(394.774)
factor(dept)51          -21474.110 -21175.914 
          (395.681)(413.342)
factor(dept)52          -17744.784 -17736.132 
          (237.248)(248.011)
factor(dept)54          -22002.839 -21788.399 
          (255.701)(267.280)
factor(dept)55          -10005.113 -10016.155 
          (244.486)(255.588)
factor(dept)56          -16538.714 -16497.927 
          (241.312)(252.246)
factor(dept)58          -19339.767 -18907.165 
          (262.032)(273.836)
factor(dept)59          -19186.577 -19176.543 
          (238.834)(249.673)
factor(dept)60          -18873.231 -18734.786 
          (242.374)(253.360)
factor(dept)65          27676.312 23993.362 
          (1137.862)(1182.307)
factor(dept)67          -11853.442 -11853.442 
          (235.662)(246.364)
factor(dept)71          -15805.499 -15758.581 
          (248.081)(259.314)
factor(dept)72          30840.713 30872.114 
          (239.494)(250.332)
factor(dept)74          -5447.499 -5447.707 
          (235.708)(246.412)
factor(dept)77          -24641.477 -24808.642 
          (1106.216)(1156.378)
factor(dept)78          -23238.203 -23081.829 
          (888.087)(928.339)
factor(dept)79          2775.514 2775.514 
          (235.662)(246.364)
factor(dept)80          -7734.455 -7568.909 
          (241.100)(252.014)
factor(dept)81          -3769.705 -3769.705 
          (235.662)(246.364)
factor(dept)82          -3774.982 -3774.982 
          (235.662)(246.364)
factor(dept)83          -16672.741 -16576.438 
          (240.934)(251.828)
factor(dept)85          -17793.930 -17801.316 
          (239.400)(250.236)
factor(dept)87          -5895.587 -5911.375 
          (236.068)(246.788)
factor(dept)90          26178.239 26178.239 
          (235.662)(246.364)
factor(dept)91          14577.031 14577.031 
          (235.662)(246.364)
factor(dept)92          56363.489 56363.489 
          (235.662)(246.364)
factor(dept)93          7125.448 7220.329 
          (240.880)(251.772)
factor(dept)94          13801.812 13798.958 
          (243.347)(254.388)
factor(dept)95          50972.276 50972.276 
          (235.662)(246.364)
factor(dept)96          -3796.627 -3900.554 
          (254.366)(265.751)
factor(dept)97          -5173.814 -5017.316 
          (237.150)(247.902)
factor(dept)98          -13055.080 -12989.591 
          (241.687)(252.634)
factor(dept)99          -25141.474 -25179.899 
          (485.755)(507.748)
factor(store)2          5190.972      
          (186.999)     
factor(store)3          -16678.397      
          (198.822)     
factor(store)4          7615.901      
          (1103.753)     
factor(store)5          -18403.192      
          (202.609)     
factor(store)6          -254.896      
          (192.025)     
factor(store)7          -13472.387      
          (334.200)     
factor(store)8          -9646.926      
          (201.684)     
factor(store)9          -14803.724      
          (207.568)     
factor(store)10          6038.412      
          (1089.953)     
factor(store)11          -3014.971      
          (192.191)     
factor(store)12          -4562.588      
          (1062.030)     
factor(store)13          6596.793      
          (1094.199)     
factor(store)14          7697.932      
          (411.974)     
factor(store)15          -12214.417      
          (1015.861)     
factor(store)16          -14900.642      
          (352.787)     
factor(store)17          -8631.184      
          (1097.539)     
factor(store)18          -5127.144      
          (1001.247)     
factor(store)19          -505.983      
          (1019.108)     
factor(store)20          8131.028      
          (216.037)     
factor(store)21          -11205.881      
          (190.139)     
factor(store)22          -6264.581      
          (964.415)     
factor(store)23          -2309.614      
          (1057.018)     
factor(store)24          -1642.241      
          (1013.883)     
factor(store)25          -11771.681      
          (218.384)     
factor(store)26          -6955.942      
          (1012.793)     
factor(store)27          4028.338      
          (972.613)     
factor(store)28          -433.747      
          (1061.787)     
factor(store)29          -12916.640      
          (990.678)     
factor(store)30          -17902.910      
          (206.739)     
factor(store)31          -2312.131      
          (187.546)     
factor(store)32          -4900.433      
          (327.155)     
factor(store)33          -20634.136      
          (1087.104)     
factor(store)34          -7410.359      
          (1053.410)     
factor(store)35          -7582.403      
          (955.137)     
factor(store)36          -19384.388      
          (216.124)     
factor(store)37          -16254.347      
          (206.692)     
factor(store)38          -16088.749      
          (1057.228)     
factor(store)39          -1495.484      
          (192.578)     
factor(store)40          -8567.425      
          (1057.185)     
factor(store)41          -4014.737      
          (343.959)     
factor(store)42          -13896.678      
          (1087.644)     
factor(store)43          -13058.958      
          (240.883)     
factor(store)44          -20335.066      
          (1093.721)     
factor(store)45          -10128.505      
          (410.300)     
superbowlTRUE          -396.348 -118.404 
          (150.922)(157.516)
christmasTRUE          -1707.703 -2180.540 
          (202.575)(211.608)
laborTRUE          674.462 -138.230 
          (153.240)(157.968)
thanksgivingTRUE          6071.727 6356.345 
          (229.451)(237.744)
#observations421570     421570     421570     421570     
R squared0.090 0.091 0.659 0.627 
Adj. R Squared0.090 0.091 0.658 0.627 
Residual SE21821.319 21803.689 13367.442 13974.515 

Question 1

On average, change in one unit of temperature is associated with an increase of weekly sales by $72.9. Whereas, the change in fuel price by $1 is associated with decrease in weekly sales of $665.

Question 2

Based on estimated statistics, the impact of both Labor Day and Super Bowl weeks is not different from zero. Therefore, there is no significant difference in weekly sales during those weeks and non-holiday weeks. Probably, our retailer is not selling product widely purchased during Super Bowl and Labor Day.