Module #7 Assignment: Visualizing Distributions in R
# Load required libraries
# -------------------------------
library(ggplot2)
# -------------------------------
# Step 1: Load and Inspect Dataset
# -------------------------------
# Bring the built-in mtcars data frame into the workspace.
data("mtcars")
# Preview the leading rows (six, which is head()'s default).
head(mtcars, n = 6L)
# Summarise every column's type together with its first few values.
str(object = mtcars)
# -------------------------------
# Step 2: Histogram of MPG
# -------------------------------
# Count cars in bins that are 2 mpg wide to show the shape of the
# mpg distribution.
hist_mpg <- ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(binwidth = 2, fill = "steelblue", color = "black") +
  theme_minimal() +
  labs(
    title = "Distribution of Miles per Gallon (mpg)",
    x = "Miles per Gallon",
    y = "Count"
  )
# Render the histogram on the active graphics device.
print(hist_mpg)
# Write the histogram to disk: 6 x 4 (ggsave's default units) at 300 dpi.
ggsave(
  filename = "hist_mpg.png",
  plot = hist_mpg,
  width = 6,
  height = 4,
  dpi = 300
)
# -------------------------------
# Step 3: Density Plot of Horsepower by Cylinder
# -------------------------------
# cyl is converted to a factor so each cylinder count gets its own fill;
# alpha = 0.5 keeps overlapping curves visible through one another.
density_hp <- ggplot(
  data = mtcars,
  mapping = aes(x = hp, fill = factor(cyl))
) +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  labs(
    title = "Density of Horsepower by Cylinder Count",
    x = "Horsepower",
    y = "Density",
    fill = "Cylinders"
  )
# Render the density plot on the active graphics device.
print(density_hp)
# Write the density plot to disk: 6 x 4 (ggsave's default units) at 300 dpi.
ggsave(
  filename = "density_hp.png",
  plot = density_hp,
  width = 6,
  height = 4,
  dpi = 300
)
# -------------------------------
# Step 4: Faceted Scatter Plot of MPG vs Horsepower
# -------------------------------
# One panel per distinct cyl value, each plotting mpg against hp.
scatter_mpg_hp <- ggplot(data = mtcars, mapping = aes(x = hp, y = mpg)) +
  geom_point(color = "darkgreen") +
  facet_wrap(facets = ~cyl) +
  theme_minimal() +
  labs(
    title = "MPG vs Horsepower by Cylinder Count",
    x = "Horsepower",
    y = "Miles per Gallon"
  )
# Render the faceted scatter plot on the active graphics device.
print(scatter_mpg_hp)
# Write the scatter plot to disk: 6 x 4 (ggsave's default units) at 300 dpi.
ggsave(
  filename = "scatter_mpg_hp.png",
  plot = scatter_mpg_hp,
  width = 6,
  height = 4,
  dpi = 300
)
Results:
> # Load required libraries
> # -------------------------------
> library(ggplot2)
>
> # -------------------------------
> # Step 1: Load and Inspect Dataset
> # -------------------------------
> data("mtcars")
>
> # View first few rows
> head(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
>
> # Check structure
> str(mtcars)
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : num 0 0 1 1 0 1 0 1 1 1 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
$ gear: num 4 4 4 3 3 3 3 4 4 4 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
>
> # -------------------------------
> # Step 2: Histogram of MPG
> # -------------------------------
> hist_mpg <- ggplot(mtcars, aes(x = mpg)) +
+ geom_histogram(binwidth = 2, fill = "steelblue", color = "black") +
+ labs(title = "Distribution of Miles per Gallon (mpg)",
+ x = "Miles per Gallon",
+ y = "Count") +
+ theme_minimal()
>
> # Display plot
> print(hist_mpg)
>
> # Save plot as image
> ggsave("hist_mpg.png", plot = hist_mpg, width = 6, height = 4, dpi = 300)
>
> # -------------------------------
> # Step 3: Density Plot of Horsepower by Cylinder
> # -------------------------------
> density_hp <- ggplot(mtcars, aes(x = hp, fill = factor(cyl))) +
+ geom_density(alpha = 0.5) +
+ labs(title = "Density of Horsepower by Cylinder Count",
+ x = "Horsepower",
+ y = "Density",
+ fill = "Cylinders") +
+ theme_minimal()
>
> # Display plot
> print(density_hp)
>
> # Save plot as image
> ggsave("density_hp.png", plot = density_hp, width = 6, height = 4, dpi = 300)
>
> # -------------------------------
> # Step 4: Faceted Scatter Plot of MPG vs Horsepower
> # -------------------------------
> scatter_mpg_hp <- ggplot(mtcars, aes(x = hp, y = mpg)) +
+ geom_point(color = "darkgreen") +
+ facet_wrap(~cyl) +
+ labs(title = "MPG vs Horsepower by Cylinder Count",
+ x = "Horsepower",
+ y = "Miles per Gallon") +
+ theme_minimal()
>
> # Display plot
> print(scatter_mpg_hp)
>
> # Save plot as image
> ggsave("scatter_mpg_hp.png", plot = scatter_mpg_hp, width = 6, height = 4, dpi = 300)
| |
|
For this assignment, I used the mtcars dataset to explore the distributions of miles per gallon (mpg) and horsepower (hp). The histogram of mpg showed that most cars cluster between 15 and 25 mpg, while a few exceed 30 mpg. The density plot of horsepower grouped by cylinder count revealed that cars with more cylinders tend to have higher horsepower, with overlapping distributions for 4- and 6-cylinder cars. The faceted scatter plots allowed me to compare mpg versus horsepower across cylinder groups, highlighting how larger, more powerful engines tend to be less fuel-efficient.
In designing these visualizations, I followed Few’s and Yau’s recommendations by using aligned axes for comparisons, meaningful color to differentiate groups, and clean grid lines to support interpretation. I avoided unnecessary visual embellishments, such as 3D effects or distracting backgrounds. I agree with their view that many common visualizations fail to communicate the structure of distributions clearly; by focusing on clarity and comparison, these plots reveal the patterns in the data effectively.
Comments
Post a Comment