Presentation Exercise

Author

Antonio Flores

Recreating Chart with AI Prompts

This is the chart we will attempt to recreate. It comes from a Gapminder article on life expectancy.

The data for this chart was easily found on their site.

Prompts for generating chart/code from AI

I will be using ChatGPT-4 for this exercise.

Unfortunately, I hit a wall here. Apparently there’s a limit to the number of Data Analysis prompts you can issue on the free version of ChatGPT.

Code from ChatGPT

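This will not run because the y variable name is incorrect: the code refers to `Life.expectancy.` (with a trailing period), but the column is named `Life.expectancy`.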

# Load necessary library
#library(ggplot2)

# Load the data
#data <- read.csv('Life_expectancy.csv')

# Create the plot
#ggplot(data, aes(x = time, y = Life.expectancy.)) +
 # geom_line(color = 'black') +
  #geom_ribbon(aes(ymin = 0, ymax = Life.expectancy.), fill = 'black', alpha = 0.1) +
  #labs(title = 'AVERAGE LIFE EXPECTANCY FROM 1800 TO TODAY', x = 'Year', y = 'Life Expectancy') +
  #annotate("text", x = 1845, y = 30, label = "Famine", size = 4, hjust = 0) +
  #annotate("text", x = 1918, y = 26, label = "Spanish flu", size = 4, hjust = 0) +
  #annotate("text", x = 1945, y = 47, label = "World War II", size = 4, hjust = 0) +
  #geom_point(aes(x = 1800, y = 31), color = 'black', size = 3) +
  #geom_point(aes(x = 2017, y = 72), color = 'black', size = 3) +
  #annotate("text", x = 1800, y = 31, label = "1800\n31 years", hjust = -0.2, vjust = 1.5, size = 4) +
  #annotate("text", x = 2017, y = 72, label = "2017\n72 years", hjust = 1.2, vjust = -0.5, size = 4) +
  #theme_minimal()

If we correct the variable name and verify that the data will be read in correctly, we get the below plot.

 #Load necessary library
library(here)
here() starts at C:/Users/Client/Documents/antonioflores-P2-portfolio
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.3.3
# Build the path to the CSV relative to the project root that here() reports,
# then read it into a data frame.
data_location <-here("presentation-exercise","Life_expectancy.csv")
data <- read.csv(data_location)

# Create the plot
# ChatGPT's chart with only the y variable name corrected: a black line of
# Life.expectancy against time, with a light ribbon filled from 0 up to the line.
ggplot(data, aes(x = time, y = Life.expectancy)) +
  geom_line(color = 'black') +
 geom_ribbon(aes(ymin = 0, ymax = Life.expectancy), fill = 'black', alpha = 0.1) +
 labs(title = 'AVERAGE LIFE EXPECTANCY FROM 1800 TO TODAY', x = 'Year', y = 'Life Expectancy') +
 # Horizontal, left-aligned (hjust = 0) event labels at hand-picked coordinates
 annotate("text", x = 1845, y = 30, label = "Famine", size = 4, hjust = 0) +
 annotate("text", x = 1918, y = 26, label = "Spanish flu", size = 4, hjust = 0) +
 annotate("text", x = 1945, y = 47, label = "World War II", size = 4, hjust = 0) +
 # NOTE(review): the constant aes() values below are evaluated against every
 # row of `data`, which is what triggers the "All aesthetics have length 1"
 # warnings; ggplot2 suggests annotate() for single markers like these.
 geom_point(aes(x = 1800, y = 31), color = 'black', size = 3) +
 geom_point(aes(x = 2017, y = 72), color = 'black', size = 3) +
 # Two-line "<year>\n<value>" labels placed next to the two markers
 annotate("text", x = 1800, y = 31, label = "1800\n31 years", hjust = -0.2, vjust = 1.5, size = 4) +
 annotate("text", x = 2017, y = 72, label = "2017\n72 years", hjust = 1.2, vjust = -0.5, size = 4) +
 theme_minimal()
Warning in geom_point(aes(x = 1800, y = 31), color = "black", size = 3): All aesthetics have length 1, but the data has 301 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
Warning in geom_point(aes(x = 2017, y = 72), color = "black", size = 3): All aesthetics have length 1, but the data has 301 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.

My edits

The main changes:
- Small variable name fix
- Rotating annotations
- A few cosmetic fixes

# Load necessary library
library(ggplot2)
library(here)
library(tidyr)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Load the data
data <- read.csv(data_location)

# A little cleaning: keep only years up to 2017, the last year labelled on
# the chart.
data1 <- data %>%
  filter(time < 2018)

# Create the plot
ggplot(data1, aes(x = time, y = Life.expectancy)) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0)
  geom_line(color = "black", linewidth = 0.75) +
  geom_ribbon(
    aes(ymin = 0, ymax = Life.expectancy),
    fill = "black", alpha = 0.1
  ) +
  labs(
    title = "AVERAGE LIFE EXPECTANCY FROM 1800 TO TODAY",
    x = "Year",
    y = "Life Expectancy"
  ) +
  # Event labels, rotated 90 degrees so they read vertically
  annotate(
    "text", x = 1877, y = 35, label = "Famine",
    size = 4, hjust = 0, angle = 90
  ) +
  annotate(
    "text", x = 1918, y = 40, label = "Spanish flu",
    size = 4, hjust = 0, angle = 90
  ) +
  annotate(
    "text", x = 1941, y = 50, label = "World War II",
    size = 4, hjust = 0, angle = 90
  ) +
  # Start/end markers: annotate("point") draws exactly one point each, whereas
  # geom_point(aes(<constants>)) is recycled over every data row and warns.
  annotate("point", x = 1800, y = 31, color = "black", size = 3) +
  annotate("point", x = 2017, y = 72, color = "black", size = 3) +
  # Year labels (small) and value labels (large) beside the two markers
  annotate("text", x = 1806, y = 43, label = "1800", vjust = 1.5, size = 4) +
  annotate(
    "text", x = 2011, y = 75, label = "2017",
    hjust = 1.2, vjust = -0.5, size = 4
  ) +
  annotate(
    "text", x = 2017, y = 70, label = "72 years",
    hjust = 1.2, vjust = -0.5, size = 6
  ) +
  annotate("text", x = 1817, y = 40, label = "31 years", vjust = 1.5, size = 6) +
  # Minimal theme with the grid removed and solid black axis lines
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.line = element_line(colour = "black")
  )

Attaching for final comparison

Creating Table

The prompt for this code was fairly straightforward

Initial Code from ChatGPT

I just changed the data reading code so it could run properly

#install.packages("knitr")
library(knitr)
Warning: package 'knitr' was built under R version 4.3.3
# Read the life-expectancy CSV from the path stored in `data_location`
data <- read.csv(file = data_location)

# Render the first ten rows as a captioned knitr table
kable(
  x = data[seq_len(10), ],
  caption = "Sample Life Expectancy Data"
)
Sample Life Expectancy Data
geo name time Life.expectancy
world World 1800 30.64173
world World 1801 30.71239
world World 1802 30.60052
world World 1803 30.27759
world World 1804 30.19749
world World 1805 30.78082
world World 1806 30.79082
world World 1807 30.73985
world World 1808 30.70976
world World 1809 30.73241

This code will not run because it uses an old method (the `vars()` column selector, which my edits below replace with `c()`).

#install.packages("gt")

#library(gt)

# Sample data
#data_location <- here("presentation-exercise","Life_expectancy.csv")
#data <- read.csv(data_location)


# Create a professional-looking table with gt
#gt_table <- gt(data[1:10, ]) %>%
 # tab_header(
  #  title = "Sample Life Expectancy Data",
   # subtitle = "First 10 rows of the dataset"
  #) %>%
  #fmt_number(
   # columns = vars(`Life expectancy `),
    #decimals = 2
#  ) %>%
  #tab_options(
   # table.font.names = "Arial",
    #table.border.top.width = px(2),
    #table.border.bottom.width = px(2)
#  )

# Display the table
#print(gt_table)

My edits

#install.packages("gt")

library(gt)
Warning: package 'gt' was built under R version 4.3.3
# Sample data: reuse the here()-based `data_location` path used by the other
# chunks, so the read does not depend on the current working directory.
data <- read.csv(data_location)

# Create a professional-looking table with gt
# Take the first 10 rows and pick the two displayed columns by name rather
# than by position, so a change in column order cannot silently swap them.
gt_table <- gt(data[seq_len(10), c("time", "Life.expectancy")]) %>%
  # Reader-friendly column headers
  cols_label(
    time = "Year",
    Life.expectancy = "Life Expectancy"
  ) %>%
  tab_header(
    title = "Sample Life Expectancy Data",
    subtitle = "First 10 rows of the dataset"
  ) %>%
  # Show life expectancy rounded to two decimals
  fmt_number(
    columns = "Life.expectancy",
    decimals = 2
  ) %>%
  tab_options(
    table.font.names = "Arial",
    table.border.top.width = px(2),
    table.border.bottom.width = px(2)
  )

# Display the table
gt_table
Sample Life Expectancy Data
First 10 rows of the dataset
Year Life Expectancy
1800 30.64
1801 30.71
1802 30.60
1803 30.28
1804 30.20
1805 30.78
1806 30.79
1807 30.74
1808 30.71
1809 30.73