Version 1.10
This script downloads every year of the United States Federal Reserve's Survey of Consumer Finances (SCF) data set. This post walks through the process with explanations. The script builds on earlier work by Anthony Damico.
Setting Up Your Session
I start every R script with the same block of code. It ensures that the script runs from clean memory, reads and writes files in a project-dedicated folder, and sets a random seed so that others can replicate my scripts. A random seed initializes the sequence of pseudo-random numbers that an analysis draws on. Two analyses that use the same seed will use the same sequence of "random" numbers whenever they perform an analytical operation that requires random numbers, which makes it possible to replicate analyses.
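As a quick standalone illustration of how a seed makes "random" draws reproducible (this sketch is not part of the download script):

```r
# Two draws with the same seed produce identical "random" numbers
set.seed(123)
a <- runif(3)
set.seed(123)
b <- runif(3)
identical(a, b)  # TRUE: the sequences match exactly
```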
Do not forget to substitute your working directory path in this chunk, and make sure that it has a “Data” subdirectory:
```r
# Clear the memory
rm(list = ls())
gc()

# Set the directory
directory <- "<ADD YOUR WORKING DIRECTORY PATH HERE>"
data_directory <- paste0(directory, "/Data")
setwd(directory)

# Set seed
set.seed(123)

# Packages for this script:
library(httr)
library(haven)
library(survey)

# Minimum and maximum year of SCF survey to download
min_year <- 1989
max_year <- 2022
```
Download the Data
First, we will download the data from the Federal Reserve servers. I am going to download the summary extract data in Stata format, because the summary extract data sets have converted all years' values to 2022 real dollars. I copied the link addresses of the data sets and looked for a pattern in the way the data files are named. Recall that the SCF is a triennial survey.
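The naming pattern for the main summary-extract files can be previewed before downloading anything. A small sketch (the URLs below are constructed from the pattern, not verified against the server here):

```r
# Main summary-extract zips follow the pattern scfp{year}s.zip,
# one file every three years
years <- seq(1989, 2022, by = 3)
urls <- paste0("https://www.federalreserve.gov/econres/files/scfp", years, "s.zip")
urls[1]  # "https://www.federalreserve.gov/econres/files/scfp1989s.zip"
```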
```r
setwd(data_directory)

# Function to check if a URL exists
check_url_existence <- function(url) {
  response <- HEAD(url)
  return(status_code(response) == 200)
}

# Function to download the file
download_file <- function(url, dest_file) {
  response <- GET(url, write_disk(dest_file, overwrite = TRUE))
  if (status_code(response) == 200) {
    print(paste("Downloaded file for year:", dest_file))
  } else {
    print(paste("Failed to download file for year:", dest_file))
  }
}

# Loop for all years
for (year in seq(min_year, max_year, by = 3)) {
  # Handle different main data files and the replicate weight files
  # with specific year formats
  for (suffix in c("s", "rw1s")) {
    # Different file suffix for replicate weight files before 2001
    file_suffix <- ifelse(year < 2001 && suffix == "rw1s",
                          sprintf("%02d", year %% 100), year)

    # Determine the correct URL and destination file name
    zip_url <- if (suffix == "s") {
      paste0("https://www.federalreserve.gov/econres/files/scfp", year, suffix, ".zip")
    } else {
      paste0("https://www.federalreserve.gov/econres/files/scf", file_suffix, suffix, ".zip")
    }
    dest_file <- if (suffix == "s") {
      paste0("scfp", year, suffix, ".zip")
    } else {
      paste0("scf", file_suffix, suffix, ".zip")
    }

    # Check if the URL exists and download the file
    if (check_url_existence(zip_url)) {
      download_file(zip_url, dest_file)
    } else {
      print(paste("Data not available for year:", year, "with suffix:", suffix))
    }
  }
}
```
Unzip files
The next step is to unzip these files; afterwards we will convert the data files within them from Stata (.dta) to RDS format. This script uses the unzip function to extract the files and then the file.remove function to delete the original zip files after extraction. Make sure that the working directory is set to the directory where the zip files are located, which here is the folder whose path is stored in the object data_directory.
```r
# Set the working directory to the data directory
setwd(data_directory)

# List all zip files in the directory
zip_files <- list.files(pattern = "\\.zip$")

# Unzip each file and then delete the zip file
for (zip_file in zip_files) {
  # Unzip the file
  unzip(zip_file, exdir = data_directory)
  # Remove the original zip file
  file.remove(zip_file)
}
```
Convert to RDS Format
The next step is to convert these individual data tables, which are encoded in Stata format, to RDS. I will save the new files with a standardized file-naming scheme to facilitate iterating over the data in the next step.
```r
## Convert to RDS format

# Set the working directory to the data directory
setwd(data_directory)

# Loop to convert Stata data files, change column names to lower case,
# and save in RDS format
for (year in seq(min_year, max_year, by = 3)) {
  # Main summary-extract data files follow a standard naming pattern
  data_file_name <- paste0("rscfp", year, ".dta")

  # Replicate weight files use a two-digit year format
  rw_file_name <- paste0("p", sprintf("%02d", year %% 100), "_rw1.dta")

  # Define file paths
  data_file <- paste0(data_directory, "/", data_file_name)
  rw_file <- paste0(data_directory, "/", rw_file_name)

  # Load, convert column names to lower case, and save the main data file
  if (file.exists(data_file)) {
    data <- haven::read_dta(data_file)
    colnames(data) <- tolower(colnames(data))
    saveRDS(data, file = paste0(data_directory, "/scf", year, ".rds"))
    file.remove(data_file)  # Delete the original .dta file
  }

  # Load, convert column names to lower case, and save the replicate
  # weights file if it exists
  if (file.exists(rw_file)) {
    rw_data <- haven::read_dta(rw_file)
    colnames(rw_data) <- tolower(colnames(rw_data))
    saveRDS(rw_data, file = paste0(data_directory, "/scf", year, "_rw.rds"))
    file.remove(rw_file)  # Delete the original .dta file
  }

  print(paste("Processed data for year:", year))
}

# The 2001 replicate weight file uses a different name, so convert it separately
rw_file_2001 <- paste0(data_directory, "/scf2001rw1s.dta")
if (file.exists(rw_file_2001)) {
  rw_data_2001 <- haven::read_dta(rw_file_2001)
  colnames(rw_data_2001) <- tolower(colnames(rw_data_2001))
  saveRDS(rw_data_2001, file = paste0(data_directory, "/scf2001_rw.rds"))
  file.remove(rw_file_2001)  # Delete the original .dta file
}
```
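The lower-casing step in the loop above can be seen in isolation with a toy data frame (made-up column names standing in for a freshly read .dta file):

```r
# Toy data frame with upper-case column names, for illustration only
df <- data.frame(YY1 = 1:3, NETWORTH = c(10, 20, 30))
colnames(df) <- tolower(colnames(df))
names(df)  # "yy1" "networth"
```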
Data Cleaning and Diagnostics
The next step is to check the data and ensure that it looks as we would expect from the documentation. Each year's main data file should contain five implicates per household, so it should have exactly five times as many rows as the corresponding replicate-weight file. The 1989 files also use different identifier names (x1 and xx1 instead of y1 and yy1), so we rename them for consistency with later years.
```r
# Data Cleaning and Diagnostics

# Set the working directory to the data directory
setwd(data_directory)

# Define the years for which data has been downloaded
years <- seq(min_year, max_year, by = 3)

# Load and diagnose the data
for (i in years) {
  print(paste("Diagnosing data for year:", i))

  # Load the main data and replicate weights
  main_data_fname <- paste0(data_directory, "/scf", i, ".rds")
  rw_data_fname <- paste0(data_directory, "/scf", i, "_rw.rds")

  if (file.exists(main_data_fname) && file.exists(rw_data_fname)) {
    main_data <- readRDS(main_data_fname)
    rw_data <- readRDS(rw_data_fname)

    # Check for row consistency between main data and replicate weights:
    # the main data should have 5 times as many rows, one per implicate
    stopifnot(nrow(main_data) == 5 * nrow(rw_data))

    # Rename identifiers in the 1989 data for consistency with later years
    if (i == 1989) {
      names(main_data)[names(main_data) == 'x1'] <- 'y1'
      names(main_data)[names(main_data) == 'xx1'] <- 'yy1'
      names(rw_data)[names(rw_data) == 'x1'] <- 'y1'
      names(rw_data)[names(rw_data) == 'xx1'] <- 'yy1'
    }

    # Save the cleaned data back to RDS format
    saveRDS(main_data, file = main_data_fname)
    saveRDS(rw_data, file = rw_data_fname)

    # Remove loaded data from memory
    rm(main_data, rw_data)
  } else {
    print(paste("Data files missing for year:", i))
  }
}
```
The following code cleans up the data and replicate weights tables, splitting the five implicates apart and merging each with the replicate weights. Adapted from Anthony Damico's scripts with some modifications:
```r
# Cleaning Set
for (i in years) {
  temp_file1 <- paste0("scf", i, ".rds")
  temp_file2 <- paste0("scf", i, "_rw.rds")
  temp_dat <- readRDS(temp_file1)
  temp_rw <- readRDS(temp_file2)

  # Replace NA with 0 in each replicate weight column
  for (col in names(temp_rw)) {
    temp_rw[[col]][is.na(temp_rw[[col]])] <- 0
  }

  # Confirm that the sets don't overlap except for the identifiers
  stopifnot(all.equal(sort(intersect(names(temp_dat), names(temp_rw))),
                      c('y1', 'yy1')))

  # Clean out the unused identifier
  temp_rw$y1 <- NULL
  gc()

  # Demarcate the five imputation implicates: the last digit of the
  # y1 identifier indicates the implicate number
  scf.1 <- temp_dat[substr(temp_dat$y1, nchar(temp_dat$y1), nchar(temp_dat$y1)) == 1, ]
  scf.2 <- temp_dat[substr(temp_dat$y1, nchar(temp_dat$y1), nchar(temp_dat$y1)) == 2, ]
  scf.3 <- temp_dat[substr(temp_dat$y1, nchar(temp_dat$y1), nchar(temp_dat$y1)) == 3, ]
  scf.4 <- temp_dat[substr(temp_dat$y1, nchar(temp_dat$y1), nchar(temp_dat$y1)) == 4, ]
  scf.5 <- temp_dat[substr(temp_dat$y1, nchar(temp_dat$y1), nchar(temp_dat$y1)) == 5, ]

  m.rows <- nrow(temp_dat)
  rm(temp_dat)
  gc()

  # Merge each implicate with the replicate weights
  imp1 <- merge(scf.1, temp_rw, by = "yy1")
  imp2 <- merge(scf.2, temp_rw, by = "yy1")
  imp3 <- merge(scf.3, temp_rw, by = "yy1")
  imp4 <- merge(scf.4, temp_rw, by = "yy1")
  imp5 <- merge(scf.5, temp_rw, by = "yy1")
  rm(scf.1, scf.2, scf.3, scf.4, scf.5)
  gc()

  # Confirm that the number of records did not change
  stopifnot(sum(nrow(imp1), nrow(imp2), nrow(imp3), nrow(imp4), nrow(imp5)) == m.rows)

  # Tag each implicate
  imp1$imp <- 1
  imp2$imp <- 2
  imp3$imp <- 3
  imp4$imp <- 4
  imp5$imp <- 5

  # Stack the implicates and save; the separate weights file is no longer needed
  data <- rbind(imp1, imp2, imp3, imp4, imp5)
  saveRDS(data, file = paste0("scf", i, ".rds"))
  file.remove(paste0("scf", i, "_rw.rds"))
}
```
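The substr trick used above to split the implicates relies on the last digit of the y1 identifier. A minimal sketch with made-up identifiers:

```r
# Made-up y1 identifiers: the final digit encodes the implicate (1-5)
y1 <- c(11, 12, 13, 14, 15, 21)
imp <- substr(y1, nchar(y1), nchar(y1))  # last character of each identifier
imp  # "1" "2" "3" "4" "5" "1"
```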
Rescale Weights
Next, we adjust the replicate weights. The replicate weights file contains missing values, and the documentation states that those observations are intended to be left out of the analysis, which is why missing replicate weights were set to zero in the cleaning step above. Each replicate weight is then multiplied by a multiplicity factor that accounts for the number of times the case appears in that sample replicate.
```r
# Set the working directory to the data directory
setwd(data_directory)

# Loop through each year's combined data file to adjust replicate weights
for (year in years) {
  # File name for the combined data and replicate weights
  temp_name <- paste0("scf", year, ".rds")

  # Check if the combined file exists
  if (file.exists(temp_name)) {
    # Load the combined data
    temp_dat <- readRDS(temp_name)

    # Adjust the replicate weights: multiply each weight by its multiplicity
    for (i in 1:999) {
      weight_col <- paste0("wt1b", i)
      multiplicity_col <- paste0("mm", i)
      if (weight_col %in% names(temp_dat) && multiplicity_col %in% names(temp_dat)) {
        temp_dat[[weight_col]] <- temp_dat[[weight_col]] * temp_dat[[multiplicity_col]]
      }
    }

    # Save the adjusted data back to RDS format
    saveRDS(temp_dat, file = temp_name)
    print(paste("Adjusted replicate weights for year:", year))
  } else {
    print(paste("Combined data file not found for year:", year))
  }
}
```
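To see what the multiplication does, here is a toy example with made-up numbers (the real data has 999 weight and multiplicity pairs, wt1b1-wt1b999 and mm1-mm999):

```r
# Two toy cases: the first appears twice in replicate 1, the second not at all
toy <- data.frame(wt1b1 = c(1000, 2500), mm1 = c(2, 0))
toy$wt1b1 <- toy$wt1b1 * toy$mm1
toy$wt1b1  # 2000 0 -- cases never drawn into a replicate get zero weight
```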