GitHub - robertovacante/US-Immigration-Analysis: A Python project that uses web scraping techniques to collect U.S. immigration decisions from TRAC data and applies simple data analysis and visualization to reveal key trends and insights.

US-Immigration-Analysis

A Python project that uses web scraping techniques to collect U.S. immigration decisions from TRAC data and applies simple data analysis and visualization to reveal key trends and insights.

import re
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# TRAC immigration judge reports index page.
url = "https://trac.syr.edu/immigration/reports/judgereports/"

# Maps an inclusive (first_year, last_year) span to (president, party).
# NOTE: the spans share the years 1974 (Ford/Nixon) and 1963 (Johnson/Kennedy);
# lookups return the first matching entry in insertion order, so the entry
# listed earlier wins for those two years.
president_party_mapping = {
    (2021, 2024): ("Joe Biden", "Democrat"),
    (2017, 2020): ("Donald Trump", "Republican"),
    (2009, 2016): ("Barack Obama", "Democrat"),
    (2001, 2008): ("George W. Bush", "Republican"),
    (1993, 2000): ("Bill Clinton", "Democrat"),
    (1989, 1992): ("George H. W. Bush", "Republican"),
    (1981, 1988): ("Ronald Reagan", "Republican"),
    (1977, 1980): ("Jimmy Carter", "Democrat"),
    (1974, 1976): ("Gerald Ford", "Republican"),
    (1969, 1974): ("Richard Nixon", "Republican"),
    (1963, 1968): ("Lyndon B. Johnson", "Democrat"),
    (1961, 1963): ("John F. Kennedy", "Democrat"),
}

def get_president_and_party(year):
    """Return the ``(president, party)`` pair whose year span contains *year*.

    *year* may be anything ``int()`` accepts (for example ``2018`` or
    ``"2018"``). When no span in ``president_party_mapping`` covers the year,
    ``(None, None)`` is returned.
    """
    target_year = int(year)
    candidates = (
        office_holder
        for (first_year, last_year), office_holder in president_party_mapping.items()
        if first_year <= target_year <= last_year
    )
    # The first hit wins, which also settles the overlapping boundary years.
    return next(candidates, (None, None))

# ---------------------------------------------------------------------------
# Scrape the judge index table, enrich each judge with data taken from the
# judge's own report page, and save everything to 'immigration_judges.csv'.
# ---------------------------------------------------------------------------

# Seconds to wait for any single HTTP response; without a timeout, requests
# can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Compiled once because they are applied to every judge page.
YEAR_PATTERN = re.compile(r'\b(\d{4})\b')
JURIS_PATTERN = re.compile(r'Juris.*?(\d{4})', re.IGNORECASE)

response = requests.get(url, timeout=REQUEST_TIMEOUT)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')

table = soup.find('table')
if table is None:
    # Fail with a clear message instead of an AttributeError further down.
    raise RuntimeError(f"No <table> element found at {url}")

data = []
current_court = ""

for row in table.find_all('tr'):
    cols = row.find_all('td')
    if not cols:
        # Rows without <td> cells carry no judge data.
        continue

    # A first cell with a 'rowspan' attribute holds the court name, which
    # applies to this row and to the following rows that lack such a cell.
    # In that case the judge columns are shifted one position to the right.
    if 'rowspan' in cols[0].attrs:
        current_court = cols[0].get_text(strip=True)
        offset = 1
    else:
        offset = 0

    # A judge row needs five data cells after the optional court cell.
    if len(cols) < offset + 5:
        continue

    judge_cell = cols[offset]
    judge = judge_cell.get_text(strip=True)
    judge_anchor = judge_cell.find('a')
    # .get() tolerates an <a> tag that has no href attribute.
    judge_link = judge_anchor.get('href') if judge_anchor else None
    total_decisions = cols[offset + 1].get_text(strip=True)
    percent_granted_asylum = cols[offset + 2].get_text(strip=True)
    percent_granted_other = cols[offset + 3].get_text(strip=True)
    percent_denied = cols[offset + 4].get_text(strip=True)

    appointment_year = None
    juris_doctor_year = None

    if judge_link:
        # urljoin resolves relative, root-relative and absolute hrefs alike,
        # unlike plain string concatenation.
        judge_page_url = urljoin(url, judge_link)
        try:
            judge_page = requests.get(judge_page_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Keep the row from the index table; only the biography-derived
            # fields stay empty for this judge.
            print(f"Warning: could not fetch {judge_page_url}: {exc}")
            judge_page = None

        # Do not mine an HTTP error page for years.
        if judge_page is not None and judge_page.ok:
            judge_soup = BeautifulSoup(judge_page.text, 'html.parser')

            # Heuristic: the first four-digit number in the selected paragraph
            # is taken as the appointment year.
            bio_paragraph = judge_soup.select_one("div div div p:nth-of-type(2)")
            if bio_paragraph:
                year_match = YEAR_PATTERN.findall(bio_paragraph.get_text())
                if year_match:
                    appointment_year = year_match[0]

            # First four-digit number following "Juris" anywhere in the page text.
            juris_match = JURIS_PATTERN.search(judge_soup.get_text())
            if juris_match:
                juris_doctor_year = juris_match.group(1)

    president, party = get_president_and_party(appointment_year) if appointment_year else (None, None)
    # NOTE: an unknown party is encoded as 0, the same value as a non-Democrat.
    democrat_appointer = 1 if party == "Democrat" else 0

    data.append([current_court, judge, total_decisions, percent_granted_asylum,
                 percent_granted_other, percent_denied, appointment_year,
                 juris_doctor_year, president, party, democrat_appointer])

columns = ["Immigration Court", "Judge", "Total Decisions", "% Granted Asylum",
           "% Granted Other Relief", "% Denied", "Appointment Date", "Juris Doctor Year",
           "Appointing President", "Party", "Democrat Appointer"]
df = pd.DataFrame(data, columns=columns)

df.to_csv('immigration_judges.csv', index=False)
print("Data saved to 'immigration_judges.csv'")

import pandas as pd
import statsmodels.api as sm
# Load the judge table from the CSV file produced by the scraping step.
df = pd.read_csv('immigration_judges.csv')

Simple Data Analysis (some possible models)

## 1. Impact of Judicial Factors on Denial Rate

# Design matrix: the selected columns, with the two listed columns one-hot
# encoded (the first level of each is dropped as the reference category).
X = pd.get_dummies(
    df[[
        "% Granted Asylum",
        "% Granted Other Relief",
        "Democrat Appointer",
        "Juris Doctor Year",
        "Appointing President",
    ]],
    columns=["Democrat Appointer", "Appointing President"],
    drop_first=True,
)
# Target: denial percentage; values that cannot be parsed become NaN.
y = pd.to_numeric(df["% Denied"], errors='coerce')

## 2. Effect of Appointment Factors on Asylum Grant Rates

# Design matrix: one-hot encode the three categorical columns (dropping the
# first level of each), then coerce every column to numeric so that values
# that cannot be parsed become NaN.
X = pd.get_dummies(
    df[[
        "Total Decisions",
        "% Granted Other Relief",
        "Juris Doctor Year",
        "Appointing President",
        "Party",
        "Democrat Appointer",
    ]],
    columns=["Democrat Appointer", "Appointing President", "Party"],
    drop_first=True,
).apply(pd.to_numeric, errors='coerce')
# Target: asylum grant percentage.
y = pd.to_numeric(df["% Granted Asylum"], errors='coerce')

## 3. Impact of Judicial and Appointment Factors on Granting Other Relief

# Design matrix: one-hot encode president and party (dropping the first level
# of each), then coerce every column to numeric so that values that cannot be
# parsed become NaN.
X = pd.get_dummies(
    df[[
        "Total Decisions",
        "% Granted Asylum",
        "% Denied",
        "Juris Doctor Year",
        "Appointing President",
        "Party",
    ]],
    columns=["Appointing President", "Party"],
    drop_first=True,
).apply(pd.to_numeric, errors='coerce')
# Target: percentage of decisions granting other relief.
y = pd.to_numeric(df["% Granted Other Relief"], errors='coerce')

## 4. Effect of Grant Rates and Appointer Party on Total Decisions

# "Total Decisions" is the target of this model, so it must not also appear
# among the predictors: regressing a variable on itself gives a trivially
# perfect fit and meaningless coefficients for the other columns.
# NOTE(review): if a different target was intended here, change `y` instead
# and restore "Total Decisions" in the predictor list.
X = df[["% Granted Asylum", "% Granted Other Relief", "Juris Doctor Year", "Democrat Appointer"]]
# One-hot encode the appointer flag, dropping the first level as reference.
X = pd.get_dummies(X, columns=["Democrat Appointer"], drop_first=True)
# Values that cannot be parsed as numbers become NaN.
X = X.apply(pd.to_numeric, errors='coerce')
y = pd.to_numeric(df["Total Decisions"], errors='coerce')

# Fit an ordinary least squares regression on whichever X / y pair was
# assigned last above, and print the regression summary.

# Coerce every predictor to numeric; values that cannot be parsed become NaN.
X = X.apply(pd.to_numeric, errors='coerce')

# Keep only rows that are complete in BOTH the predictors and the target.
# Filtering on X alone would let NaN targets reach the estimator.
complete_rows = X.notna().all(axis=1) & y.notna()
X = X.loc[complete_rows]
y = y.loc[complete_rows]  # Align y with the cleaned X

# Cast to float rather than int: an integer cast would truncate fractional
# percentages, while float still turns boolean dummy columns into 0.0 / 1.0.
X = X.astype(float)
X = sm.add_constant(X)  # add the intercept term

model = sm.OLS(y, X).fit()
print(model.summary())

Name		Name	Last commit message	Last commit date
Latest commit History 3 Commits
Immigration_Judges.ipynb		Immigration_Judges.ipynb
README.md		README.md

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

US-Immigration-Analysis

Simple Data Analysis (some possible models)

About

Releases

Packages

Languages

robertovacante/US-Immigration-Analysis

Folders and files

Latest commit

History

Repository files navigation

US-Immigration-Analysis

Simple Data Analysis (some possible models)

About

Resources

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages