Skip to content

Commit 5119448

Browse files
authored
Add files via upload
1 parent 8059ea7 commit 5119448

File tree

2 files changed

+164
-0
lines changed

2 files changed

+164
-0
lines changed

Load_Public_Chauffeurs.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
#Name: Vineet Dcunha
#"I have not given or received any unauthorized assistance on this assignment."
"""Load the public chauffeurs CSV extract into the DRIVER_DTL table.

The script connects to ``dsc450.db`` in the current working directory,
switches to the assignment data directory, cleans every CSV row and
rebuilds the DRIVER_DTL table from the cleaned rows.  It then prints the
total row count, the number of distinct licence numbers and the number
of rows whose RENEWED column is NULL.
"""

import ast
import csv
import datetime as dt
import os
import sqlite3
from contextlib import closing

DB_PATH = 'dsc450.db'
DATA_DIR = "C:/Users/USER/Desktop/DSC/DSC_450 Database For Analytics/Assignment/Assignment_5"
CSV_NAME = 'Public_Chauffeurs_Short_hw3.csv'

CREATE_DRIVER_TABLE = """
CREATE TABLE IF NOT EXISTS DRIVER_DTL (
LICENSE_NUMBER NUMBER(20),
RENEWED VARCHAR2(10),
STATUS VARCHAR2(20),
STATUS_DATE DATE,
DRIVER_TYPE VARCHAR2(20),
LICENSE_TYPE VARCHAR2(20),
ORIGINAL_ISSUE_DATE DATE,
NAME VARCHAR2(100),
SEX VARCHAR2(8),
CHAUFFEUR_CITY VARCHAR2(25),
CHAUFFEUR_STATE VARCHAR2(5),
RECORD_NUMBER VARCHAR2(20)
);
"""

INSERT_DRIVER = 'INSERT INTO DRIVER_DTL VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )'


def is_missing(value):
    """Return True when a raw CSV field is None, empty, or contains 'null'."""
    return value is None or value == "" or 'null' in value.lower()


def parse_date(value):
    """Convert an ``MM/DD/YYYY`` or ``MM-DD-YYYY`` field to ISO ``YYYY-MM-DD`` text.

    Missing values become ``None`` so they are stored as SQL NULL.  The date
    is returned as ISO text, which is exactly what sqlite3's default date
    adapter used to write; binding the text directly avoids relying on that
    adapter (deprecated since Python 3.12).

    Raises:
        ValueError: if the value is present but not a valid date.
    """
    if is_missing(value):
        return None
    # Normalise '/' to '-' so a single strptime format covers both spellings.
    normalised = value.replace('/', '-')
    return dt.datetime.strptime(normalised, "%m-%d-%Y").date().isoformat()


def clean_row(row):
    """Return the 12 DRIVER_DTL column values for one raw CSV row.

    RENEWED, STATUS_DATE, LICENSE_TYPE and ORIGINAL_ISSUE_DATE are mapped to
    ``None`` when missing, so that SQLite stores a real NULL rather than the
    string 'None'.  Columns beyond the twelfth are ignored.

    Raises:
        ValueError: if the row has fewer than 12 columns or a date is invalid.
    """
    (license_number, renewed, status, status_date, driver_type, license_type,
     original_issue_date, name, sex, city, state, record_number) = row[:12]

    # RENEWED uses '/' as its separator; replace '-' with '/' for consistency.
    renewed = None if is_missing(renewed) else renewed.replace('-', '/')
    license_type = None if is_missing(license_type) else license_type

    return [
        license_number,
        renewed,
        status,
        parse_date(status_date),
        driver_type,
        license_type,
        parse_date(original_issue_date),
        name,
        sex,
        city,
        state,
        record_number,
    ]


def read_rows(csv_path):
    """Read the CSV at *csv_path*, skip its header, and return cleaned rows."""
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csv_path, 'r', newline='') as fd:
        reader = csv.reader(fd)
        next(reader, None)  # skip header (tolerates an empty file)
        # csv.reader yields [] for blank lines; those carry no record.
        return [clean_row(row) for row in reader if row]


def load_drivers(conn, rows):
    """Recreate DRIVER_DTL on *conn*, insert *rows*, and commit."""
    cur = conn.cursor()
    cur.execute('DROP TABLE IF EXISTS DRIVER_DTL;')
    cur.execute(CREATE_DRIVER_TABLE)  # create the DRIVER_DTL table
    cur.executemany(INSERT_DRIVER, rows)  # insert into table
    conn.commit()  # finalize inserted data


def report(conn):
    """Print the record count, distinct licence count and NULL RENEWED count."""
    cur = conn.cursor()
    total = cur.execute('SELECT COUNT(1) FROM DRIVER_DTL').fetchone()[0]
    print('Total number of records:', total)

    distinct = cur.execute(
        'SELECT COUNT(DISTINCT LICENSE_NUMBER) FROM DRIVER_DTL').fetchone()[0]
    print('Total distinct number of LICENSE_NUMBER:', distinct)

    null_renewed = cur.execute(
        'SELECT COUNT(1) FROM DRIVER_DTL WHERE RENEWED IS NULL').fetchone()[0]
    print('Total number of NULL RENEWED record:', null_renewed)


def main():
    """Run the full load: connect, read and clean the CSV, insert, report."""
    # Connect before chdir so the database stays in the launch directory;
    # closing() guarantees the connection is released even on failure.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        os.chdir(DATA_DIR)
        rows = read_rows(CSV_NAME)
        load_drivers(conn, rows)
        report(conn)


if __name__ == "__main__":
    main()

Load_Tweet_Data.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
#Name: Vineet Dcunha
#"I have not given or received any unauthorized assistance on this assignment."
"""Load tweets from the 'EndOfTweet'-delimited JSON file into the TWEET table.

The script connects to ``dsc450.db`` in the current working directory,
switches to the assignment data directory, parses every JSON tweet in
``Assignment4.txt`` and rebuilds the TWEET table from the selected fields.
It then prints the total row count and the rows for two sample tweet ids.
"""

import ast
import csv
import datetime as dt
import json
import os
import sqlite3
from contextlib import closing

DB_PATH = 'dsc450.db'
DATA_DIR = "C:/Users/USER/Desktop/DSC/DSC_450 Database For Analytics/Assignment/Assignment_5"
TWEET_FILE = 'Assignment4.txt'
TWEET_DELIMITER = 'EndOfTweet'

# JSON keys copied into the table, in TWEET column order.
TWEET_FIELDS = (
    'created_at',
    'id_str',
    'text',
    'source',
    'in_reply_to_user_id',
    'in_reply_to_screen_name',
    'in_reply_to_status_id',
    'retweet_count',
    'contributors',
)

# Tweet ids whose rows are printed after the load.
SAMPLE_IDS = ('397513618134024192', '397513618100461568')

CREATE_TWEET_TABLE = """
CREATE TABLE IF NOT EXISTS TWEET (
CREATED_AT DATE,
ID_STR VARCHAR2(20),
TEXT VARCHAR2(200),
SOURCE VARCHAR2(100),
IN_REPLY_TO_USER_ID NUMBER(30),
IN_REPLY_TO_SCREEN_NAME VARCHAR2(50),
IN_REPLY_TO_STATUS_ID NUMBER(30),
RETWEET_COUNT NUMBER(30),
CONTRIBUTORS VARCHAR2(50)
);
"""

INSERT_TWEET = 'INSERT INTO TWEET VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)'


def parse_tweets(raw):
    """Split *raw* on the tweet delimiter and decode each chunk as JSON.

    Whitespace-only chunks (for example the one left after a trailing
    delimiter) are skipped instead of being handed to ``json.loads``.

    Raises:
        json.JSONDecodeError: if a non-blank chunk is not valid JSON.
    """
    return [
        json.loads(chunk)
        for chunk in raw.split(TWEET_DELIMITER)
        if chunk.strip()
    ]


def clean_value(value):
    """Map one decoded JSON value to something sqlite3 can bind.

    ``None`` and the empty string become ``None`` (SQL NULL).  ``json.loads``
    has already turned JSON ``null`` into ``None``, so strings are never
    searched for the word "null": doing so would erase real tweet text.
    Lists and dicts cannot be bound directly and are stored as JSON text.
    """
    if value is None or value == "":
        return None
    if isinstance(value, (list, dict)):
        return json.dumps(value)
    return value


def tweet_to_row(tweet):
    """Return the TWEET column values for one decoded tweet dict.

    Raises:
        KeyError: if the tweet lacks one of the expected fields.
    """
    return [clean_value(tweet[field]) for field in TWEET_FIELDS]


def load_tweets(conn, rows):
    """Recreate TWEET on *conn*, insert *rows*, and commit."""
    cur = conn.cursor()
    cur.execute('DROP TABLE IF EXISTS TWEET;')
    cur.execute(CREATE_TWEET_TABLE)  # create the TWEET table
    cur.executemany(INSERT_TWEET, rows)
    conn.commit()  # finalize inserted data


def report(conn):
    """Print the record count and the tab-separated rows for SAMPLE_IDS."""
    cur = conn.cursor()
    total = cur.execute('SELECT COUNT(1) FROM TWEET').fetchone()[0]
    print('Total number of records:', total)

    # ID_STR is a text column, so the ids are bound as strings.
    placeholders = ', '.join('?' for _ in SAMPLE_IDS)
    query = 'SELECT * FROM TWEET WHERE ID_STR IN (' + placeholders + ')'
    for row in cur.execute(query, SAMPLE_IDS).fetchall():
        print(*row, sep='\t')


def main():
    """Run the full load: connect, parse the tweet file, insert, report."""
    # Connect before chdir so the database stays in the launch directory;
    # closing() guarantees the connection is released even on failure.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.row_factory = sqlite3.Row
        os.chdir(DATA_DIR)
        with open(TWEET_FILE, 'r', encoding="utf8") as fd:  # Read txt file
            tweets = parse_tweets(fd.read())
        load_tweets(conn, [tweet_to_row(tweet) for tweet in tweets])
        report(conn)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)