Project #6: Data Science Web App with Streamlit and Python

About the Instructor: Snehan Kekre is a Developer Advocate at Snowflake, where he specializes in the Streamlit open-source Python library. In the past, he has worked for Streamlit (pre-acquisition) as a Developer Advocate, and has authored and taught more than 40 guided projects on machine learning and data science at Coursera. He has also worked as a skills consultant at Coursera, and as content strategist at

Skills Learned / Improved: Web Development, Data Science Visualisation, Streamlit Library, Python Programming

Project Highlights & Modules

Date of Project Completion: 5th July 2023

Project Images

Code & Project Resources

Dataset Provided:

Library Requirements : numpy==1.16.4, pandas==0.24.2, pydeck==0.3.0, streamlit==0.57.3, plotly==4.0.0

My Final Code: 

import numpy as np
import pandas as pd
import plotly.express as px
import pydeck as pdk
import streamlit as st

# Page title followed by the two markdown headings shown beneath it.
st.title("Data Science Web #1")
for heading in (
    "## Motor Vehicle Collisions in New York City",
    "#### Made by Arjun Raghunandanan following the instructor using Python & Streamlit for Coursera Project Network",
):
    st.markdown(heading)

# Location of the collisions CSV. Point this at wherever the file lives:
#   option 1: fetch it from an online URL
#       DATA_URL = ""
#   option 2: download the CSV and load it from disk (the method used here)
DATA_URL = "/home/rhyme/Desktop/Project/Motor_Vehicle_Collisions_-_Crashes.csv"

# Function to load the data

@st.cache(persist=True)  # cached so the CSV is not reloaded on each run
def load_data(nrows):
    """Read the first *nrows* collision records from ``DATA_URL``.

    The ``CRASH_DATE`` and ``CRASH_TIME`` columns are parsed together into a
    single datetime column, rows lacking a latitude or longitude are
    discarded, every column name is lower-cased, and the combined timestamp
    column is renamed to ``date/time``.

    Returns the cleaned ``DataFrame``.
    """
    frame = pd.read_csv(
        DATA_URL,
        nrows=nrows,
        parse_dates=[['CRASH_DATE', 'CRASH_TIME']],
    )
    # Coordinates are required by the map views, so drop rows without them.
    frame = frame.dropna(subset=['LATITUDE', 'LONGITUDE'])
    frame = frame.rename(columns=lambda label: str(label).lower())
    return frame.rename(columns={'crash_date_crash_time': 'date/time'})

# Load the data

# Load the first 100,000 rows and keep an untouched copy, which the
# "Top 5 Dangerous Streets" section reads from.
data = load_data(100000)
original_data = data.copy()

# Header and slider for choosing the minimum number of injured people.
st.header("Where are the most people injured in NYC?")
injured_people = st.slider("Number of People Injured in Vehicle Collisions", 0, 19)

# Plot every collision with at least that many injuries. `@injured_people`
# makes DataFrame.query read the slider value from the local variable; only
# the coordinate columns are passed on, with incomplete rows removed.
st.map(
    data.query("injured_persons >= @injured_people")[["latitude", "longitude"]].dropna(how="any")
)

# Display header and slider for selecting the hour

st.header("How many collisions occur during a given time of day?")
hour = st.slider("Hour to look at", 0, 23)

# Keep only the collisions whose timestamp falls in the selected hour.
filtered_data = data[data['date/time'].dt.hour == hour]

st.markdown("Vehicle Collisions between %i:00 and %i:00" % (hour, (hour + 1) % 24))

# Centre the map on the mean position of the selected hour's collisions.
midpoint = np.average(filtered_data['latitude']), np.average(filtered_data['longitude'])

# NOTE(review): the layer type and the radius / elevation_scale / pickable
# settings are assumptions (a pydeck HexagonLayer with commonly used
# values) - confirm against the intended design. `extruded=True` is what
# makes `elevation_range` take effect, giving the pitched 3D view.
st.pydeck_chart(
    pdk.Deck(
        initial_view_state={
            "latitude": midpoint[0],
            "longitude": midpoint[1],
            "zoom": 11,
            "pitch": 50,
        },
        layers=[
            pdk.Layer(
                "HexagonLayer",
                data=filtered_data[['date/time', 'latitude', 'longitude']],
                get_position=['longitude', 'latitude'],
                radius=100,
                extruded=True,
                pickable=True,
                elevation_scale=4,
                elevation_range=[0, 1000],
            ),
        ],
    )
)

# Display subheader and histogram chart for breakdown by minute

st.subheader("Breakdown by minute between %i:00 and %i:00" % (hour, (hour + 1) % 24))

# Rows whose timestamp falls inside the selected hour.
filtered = data[
    (data['date/time'].dt.hour >= hour) & (data['date/time'].dt.hour < (hour + 1))
]

# Count crashes per minute: 60 one-minute bins covering 0..59.
hist = np.histogram(filtered['date/time'].dt.minute, bins=60, range=(0, 60))[0]
chart_data = pd.DataFrame({'minute': range(60), 'crashes': hist})

# Bar chart of crashes per minute, rendered through Streamlit.
fig = px.bar(chart_data, x='minute', y='crashes', hover_data=['minute', 'crashes'], height=400)
st.write(fig)


# Display header and selectbox for selecting the affected type

st.header("Top 5 Dangerous Streets by Affected Type")
select = st.selectbox('Affected Type of People', ['Pedestrians', 'Cyclists', 'Motorists'])

# Map each selectbox option to the injury-count column it ranks by, so that
# exactly one table is shown for every option.
injured_column = {
    'Pedestrians': 'injured_pedestrians',
    'Cyclists': 'injured_cyclists',
    'Motorists': 'injured_motorists',
}[select]

# Collisions with at least one injury of the chosen type, highest counts
# first, rows with a missing street name or count removed, top five shown.
st.write(
    original_data[original_data[injured_column] >= 1][["on_street_name", injured_column]]
    .sort_values(by=[injured_column], ascending=False)
    .dropna(how='any')[:5]
)

# Checkbox to display raw data

# Optional raw-data view, off by default.
if st.checkbox("Show Raw Data", False):
    st.subheader('Raw Data')
    # Show the loaded frame under the subheader.
    st.write(data)
