DataFrame creation
Code Examples
# Build a small DataFrame from a dict that maps each column name
# (Name, Age, City) to the list of values for that column.
import pandas as pd

df = pd.DataFrame(
    data={
        "Name": ["Alice", "Bob"],
        "Age": [25, 30],
        "City": ["New York", "Los Angeles"],
    }
)
print(df)
# List of dictionaries: each dict is one row, and its keys become the
# column names when the list is converted to a DataFrame.
data = [
    {"Name": "Charlie", "Age": 35, "City": "Chicago"},
    {"Name": "David", "Age": 40, "City": "Miami"},
]
df = pd.DataFrame(data=data)
print(df)
# Pass index= to label the rows with custom names instead of the default
# numeric index.
df = pd.DataFrame(
    data={
        "Name": ["Eve", "Frank"],
        "Age": [28, 34],
        "City": ["Boston", "Seattle"],
    },
    index=["Row1", "Row2"],
)
print(df)
Creating DataFrames
# Each inner list is one row; columns= supplies the column labels.
df_list = pd.DataFrame(
    data=[
        ["Grace", 29],
        ["Henry", 32],
    ],
    columns=["Name", "Age"],
)
print(df_list)
# A dict of equal-length lists: the keys become the column names.
data_dict = {
    "Name": ["Ivy", "Jack"],
    "Age": [24, 36],
}
df_dict = pd.DataFrame(data=data_dict)
print(df_dict)
Importing data from CSV files
Reading a CSV file with `pd.read_csv` handles the transition from file-based storage to in-memory data manipulation efficiently. This method is straightforward and essential for handling large datasets typically stored in CSV format.
# Load the contents of the file "data.csv" into a DataFrame and display it.
df_csv = pd.read_csv(filepath_or_buffer="data.csv")
print(df_csv)
Selecting and Filtering Data
import pandas as pd

# Create a DataFrame with the default integer index (row labels 0, 1, 2)
df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [25, 30, 35],
    "City": ["New York", "Los Angeles", "Chicago"],
})

# Use .loc to select by label
first_row = df.loc[0]  # Row labelled 0, returned as a Series
specific_cell = df.loc[1, 'Age']  # Value at row label 1, column 'Age'
print("First Row:\n", first_row)
print("Specific Cell (Row 1, Age):", specific_cell)

# Use .iloc to select by integer position
first_column = df.iloc[:, 0]  # Every row of the first column
specific_rows = df.iloc[0:2]  # Positions 0 and 1 (the slice end is exclusive)
print("First Column:\n", first_column)
print("Specific Rows:\n", specific_rows)

# Apply boolean indexing to filter data
adults = df[df['Age'] > 25]  # Keep rows where Age is greater than 25
los_angeles_residents = df[df['City'] == "Los Angeles"]
print("Adults:\n", adults)
print("Los Angeles Residents:\n", los_angeles_residents)
Modifying DataFrames
# Append a 'Height' column; the list supplies one height per person.
df['Height'] = [165, 180, 175]
print("DataFrame with Height:\n", df)

# Drop the 'City' column. drop() returns a new DataFrame, so rebind df.
df = df.drop(columns='City')
print("DataFrame without City:\n", df)

# Overwrite individual cells: by label with .loc, by position with .iloc.
df.loc[0, 'Age'] = 26
# Row position 2 is Charlie; column position 1 is 'Age' once 'City' is gone.
df.iloc[2, 1] = 36
print("Updated DataFrame:\n", df)
Data Cleaning
Example 1: Filling NaN with Mean
import pandas as pd
import numpy as np

# Sample DataFrame in which Bob's age is missing
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, np.nan, 30],
}
df = pd.DataFrame(data)

# Compute the mean of 'Age', then use it to fill the NaN entries
mean_age = df['Age'].mean()
df['Age'] = df['Age'].fillna(mean_age)
print(df)
Example 2: Removing Rows with NaN
import pandas as pd
import numpy as np

# Sample DataFrame with gaps in both 'Age' and 'City'
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, np.nan, 30],
    'City': [np.nan, 'New York', 'Los Angeles'],
}
df = pd.DataFrame(data)

# Keep only the rows that contain no NaN in any column; df is left unchanged
df_clean = df.dropna(axis=0, how='any')
print(df_clean)
Example 3: Filling NaN with Forward Fill Method
import pandas as pd
import numpy as np

# Sample DataFrame with gaps in 'Age' and 'City'
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, np.nan, np.nan],
    'City': ['New York', np.nan, 'Los Angeles'],
}
df = pd.DataFrame(data)

# Forward fill: each NaN takes the last non-missing value above it
# in the same column
df = df.ffill()
print(df)
Example 4: Converting String to Integer
import pandas as pd

# Sample DataFrame whose numeric-looking values are stored as strings
data = {
    'ID': ['101', '102', '103'],
    'Amount': ['1000', '1500', '1200'],
}
df = pd.DataFrame(data)

# Convert only 'Amount' from string to integer; 'ID' stays as it is
df = df.astype({'Amount': int})
print(df.dtypes)
Example 5: Converting Integer to Float
import pandas as pd

# Sample DataFrame with integer prices
data = {
    'Product': ['A', 'B', 'C'],
    'Price': [200, 150, 300],
}
df = pd.DataFrame(data)

# Cast the integer 'Price' column to floating point
price_as_float = df['Price'].astype('float64')
df['Price'] = price_as_float
print(df.dtypes)
Example 6: Handling Conversion Errors
import pandas as pd

# Sample DataFrame; 'Year' contains the text 'NaN', which is not a valid integer
data = {
    'Year': ['2020', 'NaN', '2021'],
    'Value': ['100', '200.5', '300'],
}
df = pd.DataFrame(data)

try:
    # Attempt a strict conversion of 'Year' to int; this raises ValueError
    # when an entry cannot be parsed as an integer.
    df['Year'] = df['Year'].astype(int)
except ValueError:
    # Fall back to a lenient conversion: errors='coerce' turns entries that
    # cannot be parsed into NaN instead of raising.
    df['Year'] = pd.to_numeric(df['Year'], errors='coerce')
print(df)