Skip to content

Pandas Cheatsheet

This page maps common pandas operations to equivalent Pivotal patterns. It is aimed at pandas users who already know what they want to do and need the Pivotal code for the same operation.

The examples assume a pandas DataFrame named sales unless the operation loads or combines data.

Load and Inspect

Task pandas Pivotal
Read a CSV
sales = pd.read_csv("sales.csv")
load "sales.csv" as sales
Read with options
sales = pd.read_csv(
    "sales.csv",
    sep=";",
    header=0,
)
load "sales.csv" as sales
    sep ";"
    header 0
Show rows
sales.head()
with sales
    show head
Summary statistics
sales.describe()
with sales
    show summary

Rows and Columns

Task pandas Pivotal
Filter rows
active = sales[sales["status"] == "active"]
with sales as active
    filter status == "active"
Filter with multiple conditions
high_value = sales[
    (sales["status"] == "active")
    & (sales["revenue"] > 1000)
]
with sales as high_value
    filter status == "active"
    filter revenue > 1000
Filter with a list
east_west = sales[
    sales["region"].isin(["East", "West"])
]
with sales as east_west
    filter region in ["East", "West"]
Select columns
report = sales[["order_id", "region", "revenue"]]
with sales as report
    select order_id, region, revenue
Drop columns
clean = sales.drop(
    columns=["debug_flag", "notes"]
)
with sales as clean
    drop debug_flag, notes
Rename columns
renamed = sales.rename(
    columns={
        "qty": "quantity",
        "rev": "revenue",
    }
)
with sales as renamed
    rename qty as quantity, rev as revenue
Sort rows
top = sales.sort_values(
    "revenue",
    ascending=False,
)
with sales as top
    sort revenue desc
Remove duplicate rows
unique_sales = sales.drop_duplicates()
with sales as unique_sales
    distinct
Remove duplicates by selected columns
unique_products = sales.drop_duplicates(
    ["product", "category"]
)
with sales as unique_products
    distinct product, category

Create and Clean Columns

Task pandas Pivotal
Create a column
sales["margin"] = sales["revenue"] - sales["cost"]
with sales
    margin = revenue - cost
Create a conditional column
sales["tier"] = np.where(
    sales["revenue"] > 1000,
    "high",
    "standard",
)
with sales
    tier = "high"
        where revenue > 1000
        else "standard"
Fill missing values
clean = sales.fillna({
    "region": "Unknown",
    "revenue": 0,
})
with sales as clean
    fillna
        region = "Unknown"
        revenue = 0
Drop rows with missing values
complete = sales.dropna(
    subset=["customer_id", "revenue"]
)
with sales as complete
    dropna customer_id, revenue
Cast a column
sales["order_date"] = pd.to_datetime(
    sales["order_date"]
)
with sales
    cast order_date as datetime
Round values
sales["revenue"] = sales["revenue"].round(2)
with sales
    round revenue 2

Aggregation and Reshaping

Task pandas Pivotal
Group and sum
by_region = (
    sales
    .groupby("region", as_index=False)["revenue"]
    .sum()
)
with sales as by_region
    group by region
        agg sum revenue as revenue
Group with several aggregations
summary = (
    sales
    .groupby("region", as_index=False)
    .agg(
        total=("revenue", "sum"),
        avg_order=("revenue", "mean"),
        orders=("order_id", "count"),
    )
)
with sales as summary
    group by region
        agg sum revenue as total
        agg mean revenue as avg_order
        agg count order_id as orders
Aggregate all rows
totals = sales.agg(
    total=("revenue", "sum"),
    avg=("revenue", "mean"),
)
with sales as totals
    agg sum revenue as total, mean revenue as avg
Pivot table
wide = sales.pivot_table(
    index="product",
    columns="region",
    values="revenue",
    aggfunc="sum",
)
with sales as wide
    pivot
        rows product
        cols region
        agg sum revenue
Melt wide data to long
long = monthly_sales.melt(
    id_vars=["region"],
    value_vars=["jan", "feb", "mar"],
    var_name="month",
    value_name="revenue",
)
with monthly_sales as long
    unpivot
        id region
        cols jan, feb, mar
        variable "month"
        value "revenue"

Combine Tables

Task pandas Pivotal
Left join
enriched = sales.merge(
    customers,
    on="customer_id",
    how="left",
)
with sales as enriched
    left merge customers on customer_id
Join with different key names
enriched = sales.merge(
    customers,
    left_on="customer_id",
    right_on="id",
    how="left",
)
with sales as enriched
    left merge customers
        left_on customer_id
        right_on id
Stack rows
all_sales = pd.concat([
    jan_sales,
    feb_sales,
    mar_sales,
])
with jan_sales as all_sales
    concat feb_sales, mar_sales

Window-Style Operations

Task pandas Pivotal
Rank within groups
sales["regional_rank"] = (
    sales.groupby("region")["revenue"]
    .rank(ascending=False)
)
with sales
    rank revenue desc as regional_rank
        by region
Lag a value
sales = sales.sort_values("date")
sales["prev_revenue"] = (
    sales.groupby("region")["revenue"]
    .shift(1)
)
with sales
    lag revenue 1 as prev_revenue
        by region
        order date
Running total
sales = sales.sort_values("date")
sales["running_revenue"] = (
    sales.groupby("region")["revenue"]
    .cumsum()
)
with sales
    cumsum revenue as running_revenue
        by region
        order date
Rolling average
sales = sales.sort_values("date")
sales["rolling_avg"] = (
    sales.groupby("region")["revenue"]
    .rolling(7)
    .mean()
    .reset_index(level=0, drop=True)
)
with sales
    rolling mean revenue 7 as rolling_avg
        by region
        order date

Python Variables and Functions

Use : when a Pivotal statement should read a Python object from the surrounding notebook or script.

Task pandas Pivotal
Use a Python threshold
threshold = 1000
top = sales[sales["revenue"] > threshold]
with sales as top
    filter revenue > :threshold
Use a Python column list
cols = ["order_id", "region", "revenue"]
report = sales[cols]
with sales as report
    select :cols
Apply a Python function to the table
clean = clean_sales(sales)
with sales as clean
    apply :clean_sales

For exact command forms and less common options, see the Syntax Reference.