Indexing and slicing a pd.Series

Select one row.

Give loc an item from the index. In this Series, each item from the index is a two-letter string.

Give iloc the same integer (non-negative or negative) that you would use to index a Python list.

Since each of these two-letter strings is a valid Python identifier (variable name) and does not collide with the name of an existing Series attribute or method, you can abbreviate series["CA"] to series.CA with the attribute reference dot.

import sys
import pandas as pd

#Each key is a two-letter abbreviation (it becomes the index); each value is the state's name.
data = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California"
}

series = pd.Series(data = data, name = "state")
series.index.name = "abbr"

#Select the last row three ways: by label, by position, by position counted from the end.
print(f'{series.loc["CA"] = }')
print(f'{series.iloc[4]   = }')
print(f'{series.iloc[-1]  = }')

print(f'{series["CA"]     = }') #Same as series.loc["CA"], because "CA" is in the index.
print(f'{series.CA        = }') #abbreviation for the above statement.

#Do not give a position to plain square brackets.  Because 4 and -1 are not in this index
#of strings, series[4] and series[-1] used to fall back to series.iloc[4] and series.iloc[-1].
#That fallback was deprecated in pandas 2.1 (FutureWarning) and raises a KeyError in pandas 3,
#so always give a position to iloc.  (Sample output captured while the fallback still worked
#includes one line for each of these two statements.)
#print(f'{series[4]        = }')
#print(f'{series[-1]       = }')
print()

#Assign to a row with the same subscript you would use to select it.
series["CA"] = "Cali-for-ni-a"
print(series)

sys.exit(0)

series.loc["CA"] = 'California'
series.iloc[4]   = 'California'
series.iloc[-1]  = 'California'
series["CA"]     = 'California'
series.CA        = 'California'
series[4]        = 'California'
series[-1]       = 'California'

abbr
AL          Alabama
AK           Alaska
AZ          Arizona
AR         Arkansas
CA    Cali-for-ni-a
Name: state, dtype: object

An index of integers

import sys
import numpy as np
import pandas as pd

states = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
]

#Label the rows 1, 2, 3, 4, 5 instead of the default 0, 1, 2, 3, 4.
series = pd.Series(
    states,
    index = pd.RangeIndex(start = 1, stop = len(states) + 1, name = "#"),
    name = "state"
)
print(series, end = "\n\n")

print(f"{series.dtype.name = }")
print(f"{series.index.dtype.name = }", end = "\n\n")

#loc takes a label from the index; iloc takes a position, exactly like a Python list.
print(f"{series.loc[5]   = }")
print(f"{series.iloc[4]  = }")
print(f"{series.iloc[-1] = }")

#Plain square brackets look the integer up as a label here, so this is series.loc[5].
print(f'{series[5]       = }')

#The index holds integers, so -1 would be looked up as a label.
#There is no label -1, so each of these would raise a KeyError exception:
#print(f'{series.loc[-1]  = }')
#print(f'{series[-1]      = }')

sys.exit(0)

#
1       Alabama
2        Alaska
3       Arizona
4      Arkansas
5    California
Name: state, dtype: object

series.dtype.name = 'object'
series.index.dtype.name = 'int64'

series.loc[5]   = 'California'
series.iloc[4]  = 'California'
series.iloc[-1] = 'California'
series[5]       = 'California'

An index containing duplicates

import sys
import pandas as pd

#A dict can't hold this data: a dict key must be unique, and there is more than one "LOL".

pairs = [
    ("IMHO", "in my humble opinion"),
    ("LOL",  "laughing out loud"),
    ("LOL",  "lots of luck"),
    ("OMG",  "oh my God"),
    ("ROFL", "rolling on the floor laughing"),
]

#Split the pairs into two parallel sequences: the labels and the values.
abbreviations, meanings = zip(*pairs)

series = pd.Series(
    list(meanings),
    index = pd.Index(list(abbreviations), name = "abbr"),
    name = "Texting Abbreviations"
)
print(series, end = "\n\n")

#A label that appears once in the index selects a single value (here, a str).
print(f'{series["ROFL"] = }', end = "\n\n")

#A label that appears more than once selects every matching row, as a pd.Series.
print(series["LOL"])
sys.exit(0)

abbr
IMHO             in my humble opinion
LOL                 laughing out loud
LOL                      lots of luck
OMG                         oh my God
ROFL    rolling on the floor laughing
Name: Texting Abbreviations, dtype: object

series["ROFL"] = 'rolling on the floor laughing'

LOL    laughing out loud
LOL         lots of luck
Name: Texting Abbreviations, dtype: object

A list

Select as many items as you want, in any order.

import sys
import pandas as pd

states = {
    "CT": "Connecticut",
    "NJ": "New Jersey",
    "NY": "New York",
}

#rename_axis gives the index its name without a separate assignment statement.
series = pd.Series(states, name = "state").rename_axis("abbr")

selections = [
    series,                           #the whole Series, for comparison
    series.loc[["NY", "NJ", "CT"]],   #a list of labels, in any order
    series.iloc[[2, 1, 0]],           #a list of positions, in any order
    series[["NY", "NJ", "NJ"]],       #same as series.loc[["NY", "NJ", "NJ"]]
]

#Print each selection followed by an empty line.
for selection in selections:
    print(selection, end = "\n\n")

sys.exit(0)

abbr
CT    Connecticut
NJ     New Jersey
NY       New York
Name: state, dtype: object

abbr
NY       New York
NJ     New Jersey
CT    Connecticut
Name: state, dtype: object

abbr
NY       New York
NJ     New Jersey
CT    Connecticut
Name: state, dtype: object

abbr
NY       New York
NJ     New Jersey
NJ     New Jersey
Name: state, dtype: object

A slice

import sys
import pandas as pd

states = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
}

series = pd.Series(states, name = "state").rename_axis("abbr")

#Each pair is the text of an expression and the rows that the expression selects.
demos = [
    #Warning: a slice of labels is inclusive.  It contains 3 states.
    #Could also say series.loc["AK": "AR"]
    ('series["AK": "AR"]', series["AK": "AR"]),

    #Warning: a slice of positions is exclusive.  It contains only 2 states.
    ('series[1:3]', series[1:3]),

    #["AK", "AR"] is a Python list.  Could also say series.loc[["AK", "AR"]]
    ('series[["AK", "AR"]]', series[["AK", "AR"]]),

    #Could also say series.loc[["AK", "AR", "AZ"]]
    ('series[["AK", "AR", "AZ"]]', series[["AK", "AR", "AZ"]]),

    #Warning: the slice is exclusive.  It contains only 2 states.
    ("series.iloc[1:3]", series.iloc[1:3]),

    ("series.iloc[[1, 3]]", series.iloc[[1, 3]]),

    ("series.iloc[[1, 3, 2]]", series.iloc[[1, 3, 2]]),
]

#Print each expression above its rows, with one empty line between consecutive demos.
report = "\n\n".join(f"{label}\n{selection!s}" for label, selection in demos)
print(report)
sys.exit(0)

series["AK": "AR"]
abbr
AK      Alaska
AZ     Arizona
AR    Arkansas
Name: state, dtype: object

series[1:3]
abbr
AK     Alaska
AZ    Arizona
Name: state, dtype: object

series[["AK", "AR"]]
abbr
AK      Alaska
AR    Arkansas
Name: state, dtype: object

series[["AK", "AR", "AZ"]]
abbr
AK      Alaska
AR    Arkansas
AZ     Arizona
Name: state, dtype: object

series.iloc[1:3]
abbr
AK     Alaska
AZ    Arizona
Name: state, dtype: object

series.iloc[[1, 3]]
abbr
AK      Alaska
AR    Arkansas
Name: state, dtype: object

series.iloc[[1, 3, 2]]
abbr
AK      Alaska
AR    Arkansas
AZ     Arizona
Name: state, dtype: object

Print the lines in the correct order.

I didn’t bother to specify an index, so the index defaults to 0, 1, 2, 3, 4, 5, 6, 7.

import sys
import pandas as pd

#The lines of the poem, scrambled.  The comment on each line is its position in the Series.
data = [
    "A word that means the world to me.",         #0
    "O means only that she's growing old.",       #1
    "T is for the tears she shed to save me.",    #2
    "E is for her eyes with love-light shining.", #3
    "M is for the million things she gave me.",   #4
    "R means right and right she'll always be.",  #5
    "Put them all together, they spell mother,",  #6
    "H is for her heart of purest gold."          #7
]

series = pd.Series(data = data, name = "poem")

#To unscramble, list the positions in the order that makes the first letters spell
#M, O, T, H, E, R, followed by the two closing lines.
order = [4, 1, 2, 7, 3, 5, 6, 0]
poem = series.iloc[order]
print(poem)
print()

#Left-justify the unscrambled lines.
s = poem.to_string(index = False)                  #s is one big string
lines = [line.lstrip() for line in s.splitlines()] #lines is a list of smaller strings
s = "\n".join(lines)                               #rejoin them back into one big string
print(s)

sys.exit(0)

Put the lines in the correct order with sort_index.

import sys
import pandas as pd

#The lines of the poem, scrambled.  The comment on each line is its default label.
lines = [
    "A word that means the world to me.",         #0
    "O means only that she's growing old.",       #1
    "T is for the tears she shed to save me.",    #2
    "E is for her eyes with love-light shining.", #3
    "M is for the million things she gave me.",   #4
    "R means right and right she'll always be.",  #5
    "Put them all together, they spell mother,",  #6
    "H is for her heart of purest gold.",         #7
]

series = pd.Series(lines, name = "poem")
print(series, end = "\n\n")

#Give the Series a new index.
#Each new label is the position that the line should occupy in the unscrambled poem.
series.index = pd.Index([7, 1, 2, 4, 0, 5, 6, 3])
print(series, end = "\n\n")

#Put the rows in ascending order of their labels.
series = series.sort_index()   #or series.sort_index(inplace = True)
print(series)

sys.exit(0)

0            A word that means the world to me.
1          O means only that she's growing old.
2       T is for the tears she shed to save me.
3    E is for her eyes with love-light shining.
4      M is for the million things she gave me.
5     R means right and right she'll always be.
6     Put them all together, they spell mother,
7            H is for her heart of purest gold.
Name: poem, dtype: object

7            A word that means the world to me.
1          O means only that she's growing old.
2       T is for the tears she shed to save me.
4    E is for her eyes with love-light shining.
0      M is for the million things she gave me.
5     R means right and right she'll always be.
6     Put them all together, they spell mother,
3            H is for her heart of purest gold.
Name: poem, dtype: object

0      M is for the million things she gave me.
1          O means only that she's growing old.
2       T is for the tears she shed to save me.
3            H is for her heart of purest gold.
4    E is for her eyes with love-light shining.
5     R means right and right she'll always be.
6     Put them all together, they spell mother,
7            A word that means the world to me.
Name: poem, dtype: object

Index with a sequence of bools

"Keep only the rows with a correctly spelled index."

import sys
import pandas as pd

#Each row is a borough (it becomes the index) and the name of a person who lives there.
data = [
    ["Brooklyn",      "Brad"],
    ["Manhattan",     "Mark"],
    ["Queens",        "Quentin"],
    ["Staten Island", "Stanley"],
    ["Staten Island", "Stanislav"],
    ["Manhattan",     "Mary"],
    ["Broklyn",       "Bill"],   #misspelled
    ["Manhattan",     "Manfred"]
]

#The correctly spelled boroughs.
valids = [
    "Bronx",
    "Brooklyn",
    "Manhattan",
    "Queens",
    "Staten Island"
]

index  = pd.Index( data = [person[0] for person in data], name = "borough")
series = pd.Series(data = [person[1] for person in data], index = index, name = "People")
print(series)
print()

#Report the labels that are in the index but not in valids.
indexOfInvalids = series.index.difference(valids)
print(f"{indexOfInvalids = }")
print()

#isValid is a sequence of bools, one per row: True if the row's label is correctly spelled.
#Indexing the Series with this sequence keeps only the rows whose bool is True.
#Could also say series.drop(indexOfInvalids, inplace = True)
isValid = series.index.isin(valids)
series = series[isValid]
print(series)
sys.exit(0)

borough
Brooklyn              Brad
Manhattan             Mark
Queens             Quentin
Staten Island      Stanley
Staten Island    Stanislav
Manhattan             Mary
Broklyn               Bill
Manhattan          Manfred
Name: People, dtype: object

indexOfInvalids = Index(['Broklyn'], dtype='object', name='borough')

borough
Brooklyn              Brad
Manhattan             Mark
Queens             Quentin
Staten Island      Stanley
Staten Island    Stanislav
Manhattan             Mary
Manhattan          Manfred
Name: People, dtype: object