Give an explicit index to a pd.Series

An index containing consecutive integers

Humans would rather number the days starting at 1, not 0. Call reset_index to revert to the default index.

"Provide an explicit index for a pd.Series.  Give the index a name."

import sys
import pandas as pd

#The labels that go into the index, and the values that go into the series, in matching order.
index = [1, 2, 3, 4, 5]               #or index = range(1, 6) or index = np.arange(1, 6)
data = [10.0, 20.0, 30.0, 40.0, 50.0] #or data = np.arange(10.0, 60.0, 10.0)
series = pd.Series(data = data, index = index, name = "temperature")
series.index.name = "day"             #The name of the index is printed as a heading above the index.
print(series)
print()

#Look up one value in three ways.  The labels in the index start at 1 but the positions
#start at 0, so the label 1 is the first item and the position 1 is the second item.
print(f"{series[1]      = }") #In the brackets you write one of the labels in the index.
print(f"{series.loc[1]  = }") #same as previous statement
print(f"{series.iloc[1] = }") #In the brackets you write an integer position, counting from 0.
print()

#Examine the index by itself: the whole object, its type, its name, its dtype, and its length.
print(f"{series.index = }")
print(f"{type(series.index) = }")
print(f"{series.index.name = }")
print(f"{series.index.dtype.name = }")
print(f"{len(series.index) = }")

sys.exit(0)
day
1    10.0
2    20.0
3    30.0
4    40.0
5    50.0
Name: temperature, dtype: float64

series[1]      = 10.0
series.loc[1]  = 10.0
series.iloc[1] = 20.0

series.index = Int64Index([1, 2, 3, 4, 5], dtype='int64', name='day')
type(series.index) = <class 'pandas.core.indexes.numeric.Int64Index'>
series.index.name = 'day'
series.index.dtype.name = 'int64'
len(series.index) = 5

Create the index as a separate object.

In the above program, change

index = [1, 2, 3, 4, 5]
data = [10.0, 20.0, 30.0, 40.0, 50.0]
series = pd.Series(data = data, index = index, name = "temperature")
series.index.name = "day"
to
#Build the pd.Index first, giving it its name at the moment it is created.
days = [1, 2, 3, 4, 5]
index = pd.Index(data = days, name = "day")

#Then build the pd.Series on top of that pd.Index.
temperatures = [10.0, 20.0, 30.0, 40.0, 50.0]
series = pd.Series(data = temperatures, index = index, name = "temperature")
or to
#Compute the size of the index from the data, so the two always have the same length.
#The stop value is not included in the pd.RangeIndex, which is why 1 is added to it.

data = [10.0, 20.0, 30.0, 40.0, 50.0]
index = pd.RangeIndex(start = 1, stop = len(data) + 1, name = "day")
series = pd.Series(data = data, index = index, name = "temperature")

Two pd.Series sharing the same pd.Index

You might want to create the index as a separate object if you were planning to put the same index into several pd.Serieses.

"Put the same pd.Index into two pd.Serieses."

import sys
import pandas as pd

#One pd.Index, created once and then handed to both of the pd.Serieses below.
index = pd.RangeIndex(start = 1, stop = 6, name = "day")

temperatures = [10.0, 20.0, 30.0, 40.0, 50.0]
humidities   = [11.0, 21.0, 31.0, 41.0, 51.0]

temperatureSeries = pd.Series(data = temperatures, index = index, name = "temperature")
humiditySeries    = pd.Series(data = humidities,   index = index, name = "humidity")

#Print each pd.Series, followed by an empty line.
for series in (temperatureSeries, humiditySeries):
    print(series)
    print()

sys.exit(0)
day
1    10.0
2    20.0
3    30.0
4    40.0
5    50.0
Name: temperature, dtype: float64

day
1    11.0
2    21.0
3    31.0
4    41.0
5    51.0
Name: humidity, dtype: float64

Now that we have two pd.Serieses sharing the same pd.Index, we can put the two pd.Serieses side by side into a single pd.DataFrame. The axis = 1 means “side by side”. If you change it to axis = 0, do you still get a pd.DataFrame or do you get a pd.Series?

#Line up the two pd.Serieses on the index they share, one column per pd.Series.
#Each column gets the name of the pd.Series it came from.
df = pd.concat([temperatureSeries, humiditySeries], axis = 1) #Create a pd.DataFrame.
print(df)
     temperature  humidity
day                       
1           10.0      11.0
2           20.0      21.0
3           30.0      31.0
4           40.0      41.0
5           50.0      51.0

An index containing non-consecutive integers

"Create an index containing non-consecutive integers."

import sys
import pandas as pd

#The labels for the index: integers, but not consecutive ones.
streets = [242, 238, 231, 225, 215, 207]

#The values of the series, in the same order as the streets.
landmarks = [
    "Van Cortlandt Park",
    "Chipotle",
    "Loeser's Kosher Deli",
    "Marble Hill",
    "Baker Field",
    "Dyckman Farmhouse"
]

index = pd.Index(data = streets, name = "street")
series = pd.Series(data = landmarks, index = index, name = "landmark")
print(series, end = "\n\n")

sys.exit(0)
street
242      Van Cortlandt Park
238                Chipotle
231    Loeser's Kosher Deli
225             Marble Hill
215             Baker Field
207       Dyckman Farmhouse
Name: landmark, dtype: object

Human engineering

Combine the two lists in the above program into a single list. (There’s a tricky way we could have used zip instead of a pair of list comprehensions.)

"An index containing non-consecutive integers: the #1 subway."

import sys
import pandas as pd

#Each inner list holds one street and the landmark found at that street.
data = [
    [242, "Van Cortlandt Park"],
    [238, "Chipotle"],
    [231, "Loeser's Kosher Deli"],
    [225, "Marble Hill"],
    [215, "Baker Field"],
    [207, "Dyckman Farmhouse"]
]

#Unpack every inner list into its two items; each list comprehension keeps one of them.
streets   = [street   for street, landmark in data]
landmarks = [landmark for street, landmark in data]

index  = pd.Index( data = streets,   name = "street")
series = pd.Series(data = landmarks, index = index, name = "landmark")
print(series)

sys.exit(0)
street
242      Van Cortlandt Park
238                Chipotle
231    Loeser's Kosher Deli
225             Marble Hill
215             Baker Field
207       Dyckman Farmhouse
Name: landmark, dtype: object

It’s even easier to take the data from a Python dict. But you can do this only if the numbers are unique, because the keys of a dict cannot contain duplicates.

"Create a pd.Series from a Python dict: the #1 subway."

import sys
import pandas as pd

#Each key of the dict becomes a label in the index,
#and each value becomes the corresponding value of the pd.Series.
data = {
    242: "Van Cortlandt Park",
    238: "Chipotle",
    231: "Loeser's Kosher Deli",
    225: "Marble Hill",
    215: "Baker Field",
    207: "Dyckman Farmhouse"
}

series = pd.Series(data = data, name = "landmark")
series.index.name = "street"
print(series, end = "\n\n")

#Left-justify the column of strings.
#The first line (the name of the index) and the last line (the name and dtype) are kept as is.
#Every line in between is cut at its first run of whitespace into a street and a landmark,
#which are then put back together with exactly three spaces between them.

heading, *rows, footer = series.to_string(dtype = True, name = True).splitlines()
gap = "   "
rows = [gap.join(row.split(maxsplit = 1)) for row in rows]
s = "\n".join([heading, *rows, footer])   #s is one big string
print(s)

sys.exit(0)
street
242      Van Cortlandt Park
238                Chipotle
231    Loeser's Kosher Deli
225             Marble Hill
215             Baker Field
207       Dyckman Farmhouse
Name: landmark, dtype: object

street
242   Van Cortlandt Park
238   Chipotle
231   Loeser's Kosher Deli
225   Marble Hill
215   Baker Field
207   Dyckman Farmhouse
Name: landmark, dtype: object

An index containing strings

"Provide an explicit index of strings for a pd.Series."

import sys
import pandas as pd

#The labels for the index: the names of the seven days of the week.
weekdays = [
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
]

#The values of the series, in the same order as the weekdays.
temperatures = [10.0, 20.0, 30.0, 35.0, 30.0, 25.0, 20.0]

index = pd.Index(data = weekdays, name = "weekday")
series = pd.Series(data = temperatures, index = index, name = "temperature")
print(series, end = "\n\n") #or print(series.to_string(dtype = True, length = True, name = True), end = "\n\n")

#Examine the index by itself: the whole object, then its type, name, dtype, and length.
print(f"{series.index = }", end = "\n\n")

print(f"{type(series.index) = }")
print(f"{series.index.name = }")
print(f"{series.index.dtype.name = }")
print(f"{len(series.index) = }")

sys.exit(0)
weekday
Sunday       10.0
Monday       20.0
Tuesday      30.0
Wednesday    35.0
Thursday     30.0
Friday       25.0
Saturday     20.0
Name: temperature, dtype: float64

series.index = Index(['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
       'Saturday'],
      dtype='object', name='weekday')

type(series.index) = <class 'pandas.core.indexes.base.Index'>
series.index.name = 'weekday'
series.index.dtype.name = 'object'
len(series.index) = 7

Combine the above two lists of data into one list

"Provide an explicit index of strings for a pd.Series."

import sys
import pandas as pd

#Each inner list holds one label for the index and the value that goes with it.
days = [
    ["Sunday",    10.0],
    ["Monday",    20.0],
    ["Tuesday",   30.0],
    ["Wednesday", 35.0],
    ["Thursday",  30.0],
    ["Friday",    25.0],
    ["Saturday",  20.0]
]

#Unpack every inner list into its two items; each list comprehension keeps one of them.
weekdays     = [weekday     for weekday, temperature in days]
temperatures = [temperature for weekday, temperature in days]

index  = pd.Index( data = weekdays,     name = "weekday")
series = pd.Series(data = temperatures, index = index, name = "temperature")
print(series)
sys.exit(0)
#Another way to create the above Series, with no list comprehensions at all.
#zip(*days) regroups the [weekday, temperature] pairs into two tuples:
#one holding all the weekdays and one holding all the temperatures.

weekdays, temperatures = zip(*days)
series = pd.Series(data = temperatures, index = weekdays, name = "temperature")
series.index.name = "weekday"
weekday
Sunday       10.0
Monday       20.0
Tuesday      30.0
Wednesday    35.0
Thursday     30.0
Friday       25.0
Saturday     20.0
Name: temperature, dtype: float64

or into a dict.

"Provide an explicit index of strings for a pd.Series."

import sys
import pandas as pd

#Each key of the dict becomes a label in the index,
#and each value becomes the corresponding value of the pd.Series.
data = {
    "Sunday":    10.0,
    "Monday":    20.0,
    "Tuesday":   30.0,
    "Wednesday": 35.0,
    "Thursday":  30.0,
    "Friday":    25.0,
    "Saturday":  20.0
}

#rename_axis gives back a pd.Series whose index carries the given name.
series = pd.Series(data = data, name = "temperature").rename_axis("weekday")
print(series)
sys.exit(0)
weekday
Sunday       10.0
Monday       20.0
Tuesday      30.0
Wednesday    35.0
Thursday     30.0
Friday       25.0
Saturday     20.0
Name: temperature, dtype: float64

An index containing pd.Timestamps

A Python datetime.datetime has microsecond precision; a pd.Timestamp has nanosecond precision. That’s a thousand times more precise.

"Provide an explicit index of pd.Timestamps for a pd.Series."

import sys
import pandas as pd

#One entry for each day from December 25 through December 31, 2020.
#The key is the pd.Timestamp of that day; the value is the day of the month as a float.
data = {
    pd.Timestamp(year = 2020, month = 12, day = day): float(day)
    for day in range(25, 32)
}

series = pd.Series(data = data, name = "temperature")
series.index.name = "timestamps"
print(series, end = "\n\n")

print(f"{series.index.dtype.name = }")
sys.exit(0)
timestamps
2020-12-25    25.0
2020-12-26    26.0
2020-12-27    27.0
2020-12-28    28.0
2020-12-29    29.0
2020-12-30    30.0
2020-12-31    31.0
Name: temperature, dtype: float64

series.index.dtype.name = 'datetime64[ns]'

An index containing pd.Timestamps for consecutive dates

"Provide an explicit pd.DatetimeIndex for a pd.Series."

import sys
import pandas as pd

#The first and last dates that go into the index.
start = pd.Timestamp(year = 2020, month = 12, day = 25) #or start = pd.Timestamp("2020-12-25")
end   = pd.Timestamp(year = 2020, month = 12, day = 31)

#One date per day, from start through end.
index = pd.date_range(start = start, end = end, freq = "1D", name = "date")
#or index = pd.date_range("2020-12-25", "2020-12-31", name = "date")

#One temperature per date: 25.0, 26.0, 27.0, etc., through 31.0.
data = [float(day) for day in range(25, 32)]            #or data = np.arange(25.0, 32.0)
series = pd.Series(data = data, index = index, name = "temperature")
print(series, end = "\n\n")

#Examine the index in greater detail.
print(f"{series.index = }", end = "\n\n")
print(f"{type(series.index) = }")
print(f"{series.index.dtype.name = }")
print(f"{series.index.freqstr = }")
 
sys.exit(0)
date
2020-12-25    25.0
2020-12-26    26.0
2020-12-27    27.0
2020-12-28    28.0
2020-12-29    29.0
2020-12-30    30.0
2020-12-31    31.0
Freq: D, Name: temperature, dtype: float64

series.index = DatetimeIndex(['2020-12-25', '2020-12-26', '2020-12-27', '2020-12-28',
               '2020-12-29', '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', name='date', freq='D')

type(series.index) = <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
series.index.dtype.name = 'datetime64[ns]'
series.index.freqstr = 'D'

Things to try

  1. What happens if the index is too long or too short?
    #Here the data and the index deliberately have different lengths.
    data = [10.0, 20.0, 30.0, 40.0, 50.0]             #data is of length 5
    index = pd.RangeIndex(1, len(data), name = "day") #index is of length 4, because the stop value len(data) is not included
    series = pd.Series(data = data, index = index, name = "temperature")
    
  2. Create a DatetimeIndex whose frequency is every 10 days. Also try freq = "W" for “weekly”.
    "A pd.DatetimeIndex whose frequency is every 10 days."
    
    import sys
    import numpy as np
    import pandas as pd
    
    #The first and last days of the year 2020.
    start = pd.Timestamp(year = 2020, month =  1, day =  1)
    end   = pd.Timestamp(year = 2020, month = 12, day = 31)
    
    #One date every 10 days, beginning at start and never going past end.
    index = pd.date_range(start = start, end = end, freq = "10D", name = "date")
    data = np.arange(1, len(index) + 1) #Number the dates 1, 2, 3, etc., one number per date.
    series = pd.Series(data = data, index = index, name = "temperature")
    print(series)
    print()
    
    #The frequency "10D" is made of two parts: the multiple 10 and the base frequency "D".
    print(f"{series.index.freqstr = }")
    print(f"{series.index.freq.n = }")         #the multiple
    print(f"{series.index.freq.base.name = }") #the name of the base frequency
    
    sys.exit(0)
    
    date
    2020-01-01     1
    2020-01-11     2
    2020-01-21     3
    2020-01-31     4
    2020-02-10     5
    2020-02-20     6
    2020-03-01     7
    2020-03-11     8
    2020-03-21     9
    2020-03-31    10
    2020-04-10    11
    2020-04-20    12
    2020-04-30    13
    2020-05-10    14
    2020-05-20    15
    2020-05-30    16
    2020-06-09    17
    2020-06-19    18
    2020-06-29    19
    2020-07-09    20
    2020-07-19    21
    2020-07-29    22
    2020-08-08    23
    2020-08-18    24
    2020-08-28    25
    2020-09-07    26
    2020-09-17    27
    2020-09-27    28
    2020-10-07    29
    2020-10-17    30
    2020-10-27    31
    2020-11-06    32
    2020-11-16    33
    2020-11-26    34
    2020-12-06    35
    2020-12-16    36
    2020-12-26    37
    Freq: 10D, Name: temperature, dtype: int64
    
    series.index.freqstr = '10D'
    series.index.freq.n = 10
    series.index.freq.base.name = 'D'