Humans would rather number the days starting at 1, not 0.
Call
reset_index
to revert to the default index.
"Provide an explicit index for a pd.Series. Give the index a name."

import sys
import pandas as pd

#The labels of the rows: the days are numbered starting at 1.
index = [1, 2, 3, 4, 5]   #or index = range(1, 6) or index = np.arange(1, 6)
data = [10.0, 20.0, 30.0, 40.0, 50.0]   #or data = np.arange(10.0, 60.0, 10.0)
series = pd.Series(data = data, index = index, name = "temperature")
series.index.name = "day"   #The name is printed as a heading above the index.
print(series)
print()

#Look up a value by its label in the index, and then by its position.
print(f"{series[1] = }")        #In the brackets you write one of the items in the index.
print(f"{series.loc[1] = }")    #same as previous statement
print(f"{series.iloc[1] = }")   #In the brackets you write an integer position, counting from 0.
print()

#Examine the index itself.
print(f"{series.index = }")
print(f"{type(series.index) = }")
print(f"{series.index.name = }")
print(f"{series.index.dtype.name = }")
print(f"{len(series.index) = }")
sys.exit(0)
day 1 10.0 2 20.0 3 30.0 4 40.0 5 50.0 Name: temperature, dtype: float64 series[1] = 10.0 series.loc[1] = 10.0 series.iloc[1] = 20.0 series.index = Int64Index([1, 2, 3, 4, 5], dtype='int64', name='day') type(series.index) = <class 'pandas.core.indexes.numeric.Int64Index'> series.index.name = 'day' series.index.dtype.name = 'int64' len(series.index) = 5
In the above program, change
index = [1, 2, 3, 4, 5] data = [10.0, 20.0, 30.0, 40.0, 50.0] series = pd.Series(data = data, index = index, name = "temperature") series.index.name = "day"
to
#Create the pd.Index, complete with a name. data = [1, 2, 3, 4, 5] index = pd.Index(data = data, name = "day") #Create the pd.Series, and put the pd.Index into it. data = [10.0, 20.0, 30.0, 40.0, 50.0] series = pd.Series(data = data, index = index, name = "temperature")
or to
#The index needs exactly one label per data value, so compute where
#the index stops from the length of the data.
data = [10.0, 20.0, 30.0, 40.0, 50.0]
index = pd.RangeIndex(start = 1, stop = len(data) + 1, name = "day")
series = pd.Series(data, index = index, name = "temperature")
You might want to create the index as a separate object
if you were planning to put the same index into several
pd.Series
es.
"Put the same pd.Index into two pd.Serieses."

import sys
import pandas as pd

#Create the index once, as a separate object shared by both pd.Serieses.
index = pd.RangeIndex(1, 6, name = "day")

temperatureSeries = pd.Series(
    data = [10.0, 20.0, 30.0, 40.0, 50.0],
    index = index,
    name = "temperature"
)

humiditySeries = pd.Series(
    data = [11.0, 21.0, 31.0, 41.0, 51.0],
    index = index,
    name = "humidity"
)

#Print each pd.Series, followed by an empty line.
for series in (temperatureSeries, humiditySeries):
    print(series)
    print()

sys.exit(0)
day 1 10.0 2 20.0 3 30.0 4 40.0 5 50.0 Name: temperature, dtype: float64 day 1 11.0 2 21.0 3 31.0 4 41.0 5 51.0 Name: humidity, dtype: float64
Now that we have two
pd.Series
es
sharing the same
pd.Index
,
we can put the two
pd.Series
es
side by side into a single
pd.DataFrame
.
The
axis = 1
means
“side by side”.
If you change it to
axis = 0
,
do you still get a
pd.DataFrame
or do you get a
pd.Series
?
#Each pd.Series in this list becomes one column of the result.
columns = [temperatureSeries, humiditySeries]
df = pd.concat(columns, axis = 1)   #Create a pd.DataFrame.
print(df)
temperature humidity day 1 10.0 11.0 2 20.0 21.0 3 30.0 31.0 4 40.0 41.0 5 50.0 51.0
"Create an index containing non-consecutive integers."

import sys
import pandas as pd

#The street numbers, which become the labels in the index.
streets = [242, 238, 231, 225, 215, 207]

#The landmark at each street, in the same order as the street numbers.
landmarks = [
    "Van Cortlandt Park",
    "Chipotle",
    "Loeser's Kosher Deli",
    "Marble Hill",
    "Baker Field",
    "Dyckman Farmhouse"
]

series = pd.Series(
    data = landmarks,
    index = pd.Index(data = streets, name = "street"),
    name = "landmark"
)

print(series)
print()
sys.exit(0)
street 242 Van Cortlandt Park 238 Chipotle 231 Loeser's Kosher Deli 225 Marble Hill 215 Baker Field 207 Dyckman Farmhouse Name: landmark, dtype: object
Combine the two
list
s
in the above program into a single
list
.
(There’s a tricky way we could have used
zip
instead of a pair of list comprehensions.)
"An index containing non-consecutive integers: the #1 subway."

import sys
import pandas as pd

#Each item is a [street number, landmark] pair.
stops = [
    [242, "Van Cortlandt Park"],
    [238, "Chipotle"],
    [231, "Loeser's Kosher Deli"],
    [225, "Marble Hill"],
    [215, "Baker Field"],
    [207, "Dyckman Farmhouse"]
]

#Split the pairs apart with a pair of list comprehensions.
streets = [street for street, _ in stops]
landmarks = [landmark for _, landmark in stops]

index = pd.Index(data = streets, name = "street")
series = pd.Series(data = landmarks, index = index, name = "landmark")
print(series)
sys.exit(0)
street 242 Van Cortlandt Park 238 Chipotle 231 Loeser's Kosher Deli 225 Marble Hill 215 Baker Field 207 Dyckman Farmhouse Name: landmark, dtype: object
It’s even easier to take the data from a Python
dict
.
But you can do this only if the numbers are unique.
"Create a pd.Series from a Python dict: the #1 subway."

import sys
import pandas as pd

#The keys of the dict become the index; the values become the data.
landmarks = {
    242: "Van Cortlandt Park",
    238: "Chipotle",
    231: "Loeser's Kosher Deli",
    225: "Marble Hill",
    215: "Baker Field",
    207: "Dyckman Farmhouse"
}

series = pd.Series(data = landmarks, name = "landmark")
series.index.name = "street"
print(series)
print()

#Left-justify the column of strings.
text = series.to_string(dtype = True, name = True)   #text is one big string
header, *rows, footer = text.splitlines()            #The first and last lines are not rows of data.

#Split each row at its first run of whitespace, into the street and the landmark,
#and rejoin the two pieces with a gap of fixed width.
gap = " " * 3
rows = [gap.join(row.split(maxsplit = 1)) for row in rows]

print("\n".join([header, *rows, footer]))
sys.exit(0)
street 242 Van Cortlandt Park 238 Chipotle 231 Loeser's Kosher Deli 225 Marble Hill 215 Baker Field 207 Dyckman Farmhouse Name: landmark, dtype: object street 242 Van Cortlandt Park 238 Chipotle 231 Loeser's Kosher Deli 225 Marble Hill 215 Baker Field 207 Dyckman Farmhouse Name: landmark, dtype: object
"Provide an explicit index of strings for a pd.Series."

import sys
import pandas as pd

#The labels of the index.
weekdays = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday"
]

#One temperature for each weekday, in the same order.
temperatures = [10.0, 20.0, 30.0, 35.0, 30.0, 25.0, 20.0]

series = pd.Series(
    data = temperatures,
    index = pd.Index(data = weekdays, name = "weekday"),
    name = "temperature"
)

print(series)   #or print(series.to_string(dtype = True, length = True, name = True))
print()

#Examine the index in greater detail.
print(f"{series.index = }")
print()
print(f"{type(series.index) = }")
print(f"{series.index.name = }")
print(f"{series.index.dtype.name = }")
print(f"{len(series.index) = }")
sys.exit(0)
weekday Sunday 10.0 Monday 20.0 Tuesday 30.0 Wednesday 35.0 Thursday 30.0 Friday 25.0 Saturday 20.0 Name: temperature, dtype: float64 series.index = Index(['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'], dtype='object', name='weekday') type(series.index) = <class 'pandas.core.indexes.base.Index'> series.index.name = 'weekday' series.index.dtype.name = 'object' len(series.index) = 7
Combine the above two
list
s
of data into one
list
"Provide an explicit index of strings for a pd.Series."

import sys
import pandas as pd

#Each item is a [weekday, temperature] pair.
days = [
    ["Sunday", 10.0],
    ["Monday", 20.0],
    ["Tuesday", 30.0],
    ["Wednesday", 35.0],
    ["Thursday", 30.0],
    ["Friday", 25.0],
    ["Saturday", 20.0]
]

#Split the pairs apart with two list comprehensions.
weekdays = [weekday for weekday, _ in days]
temperatures = [temperature for _, temperature in days]

index = pd.Index(data = weekdays, name = "weekday")
series = pd.Series(data = temperatures, index = index, name = "temperature")
print(series)
sys.exit(0)
#Another way to create the above Series,
#without having to write the two list comprehensions.
#zip(*days) regroups the pairs into one tuple of weekdays and one tuple of temperatures.
weekdays, temperatures = zip(*days)
series = pd.Series(data = temperatures, index = weekdays, name = "temperature")
series.index.name = "weekday"
weekday Sunday 10.0 Monday 20.0 Tuesday 30.0 Wednesday 35.0 Thursday 30.0 Friday 25.0 Saturday 20.0 Name: temperature, dtype: float64
or into a
dict
.
"Provide an explicit index of strings for a pd.Series."

import sys
import pandas as pd

#Each key is a weekday; each value is the temperature on that weekday.
temperatures = {
    "Sunday": 10.0,
    "Monday": 20.0,
    "Tuesday": 30.0,
    "Wednesday": 35.0,
    "Thursday": 30.0,
    "Friday": 25.0,
    "Saturday": 20.0
}

#The keys of the dict supply the index, so only the name of the index remains to be set.
series = pd.Series(data = temperatures, name = "temperature")
series.index.name = "weekday"

print(series)
sys.exit(0)
weekday Sunday 10.0 Monday 20.0 Tuesday 30.0 Wednesday 35.0 Thursday 30.0 Friday 25.0 Saturday 20.0 Name: temperature, dtype: float64
A Python
datetime.datetime
has microsecond precision;
a
pd.Timestamp
has nanosecond precision.
That’s a thousand times more precise.
"Provide an explicit index of pd.Timestamps for a pd.Series."

import sys
import pandas as pd

#One entry for each day from December 25 through December 31, 2020.
#The temperature on each day is the day of the month, as a float.
data = {
    pd.Timestamp(year = 2020, month = 12, day = day): float(day)
    for day in range(25, 32)
}

series = pd.Series(data = data, name = "temperature")
series.index.name = "timestamps"
print(series)
print()

print(f"{series.index.dtype.name = }")
sys.exit(0)
timestamps 2020-12-25 25.0 2020-12-26 26.0 2020-12-27 27.0 2020-12-28 28.0 2020-12-29 29.0 2020-12-30 30.0 2020-12-31 31.0 Name: temperature, dtype: float64 series.index.dtype.name = 'datetime64[ns]'
"Provide an explicit pd.DatetimeIndex for a pd.Series."

import sys
import pandas as pd

#The first and last dates of the index.
firstDay = pd.Timestamp(year = 2020, month = 12, day = 25)   #or firstDay = pd.Timestamp("2020-12-25")
lastDay = pd.Timestamp(year = 2020, month = 12, day = 31)

#One date per day, from firstDay through lastDay.
index = pd.date_range(start = firstDay, end = lastDay, freq = "1D", name = "date")
#or index = pd.date_range("2020-12-25", "2020-12-31", name = "date")

temperatures = [25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0]   #or temperatures = np.arange(25.0, 32.0)
series = pd.Series(data = temperatures, index = index, name = "temperature")
print(series)
print()

#Examine the index in greater detail.
print(f"{series.index = }")
print()
print(f"{type(series.index) = }")
print(f"{series.index.dtype.name = }")
print(f"{series.index.freqstr = }")
sys.exit(0)
date 2020-12-25 25.0 2020-12-26 26.0 2020-12-27 27.0 2020-12-28 28.0 2020-12-29 29.0 2020-12-30 30.0 2020-12-31 31.0 Freq: D, Name: temperature, dtype: float64 series.index = DatetimeIndex(['2020-12-25', '2020-12-26', '2020-12-27', '2020-12-28', '2020-12-29', '2020-12-30', '2020-12-31'], dtype='datetime64[ns]', name='date', freq='D') type(series.index) = <class 'pandas.core.indexes.datetimes.DatetimeIndex'> series.index.dtype.name = 'datetime64[ns]' series.index.freqstr = 'D'
data = [10.0, 20.0, 30.0, 40.0, 50.0] #data is of length 5 index = pd.RangeIndex(1, len(data), name = "day") #index is of length 4 series = pd.Series(data = data, index = index, name = "temperature")
A DatetimeIndex
whose
freq
uency
is every 10 days.
Also try
freq = "W"
for “weekly”.
"A pd.DatetimeIndex whose frequency is every 10 days."

import sys
import numpy as np
import pandas as pd

#Every tenth day of 2020, starting on January 1.
firstDay = pd.Timestamp(year = 2020, month = 1, day = 1)
lastDay = pd.Timestamp(year = 2020, month = 12, day = 31)
index = pd.date_range(start = firstDay, end = lastDay, freq = "10D", name = "date")

#Number the dates 1, 2, 3, ... so that there is one value for each date.
count = len(index)
series = pd.Series(data = np.arange(1, count + 1), index = index, name = "temperature")
print(series)
print()

#Examine the frequency of the index.
print(f"{series.index.freqstr = }")
print(f"{series.index.freq.n = }")
print(f"{series.index.freq.base.name = }")
sys.exit(0)
date 2020-01-01 1 2020-01-11 2 2020-01-21 3 2020-01-31 4 2020-02-10 5 2020-02-20 6 2020-03-01 7 2020-03-11 8 2020-03-21 9 2020-03-31 10 2020-04-10 11 2020-04-20 12 2020-04-30 13 2020-05-10 14 2020-05-20 15 2020-05-30 16 2020-06-09 17 2020-06-19 18 2020-06-29 19 2020-07-09 20 2020-07-19 21 2020-07-29 22 2020-08-08 23 2020-08-18 24 2020-08-28 25 2020-09-07 26 2020-09-17 27 2020-09-27 28 2020-10-07 29 2020-10-17 30 2020-10-27 31 2020-11-06 32 2020-11-16 33 2020-11-26 34 2020-12-06 35 2020-12-16 36 2020-12-26 37 Freq: 10D, Name: temperature, dtype: int64 series.index.freqstr = '10D' series.index.freq.n = 10 series.index.freq.base.name = 'D'