Create a pd.DataFrame object

A pd.DataFrame object contains one or more columns of information. Each column is a pd.Series, and all of these pd.Series objects share the same index. If you don’t specify an index, the index of a pd.DataFrame, like the index of a pd.Series, defaults to a RangeIndex.

In addition to the familiar vertical index that provides an identifying number or name for each row, a pd.DataFrame has a horizontal index that provides an identifying number or name for each column.

"Create a pd.DataFrame object.  Examine its two default indices, vertical and horizontal."

import sys
import pandas as pd

#Three rows of four floats.  Each inner list supplies one row of the pd.DataFrame.
data = [
    [ 0.0,  1.0,  2.0,  3.0],
    [10.0, 11.0, 12.0, 13.0],
    [20.0, 21.0, 22.0, 23.0],
]

df = pd.DataFrame(data)
print(df, end = "\n\n")          #the pd.DataFrame, followed by an empty line

#No index was supplied for either axis, so look at what each one defaulted to.
print(f"{df.index   = }")
print(f"{df.columns = }", end = "\n\n")

print(df.dtypes, end = "\n\n")   #a pd.Series holding the dtype of each column

#Swap in named indices of string labels, one for the rows and one for the columns.
row_labels = pd.Index(["row0", "row1", "row2"], name = "rows")
col_labels = pd.Index(["col0", "col1", "col2", "col3"], name = "cols")
df.index, df.columns = row_labels, col_labels

print(df)
sys.exit(0)

      0     1     2     3
0   0.0   1.0   2.0   3.0
1  10.0  11.0  12.0  13.0
2  20.0  21.0  22.0  23.0

df.index   = RangeIndex(start=0, stop=3, step=1)
df.columns = RangeIndex(start=0, stop=4, step=1)

0    float64
1    float64
2    float64
3    float64
dtype: object

cols  col0  col1  col2  col3
rows
row0   0.0   1.0   2.0   3.0
row1  10.0  11.0  12.0  13.0
row2  20.0  21.0  22.0  23.0

Select a column, row, or individual value.

"Create a pd.DataFrame object.  Examine its two default indices, vertical and horizontal."

import sys
import pandas as pd

data = [                       #three rows and four columns
    [ 0.0,  1.0,  2.0,  3.0],
    [10.0, 11.0, 12.0, 13.0],
    [20.0, 21.0, 22.0, 23.0]
]

df = pd.DataFrame(data = data)

df.index   = pd.Index(data = ["row0", "row1", "row2"],         name = "rows")
df.columns = pd.Index(data = ["col0", "col1", "col2", "col3"], name = "cols")

print(df)
print()

series = df["col2"]     #Get one of the columns.
print(series)
print()

series = df.loc["row2"] #Get one of the rows.  Could also say df.iloc[2]
print(series)           #A pd.Series always prints vertically, even if originally a row.
print()

print(f'{df.at["row2", "col2"] = }')   #Get an individual value.
print(f'{df.iat[2, 2]          = }')

sys.exit(0)

cols  col0  col1  col2  col3
rows
row0   0.0   1.0   2.0   3.0
row1  10.0  11.0  12.0  13.0
row2  20.0  21.0  22.0  23.0

rows
row0     2.0
row1    12.0
row2    22.0
Name: col2, dtype: float64

cols
col0    20.0
col1    21.0
col2    22.0
col3    23.0
Name: row2, dtype: float64

df.at["row2", "col2"] = 22.0
df.iat[2, 2]          = 22.0

Things to try

Change the above data to one of the following. The first one does the work in Python. The second one is faster because it ultimately does the work in C.

nrows = 3   #how many rows
ncols = 4   #how many columns

#Nested Python list comprehension.
#The outer level produces one inner list per row, so data is nrows by ncols.

data = [
    [float(tens + ones) for ones in range(ncols)]
    for tens in range(0, 10 * nrows, 10)
]

#remember to import numpy as np

nrows = 3   #how many rows
ncols = 4   #how many columns

ones = np.arange(ncols, dtype = np.float64)               #one value per column
tens = np.arange(0, 10 * nrows, 10, dtype = np.float64)   #one value per row

#meshgrid expands the two 1-dimensional arrays to nrows by ncols.
#Adding them gives a third np.ndarray of the same shape.
ones, tens = np.meshgrid(ones, tens)
data = ones + tens

#Print all three arrays, each followed by an empty line, just to help you understand.
for array in (ones, tens, data):
    print(array)
    print()

[[0. 1. 2. 3.]
 [0. 1. 2. 3.]
 [0. 1. 2. 3.]]

[[ 0.  0.  0.  0.]
 [10. 10. 10. 10.]
 [20. 20. 20. 20.]]

[[ 0.  1.  2.  3.]
 [10. 11. 12. 13.]
 [20. 21. 22. 23.]]

You can also create data this way:

nrows = 3   #how many rows
ncols = 4   #how many columns

ones = np.arange(ncols, dtype = np.float64)
tens = np.arange(0, 10 * nrows, 10, dtype = np.float64)

#meshgrid hands back both nrows by ncols np.ndarrays together in one sequence.
#The built-in sum then adds them element by element.
grids = np.meshgrid(ones, tens)
data = sum(grids)

or even this way:

nrows = 3   #number of rows
ncols = 4   #number of columns

ones = np.arange(ncols, dtype = np.float64)
tens = np.arange(0, 10 * nrows, 10, dtype = np.float64)
#The np.ndarrays returned by meshgrid go straight to the built-in sum, with no intermediate variable.
data = sum(np.meshgrid(ones, tens))

Print a pd.DataFrame that is wider than pd.options.display.max_columns. See Options and settings. The option_context function creates and returns a Python context manager. See Context Manager Types.

"Print a pd.DataFrame that is wider than pd.options.display.max_columns."

import sys
import numpy as np
import pandas as pd

#Report the current values of the two display options that this script overrides below.
print(f'{pd.get_option("display.max_columns") = }')
print(f'{pd.get_option("display.width") = }', end = "\n\n")

nrows = 10
ncols = 16

#ones and hundreds each come back from meshgrid as a nrows by ncols np.ndarray.
ones, hundreds = np.meshgrid(np.arange(ncols), np.arange(0, 100 * nrows, 100))
data = ones + hundreds   #data is a nrows by ncols np.ndarray too.

#Label the rows 0, 100, 200, ... to match the hundreds in each row.
df = pd.DataFrame(data, index = pd.RangeIndex(0, 100 * nrows, 100))
separator = 80 * "-"

#First print the pd.DataFrame under the options currently in effect.
print(df)
print(separator, end = "\n\n")

#Then with display.max_columns set to None, for the duration of the with block only.
with pd.option_context("display.max_columns", None):
    print(df)
print(separator, end = "\n\n")

#Finally with display.max_columns set to 16 and display.width set to None.
with pd.option_context("display.max_columns", 16, "display.width", None):
    print(df)

sys.exit(0)

pd.get_option("display.max_columns") = 0
pd.get_option("display.width") = 80

      0    1    2    3    4    5    6   ...   9    10   11   12   13   14   15
0      0    1    2    3    4    5    6  ...    9   10   11   12   13   14   15
100  100  101  102  103  104  105  106  ...  109  110  111  112  113  114  115
200  200  201  202  203  204  205  206  ...  209  210  211  212  213  214  215
300  300  301  302  303  304  305  306  ...  309  310  311  312  313  314  315
400  400  401  402  403  404  405  406  ...  409  410  411  412  413  414  415
500  500  501  502  503  504  505  506  ...  509  510  511  512  513  514  515
600  600  601  602  603  604  605  606  ...  609  610  611  612  613  614  615
700  700  701  702  703  704  705  706  ...  709  710  711  712  713  714  715
800  800  801  802  803  804  805  806  ...  809  810  811  812  813  814  815
900  900  901  902  903  904  905  906  ...  909  910  911  912  913  914  915

[10 rows x 16 columns]
--------------------------------------------------------------------------------

       0    1    2    3    4    5    6    7    8    9   10   11   12   13  \
0      0    1    2    3    4    5    6    7    8    9   10   11   12   13
100  100  101  102  103  104  105  106  107  108  109  110  111  112  113
200  200  201  202  203  204  205  206  207  208  209  210  211  212  213
300  300  301  302  303  304  305  306  307  308  309  310  311  312  313
400  400  401  402  403  404  405  406  407  408  409  410  411  412  413
500  500  501  502  503  504  505  506  507  508  509  510  511  512  513
600  600  601  602  603  604  605  606  607  608  609  610  611  612  613
700  700  701  702  703  704  705  706  707  708  709  710  711  712  713
800  800  801  802  803  804  805  806  807  808  809  810  811  812  813
900  900  901  902  903  904  905  906  907  908  909  910  911  912  913

      14   15
0     14   15
100  114  115
200  214  215
300  314  315
400  414  415
500  514  515
600  614  615
700  714  715
800  814  815
900  914  915
--------------------------------------------------------------------------------

       0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15
0      0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15
100  100  101  102  103  104  105  106  107  108  109  110  111  112  113  114  115
200  200  201  202  203  204  205  206  207  208  209  210  211  212  213  214  215
300  300  301  302  303  304  305  306  307  308  309  310  311  312  313  314  315
400  400  401  402  403  404  405  406  407  408  409  410  411  412  413  414  415
500  500  501  502  503  504  505  506  507  508  509  510  511  512  513  514  515
600  600  601  602  603  604  605  606  607  608  609  610  611  612  613  614  615
700  700  701  702  703  704  705  706  707  708  709  710  711  712  713  714  715
800  800  801  802  803  804  805  806  807  808  809  810  811  812  813  814  815
900  900  901  902  903  904  905  906  907  908  909  910  911  912  913  914  915

Create a new pd.DataFrame that contains only the first five rows of the original pd.DataFrame.

"Create a new pd.DataFrame containing only the first 5 rows of the original pd.DataFrame."

import sys
import numpy as np
import pandas as pd

nrows = 7   #how many rows
ncols = 6   #how many columns

#Each value is ten times its row number plus its column number.
ones, tens = np.meshgrid(np.arange(ncols), np.arange(0, 10 * nrows, 10))
data = ones + tens

df = pd.DataFrame(data)
print(df, end = "\n\n")

first_rows = df.head()   #a new pd.DataFrame holding only the first 5 rows of df
print(first_rows)
sys.exit(0)

    0   1   2   3   4   5
0   0   1   2   3   4   5
1  10  11  12  13  14  15
2  20  21  22  23  24  25
3  30  31  32  33  34  35
4  40  41  42  43  44  45
5  50  51  52  53  54  55
6  60  61  62  63  64  65

    0   1   2   3   4   5
0   0   1   2   3   4   5
1  10  11  12  13  14  15
2  20  21  22  23  24  25
3  30  31  32  33  34  35
4  40  41  42  43  44  45

Now create a new pd.DataFrame that contains only the first five columns of the original pd.DataFrame.

"Create a new pd.DataFrame containing only the first 5 columns of the original pd.DataFrame."

import sys
import numpy as np
import pandas as pd

nrows = 7   #how many rows
ncols = 6   #how many columns

#Each value is ten times its row number plus its column number.
ones, tens = np.meshgrid(np.arange(ncols), np.arange(0, 10 * nrows, 10))
data = ones + tens

df = pd.DataFrame(data)
print(df, end = "\n\n")

flipped = df.T                 #uppercase T for transpose: the columns of df become rows
print(flipped, end = "\n\n")

first_five = flipped.head()    #the first 5 rows of the transpose, i.e., the first 5 columns of df
print(first_five, end = "\n\n")

print(first_five.T)            #transpose back, so the 5 columns stand upright again
sys.exit(0)

    0   1   2   3   4   5
0   0   1   2   3   4   5
1  10  11  12  13  14  15
2  20  21  22  23  24  25
3  30  31  32  33  34  35
4  40  41  42  43  44  45
5  50  51  52  53  54  55
6  60  61  62  63  64  65

   0   1   2   3   4   5   6
0  0  10  20  30  40  50  60
1  1  11  21  31  41  51  61
2  2  12  22  32  42  52  62
3  3  13  23  33  43  53  63
4  4  14  24  34  44  54  64
5  5  15  25  35  45  55  65

   0   1   2   3   4   5   6
0  0  10  20  30  40  50  60
1  1  11  21  31  41  51  61
2  2  12  22  32  42  52  62
3  3  13  23  33  43  53  63
4  4  14  24  34  44  54  64

    0   1   2   3   4
0   0   1   2   3   4
1  10  11  12  13  14
2  20  21  22  23  24
3  30  31  32  33  34
4  40  41  42  43  44
5  50  51  52  53  54
6  60  61  62  63  64

Make sure that tail works too.

Describe each of the five columns of the following pd.DataFrame.

"Describe each column of a pd.DataFrame."

import sys
import numpy as np
import pandas as pd

nrows = 10
ncols = 5

#Each value is its row label (a multiple of ten) plus its column number.
ones, tens = np.meshgrid(np.arange(ncols), np.arange(0, 10 * nrows, 10))
data = ones + tens

#Label the rows 0, 10, 20, ... to match the tens in each row.
df = pd.DataFrame(data, index = pd.RangeIndex(0, 10 * nrows, 10))
print(df, end = "\n\n")

summary = df.describe()   #a pd.DataFrame with one column of statistics per column of df
print(summary)
sys.exit(0)

     0   1   2   3   4
0    0   1   2   3   4
10  10  11  12  13  14
20  20  21  22  23  24
30  30  31  32  33  34
40  40  41  42  43  44
50  50  51  52  53  54
60  60  61  62  63  64
70  70  71  72  73  74
80  80  81  82  83  84
90  90  91  92  93  94

               0          1          2          3          4
count  10.000000  10.000000  10.000000  10.000000  10.000000
mean   45.000000  46.000000  47.000000  48.000000  49.000000
std    30.276504  30.276504  30.276504  30.276504  30.276504
min     0.000000   1.000000   2.000000   3.000000   4.000000
25%    22.500000  23.500000  24.500000  25.500000  26.500000
50%    45.000000  46.000000  47.000000  48.000000  49.000000
75%    67.500000  68.500000  69.500000  70.500000  71.500000
max    90.000000  91.000000  92.000000  93.000000  94.000000

Now describe each of the ten rows.

by_row = df.T   #uppercase T for transpose: each row of df becomes a column
print(by_row, end = "\n\n")

row_stats = by_row.describe()   #one column of statistics per row of df

#Print the statistics with display.width set to None for the duration of the with block.
with pd.option_context("display.width", None):
    print(row_stats)
print()

print(row_stats.T)   #transposed back: one row of statistics per row of df

   0  10  20  30  40  50  60  70  80  90
0  0  10  20  30  40  50  60  70  80  90
1  1  11  21  31  41  51  61  71  81  91
2  2  12  22  32  42  52  62  72  82  92
3  3  13  23  33  43  53  63  73  83  93
4  4  14  24  34  44  54  64  74  84  94

             0          10         20         30         40         50         60         70         80         90
count  5.000000   5.000000   5.000000   5.000000   5.000000   5.000000   5.000000   5.000000   5.000000   5.000000
mean   2.000000  12.000000  22.000000  32.000000  42.000000  52.000000  62.000000  72.000000  82.000000  92.000000
std    1.581139   1.581139   1.581139   1.581139   1.581139   1.581139   1.581139   1.581139   1.581139   1.581139
min    0.000000  10.000000  20.000000  30.000000  40.000000  50.000000  60.000000  70.000000  80.000000  90.000000
25%    1.000000  11.000000  21.000000  31.000000  41.000000  51.000000  61.000000  71.000000  81.000000  91.000000
50%    2.000000  12.000000  22.000000  32.000000  42.000000  52.000000  62.000000  72.000000  82.000000  92.000000
75%    3.000000  13.000000  23.000000  33.000000  43.000000  53.000000  63.000000  73.000000  83.000000  93.000000
max    4.000000  14.000000  24.000000  34.000000  44.000000  54.000000  64.000000  74.000000  84.000000  94.000000

    count  mean       std   min   25%   50%   75%   max
0     5.0   2.0  1.581139   0.0   1.0   2.0   3.0   4.0
10    5.0  12.0  1.581139  10.0  11.0  12.0  13.0  14.0
20    5.0  22.0  1.581139  20.0  21.0  22.0  23.0  24.0
30    5.0  32.0  1.581139  30.0  31.0  32.0  33.0  34.0
40    5.0  42.0  1.581139  40.0  41.0  42.0  43.0  44.0
50    5.0  52.0  1.581139  50.0  51.0  52.0  53.0  54.0
60    5.0  62.0  1.581139  60.0  61.0  62.0  63.0  64.0
70    5.0  72.0  1.581139  70.0  71.0  72.0  73.0  74.0
80    5.0  82.0  1.581139  80.0  81.0  82.0  83.0  84.0
90    5.0  92.0  1.581139  90.0  91.0  92.0  93.0  94.0

What do you get when you convert a pd.DataFrame to a Python list? I started the week with Monday, instead of Sunday, because datetime.weekday starts the week with Monday.

"Convert a pd.DataFrame to a Python list."
import sys
import pandas as pd

#Vertical index: one label per row, starting the week with Monday.
index = pd.Index(
    ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
    name = "weekday"
)

#Horizontal index: one label per column.
columns = pd.Index(["Spanish", "German", "Hebrew"], name = "language")

data = [
    ["lunes",     "Montag",     "יוֹם שֵׁנִי"],
    ["martes",    "Dienstag",   "יוֹם שְׁלִישִׁי"],
    ["miércoles", "Mittwoch",   "יוֹם רְבִיעִי"],
    ["jueves",    "Donnerstag", "יוֹם חֲמִישִׁי"],
    ["viernes",   "Freitag",    "יוֹם שִׁשִּׁי"],
    ["sábado",    "Samstag",    "יוֹם שַׁבָּת"],
    ["domingo",   "Sonntag",    "יוֹם רִאשׁוֹן"]
]
df = pd.DataFrame(data, index = index, columns = columns)
print(df, end = "\n\n")

#Converting the pd.DataFrame to a Python list yields its column names.
colnames = list(df)
for colname in colnames:   #colname is a string
    print(colname)

sys.exit(0)

language     Spanish      German           Hebrew
weekday
Monday         lunes      Montag      יוֹם שֵׁנִי
Tuesday       martes    Dienstag  יוֹם שְׁלִישִׁי
Wednesday  miércoles    Mittwoch    יוֹם רְבִיעִי
Thursday      jueves  Donnerstag   יוֹם חֲמִישִׁי
Friday       viernes     Freitag    יוֹם שִׁשִּׁי
Saturday      sábado     Samstag     יוֹם שַׁבָּת
Sunday       domingo     Sonntag    יוֹם רִאשׁוֹן

Spanish
German
Hebrew

What do you get when you convert the pd.DataFrame to a Python dict?

#dict(df) has one item per column: the key is the column name, the value is that column.
for language, series in dict(df).items():   #language is a string, series is a pd.Series
    print(language)
    print(series)
    print()

Spanish
weekday
Monday           lunes
Tuesday         martes
Wednesday    miércoles
Thursday        jueves
Friday         viernes
Saturday        sábado
Sunday         domingo
Name: Spanish, dtype: object

German
weekday
Monday           Montag
Tuesday        Dienstag
Wednesday      Mittwoch
Thursday     Donnerstag
Friday          Freitag
Saturday        Samstag
Sunday          Sonntag
Name: German, dtype: object

Hebrew
weekday
Monday           יוֹם שֵׁנִי
Tuesday      יוֹם שְׁלִישִׁי
Wednesday      יוֹם רְבִיעִי
Thursday      יוֹם חֲמִישִׁי
Friday         יוֹם שִׁשִּׁי
Saturday        יוֹם שַׁבָּת
Sunday         יוֹם רִאשׁוֹן
Name: Hebrew, dtype: object

Plot the pd.DataFrame.

"Plot a pd.DataFrame with matplotlib.pyplot."

import pandas as pd
import matplotlib.pyplot as plt

#One column per city.
data = [
    "New York",
    "Yonkers",
    "Pougheepsie",
    "Albany"
]

columns = pd.Index(data = data, name = "City")
index = pd.RangeIndex(1, 7, name = "day of month")   #days 1 through 6

#Gets colder as you go north.
#Gets warmer as the month progresses.

data = [
    [ 6,  4,  2,  0],
    [16, 14, 12, 10],
    [26, 24, 22, 20],
    [36, 34, 32, 30],
    [46, 44, 42, 40],
    [56, 54, 52, 50]
]

df = pd.DataFrame(data = data, index = index, columns = columns)
print(df)

axes = df.plot(grid = True, figsize = [6.4, 4.8]) #width and height in inches
figure = plt.gcf()

#Set the window title through the figure's manager.
#FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and later removed.
figure.canvas.manager.set_window_title("matplotlib.pyplot DataFrame.plot")
axes.set_title("Daily Temperature in March")
axes.set_ylabel("temperature in Fahrenheit")

plt.show()   #runs the GUI event loop; normally does not return until the plot window is closed

City          New York  Yonkers  Pougheepsie  Albany
day of month                                        
1                    6        4            2       0
2                   16       14           12      10
3                   26       24           22      20
4                   36       34           32      30
5                   46       44           42      40
6                   56       54           52      50

For Space Cadets only: examine the source code for class pd.DataFrame.