The array module in the Python Standard Library
A container is an object such as a list, tuple, dict, or set.
A list is a heterogeneous container because it can contain items of different data types.
An array.array is a homogeneous container because all its items must be of the same data type. (Other examples of homogeneous containers are str and bytes.)
# One person's record, kept together in a single heterogeneous list.
person = [
    "Smith",       # last name
    "John",        # first name
    "A",           # middle initial
    10040,         # zip code
    2122345678,    # phone number
    True,          # gender
]
Why make a heterogeneous list instead of six separate variables?
# Six separate variables describing one person, one assignment per line.
lastName = "Smith"
firstName = "John"
middleInitial = "A"
zipCode = 10040
phoneNumber = 2122345678
gender = True
Because putting them into a list makes it easier to have more than one person:
import sys

# Each inner list is one person, in the order
# [last name, first name, middle initial, zip code, phone number, gender].
# A missing middle initial is stored as None.
persons = [
    ["Smith",   "John", "A",  10040, 2122345678, True],
    ["Public",  "John", "Q",  10003, 7182345678, True],
    ["Doe",     "Jane", None, 10003, 9142345678, False],
    ["Sixpack", "Joe",  None, 10003, 9143456789, False]
]

# Index 4 of every record is the phone number.
phoneNumbers = [person[4] for person in persons]

# Phone numbers are stored as ints, so convert to str to test the leading
# three digits (the 914 area code).
westchesterPhoneNumbers = [
    phoneNumber
    for phoneNumber in phoneNumbers
    if str(phoneNumber).startswith("914")
]

print(f"westchesterPhoneNumbers = {westchesterPhoneNumbers}")   #Python 3.7
print(f"{westchesterPhoneNumbers = }")   #Python 3.8: "=" echoes the expression text too
sys.exit(0)
westchesterPhoneNumbers = [9142345678, 9143456789]
westchesterPhoneNumbers = [9142345678, 9143456789]
"How many bytes are occupied by 100 million floats?" import sys import array n = 100_000_000 #underscores ignored print("list:") li = [float(i) for i in range(n)] #a list of floats print(f"{len(li) = :11,}") print(f"{sys.getsizeof(li) = :11,}") print(f"{sys.getsizeof(li) / len(li) = }") print() print("tuple:") tu = tuple(li) print(f"{len(tu) = :11,}") print(f"{sys.getsizeof(tu) = :11,}") print() print("array.array:") ar = array.array("d", li) #"d" for "double" print(f"{len(ar) = :11,}") print(f"{ar.buffer_info()[1] = :11,}") print(f"{ar.itemsize = :11,}") print(f"{sys.getsizeof(ar) = :11,}") print() sys.exit(0)
list:
len(li) = 100,000,000
sys.getsizeof(li) = 859,724,464
sys.getsizeof(li) / len(li) = 8.59724464

tuple:
len(tu) = 100,000,000
sys.getsizeof(tu) = 800,000,040

array.array:
len(ar) = 100,000,000
ar.buffer_info()[1] = 100,000,000
ar.itemsize =           8
sys.getsizeof(ar) = 800,000,064
"How many seconds does it take to slice a list of 100 million floats?" import sys import timeit n = 100_000_000 #how many floats repeat = 4 #Do the experiment 4 times. number = 1 #Each experiment consists of 1 execution of the code. setup = f"l = [float(i) for i in range({n})]" code = "l = l[::2]" s = timeit.repeat(code, setup = setup, repeat = repeat, number = number) print(f"{s} seconds") minList = min(s) print(f"{minList} seconds is the minimum.") print() setup = f'import array; a = array.array("d", range({n}))' code = "a = a[::2]" s = timeit.repeat(code, setup = setup, repeat = repeat, number = number) print(f"{s} seconds") minArray = min(s) print(f"{minArray} seconds is the minimum.") print() print(f"The array.array is {minList / minArray} times faster than the list.") sys.exit(0)
[1.9324892540000018, 1.4928774520000019, 1.427949697999999, 1.396348383000003] seconds
1.396348383000003 seconds is the minimum.

[0.360193326000001, 0.34141492199999846, 0.33032098099999985, 0.3401288020000095] seconds
0.33032098099999985 seconds is the minimum.

The array.array is 4.227247021284438 times faster than the list.