See the Unix command `uniq -c`.
""" Count how many times each score appears. """ import sys scores = [10, 9, 10, 8, 11, 10, 12, 8, 11] #The most frequent score is 10. counter = {} #an empty dict for score in scores: try: counter[score] += 1 except KeyError: counter[score] = 1 for score in sorted(counter): print(f"{score:2} {counter[score]}") sys.exit(0)
8 2 9 1 10 3 11 2 12 1
Usually you put keys and values into a `dict`ionary.
But there is a special type of `dict`ionary called a `collections.Counter`.
(In the language of Object-Oriented Programming, we say that class `collections.Counter` is a subclass of class `dict`.)
You put keys into a `collections.Counter`, and the `collections.Counter` provides the values for you.
""" Count how many times each score appears. """ import sys import collections scores = [10, 9, 10, 8, 11, 10, 12, 8, 11] #The most frequent score is 10. counter = collections.Counter(scores) print("In original key order:") for key, value in counter.items(): #The key is the score, the value is the number of occurrences. print(f"{key:2} {value}") print() print("In order of decreasing frequency:") for key, value in counter.most_common(): #Also try for key, value in reversed(counter.most_common()): print(f"{key:2} {value}") print() print("In order of increasing score:") for key in sorted(counter): #Also try for key in sorted(counter, reverse = True): print(f"{key:2} {counter[key]}") sys.exit(0)
In original key order: 10 3 9 1 8 2 11 2 12 1 In order of decreasing frequency: 10 3 8 2 11 2 9 1 12 1 In order of increasing score: 8 2 9 1 10 3 11 2 12 1
import collections

#The method resolution order starts with the class itself, followed by its base classes.
for cls in collections.Counter.__mro__:   #cls is a type
    print(cls)
<class 'collections.Counter'> <class 'dict'> <class 'object'>
The following program produces exit status 0 if every letter is present, exit status 1 otherwise. See Intersect for another way to find the missing letters.
""" Count how many times each letter appears. Is every letter present? """ import sys import string import collections s = "Pack my box with five dozen liquor jugs." #pangram #Prep the patient for surgery. listOfLetters = [c for c in s if c.isalpha()] #listOfLetters is a list of one-character strings. s = "".join(listOfLetters) #s is a string s = s.lower() counter = collections.Counter(s) for key in sorted(counter): #List the lowercase letters in alphabetical order. print(key, counter[key]) allPresent = True #Innocent until proven guilty. for c in string.ascii_lowercase: #Loop 26 times. if counter[c] == 0: print(f'"{c}" is missing!') allPresent = False sys.exit(0 if allPresent else 1)
a 1 b 1 c 1 d 1 e 2 f 1 g 1 h 1 i 3 j 1 k 1 l 1 m 1 n 1 o 3 p 1 q 1 r 1 s 1 t 1 u 2 v 1 w 1 x 1 y 1 z 1
""" Count how many times each state was entered. The counts automatically start at 0. """ import sys import collections counter = collections.Counter() #Create an empty Counter. counter["NY"] += 1 #Automatically starts at 0. counter["NY"] += 1 counter["NJ"] += 1 counter["CT"] += 1 counter["NJ"] += 1 counter["NY"] += 1 for state in sorted(counter): #alphabetical order print(state, counter[state]) sys.exit(0)
CT 1 NJ 2 NY 3
An example of counting manually.
""" List the CAMIS number, name, and number of mouse violations of the 10 restaurants with the largest number of mouse violations. """ import sys import csv #Comma-Separated Values import urllib.request import collections #Database is at #https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/43nn-pn8j url = "https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv" try: fileFromUrl = urllib.request.urlopen(url) except urllib.error.URLError as error: print(error, file = sys.stderr) sys.exit(1) sequenceOfBytes = fileFromUrl.read() #Slurp whole file into one big sequenceOfBytes. fileFromUrl.close() try: s = sequenceOfBytes.decode("utf-8") #s is a string except UnicodeError as error: print(error, file = sys.stderr) sys.exit(1) lines = csv.reader(s.splitlines()) #lines is a list of lists #Two dictionaries that let you look up a CAMIS number and find the corresponding ... dba = {} #name of the restaurant numberOfViolations = collections.Counter() #number of mice violations for that restaurant for line in lines: #line is a list of 26 strings if "Evidence of mice or live mice present in facility's food and/or non-food areas." in line[11]: camis = int(line[0]) #the id number of the restaurant dba[camis] = line[1] #Record the name of this restaurant. numberOfViolations[camis] += 1 #Tally an additional violation. Automatically starts at 0. for camis, n in numberOfViolations.most_common(10): #the 10 worst offenders, starting with the worst print(f"{camis:8} {n:2} {dba[camis]}") sys.exit(0)
50016943 13 EL NUEVO ROBLE BILLIARDS 50046623 12 COLD STONE CREAMERY 40423819 12 ALFONSO'S PASTRY SHOPPE 50015263 12 LA POSADA MEXICAN FOOD 41259444 12 COCO ROCO RESTAURANT 50058969 11 CAFE CREOLE 41642251 11 LITTLE CAESARS 50035603 11 AUTHENTIC SZECHUAN 41407999 11 BERMUDEZ BAKERY 50038412 11 TWIN SISTER PAN
After the statement that creates `lines`, change the rest of the above program to the following.
What are the tradeoffs?
#A row counts as a mouse violation if field 11 contains this text.
mouseViolation = "Evidence of mice or live mice present in facility's food and/or non-food areas."

#One (CAMIS number, name of restaurant) tuple for every mouse violation found in lines.
violations = [(int(row[0]), row[1]) for row in lines if mouseViolation in row[11]]

#Equal tuples are tallied together, so each count is the number of violations for one tuple.
tally = collections.Counter(violations)

for (camis, name), n in tally.most_common(10):   #Start with worst offender.
    print(f"{camis:8} {n:2} {name}")

sys.exit(0)
50016943 13 EL NUEVO ROBLE BILLIARDS 50046623 12 COLD STONE CREAMERY 40423819 12 ALFONSO'S PASTRY SHOPPE 50015263 12 LA POSADA MEXICAN FOOD 41259444 12 COCO ROCO RESTAURANT 50058969 11 CAFE CREOLE 41642251 11 LITTLE CAESARS 50035603 11 AUTHENTIC SZECHUAN 41407999 11 BERMUDEZ BAKERY 50038412 11 TWIN SISTER PAN
What I really want to create is a `dict`ionary in which each key is a CAMIS number, and each value is a `tuple` containing the restaurant’s name and number of mouse violations.
Then I’d like to treat this `dict`ionary as a `collections.Counter` based on the number of mouse violations (and ignoring the name of the restaurant).
Is this possible?
"""
Count how many times each score appears, using a pandas Series.
"""

import sys
import pandas as pd

scores = [10, 9, 10, 8, 11, 10, 12, 8, 11]   #The most frequent score is 10.
series = pd.Series(scores)   #series is a pandas Series.

print("The original series:")
print(series)
print()

#The index of counts holds the scores; the values are the numbers of occurrences.
counts = series.value_counts(sort = True)

print("In order of decreasing frequency:")
print(counts)   #also try print(series.value_counts(sort = True, ascending = True))
print()

print("In order of increasing score:")
#sort = False only turns off the sorting by frequency: it does not put the scores in order.
#Sort by the index (the scores) explicitly.
print(series.value_counts(sort = False).sort_index())   #also try .sort_index(ascending = False)
print()

#counts is in order of decreasing frequency, so the most frequent score comes first.
mostFrequentScore = counts.index[0]
numberOfTimes = counts.iloc[0]   #iloc selects by position, not by index label.

print(f"The most frequently occurring score is {mostFrequentScore}.")
print(f"It occurs {numberOfTimes} times.")

sys.exit(0)
The original series: 0 10 1 9 2 10 3 8 4 11 5 10 6 12 7 8 8 11 dtype: int64 In order of decreasing frequency: 10 3 11 2 8 2 12 1 9 1 dtype: int64 In order of increasing score: 8 2 9 1 10 3 11 2 12 1 dtype: int64 The most frequently occurring score is 10. It occurs 3 times.