import csv
from itertools import combinations, groupby
from operator import itemgetter

import numpy as np


def get_data(filename):
    """Read a transactions CSV file into a list of rows.

    Each returned row has the form ``[1, 'Belem', 'oil']``: the transaction
    number (as an ``int``), the location, and the product. The location and
    product fields are kept exactly as they appear in the file.

    Args:
        filename: Path of the comma-delimited file to read.

    Returns:
        A list of ``[trans_num, city, item]`` lists, in file order.

    Raises:
        ValueError: If the first field of a row is not an integer.
        IndexError: If a non-empty row has fewer than three fields.
    """
    data = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', newline='') as data_file:
        csv_reader = csv.reader(data_file, delimiter=',')
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing
                # newline at the end of the file); there is nothing to parse.
                continue
            trans_num = int(row[0].strip())
            city = row[1]
            item = row[2]
            data.append([trans_num, city, item])
    return data


def get_cities(data):
    """Return a list of the distinct cities (second field) found in data.

    A set is used to drop duplicates, so the order of the returned list is
    not defined.
    """
    return list({row[1] for row in data})


def get_products(data):
    """Return a list of the distinct products (third field) found in data.

    A set is used to drop duplicates, so the order of the returned list is
    not defined.
    """
    return list({row[2] for row in data})


def sort_products(products):
    """Return a new list of products ordered by their first character.

    The sort is stable, so products sharing a first character keep their
    input order. An empty-string product raises ``IndexError``.
    """
    # NOTE(review): only the first character is compared; confirm whether
    # full alphabetical order was intended before changing the key.
    sorted_products = sorted(products, key=lambda x: x[0])
    return sorted_products


def get_purchases(data):
    """Group rows into transactions.

    Every transaction has the form
    ``[1, ['oil', 'cheese', 'banana', 'egg', 'chicken', 'garlic']]``:
    the first element is the transaction number and the second is the list
    of products bought in it.

    Only consecutive rows with the same transaction number are grouped
    together, so rows are expected to arrive ordered by transaction.

    Args:
        data: Rows of the form ``[trans_num, city, item]``.

    Returns:
        A list of ``[trans_num, [item, ...]]`` lists; ``[]`` for empty data.
    """
    # groupby() batches consecutive rows that share the same key.
    return [
        [trans_num, [row[2] for row in rows]]
        for trans_num, rows in groupby(data, key=itemgetter(0))
    ]


def compute_support(products, trans, min_occurence):
    """Count, for every pair of products, the transactions containing both.

    Each element of the result is a list like ``['cheese', 'egg', 4]``.
    A pair is reported once only: if 'cheese' and 'egg' is reported,
    'egg' and 'cheese' is not. Pairs are emitted in the order in which the
    products appear in ``products``.

    Args:
        products: The products to pair up; duplicates are ignored.
        trans: Transactions of the form ``[trans_num, [item, ...]]``.
        min_occurence: Minimum number of shared transactions a pair needs
            in order to be reported.

    Returns:
        A list of ``[product_a, product_b, count]`` lists.
    """
    # Drop duplicate products while keeping first-seen order.
    unique_products = list(dict.fromkeys(products))

    # Map each product to the positions of the transactions that contain it.
    # Positions (not transaction numbers) are used so that transactions are
    # counted separately even if a number repeats.
    baskets_with = {product: set() for product in unique_products}
    for position, transaction in enumerate(trans):
        for item in transaction[1]:
            if item in baskets_with:
                baskets_with[item].add(position)

    support = []
    # combinations() yields each unordered pair exactly once.
    for first, second in combinations(unique_products, 2):
        count = len(baskets_with[first] & baskets_with[second])
        if count >= min_occurence:
            support.append([first, second, count])
    return support


def sort_support(support):
    """Return a new list of support entries ordered by count, highest first.

    Entries have the form ``[product_a, product_b, count]``; entries with
    equal counts keep their input order.
    """
    sorted_support = sorted(support, key=itemgetter(2), reverse=True)
    return sorted_support