""" This code provides a basic skeleton for the stationary bandit code. It should be adapted by the students for their work. """ import csv, random class BanditSet(object): """ This object represents...

1 answer below »
I need this project done.


""" This code probides a basic skeleton for the stationary bandit code. It should be adapted by the students for their work. """ import csv, random class BanditSet(object): """ This object represents a set of arms for a stationary multi-armed bandit problem it will store a fixed set of arms from a set and will then maintain them over multiple iterations. """ def __init__(self, DataRows, ArmNames, ExpRate, DistribParam, DecayRate, RewardWeight): """ This initializes the set of choices by acting as a factory class to create one arm instance for each of the choices. The names and the rows will come from the file that is read in. """ # Store the Data for later use. self.Data = DataRows # Initialize the parameters. self.ExplorationRate = ExpRate self.DistributionParameter = DistribParam self.DecayRate = DecayRate self.RewardWeight = RewardWeight # Store items for each of the arms. self.Names = ArmNames # Store a list for the weights. self.Weights = [-1 for I in range(len(ArmNames))] # Calculate the starting probability and add it. StartProb = 1 / float(len(ArmNames)) self.Probabilities = [StartProb for I in range(len(ArmNames))] # And store the Cumulative Reward self.CumulativeReward = 0 def handleRows(self): """ Process each of the rows and update our running reward and the basic probabilies for each one. """ # We initialize the cumulative # Reward to be 0 self.CumulativeReward = 0 # Now iterate over the rows and make each # of the choices. for CurrRow in self.Rows: # Now pick one from the list of probabilities. # Get the reward value from the row. # Update the reward weight. # And update the probabilities. # Return the cumulative reward. return(self.CumulativeReward) def pickArmIndex(self): """ Pick an index based upon the probabilities using the cumulative score approach based upon a random value. """ pass def getReward(self, Index): """ Use the Armnames to get the reward for the chosen arm. 
""" pass def updateWeight(self, Index, Reward): """ Update the weight for the chosen index using the parameters. """ pass def updateProbability(self, Index): """ Update the probability for the index from its weight. """ pass def normalizeProbabilities(self, Index, Reward): """ Normalize the probability values. """ pass "Sample A","Sample B","Sample C","Sample D" 0,0,0,0 1,0,0,0 0,0,1,0 0,0,0,0 0,0,0,1 0,0,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,0,0,1 0,1,0,1 0,0,0,0 0,1,0,0 0,0,0,1 0,0,0,0 1,1,0,0 0,0,0,0 1,1,0,0 0,0,0,0 0,0,0,0 0,0,0,0 1,1,0,0 1,0,0,0 0,1,1,0 0,0,0,0 0,1,0,0 0,1,0,1 0,1,0,0 0,0,0,0 0,0,1,0 0,0,1,0 0,0,0,1 0,1,0,0 0,0,0,0 1,0,0,0 0,0,0,0 0,0,0,0 0,1,0,0 1,0,1,0 1,1,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,0,0,1 1,0,0,0 0,1,0,1 0,1,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,0,1,0 1,0,0,1 0,0,0,0 0,1,0,0 0,1,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,1,0,0 0,0,0,0 0,0,0,0 0,0,1,0 0,0,1,0 0,0,0,0 1,1,0,1 0,0,0,0 0,1,1,0 1,0,0,0 0,1,0,0 1,0,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,0,1,0 0,1,1,1 1,0,0,0 1,1,1,0 0,0,0,1 0,0,1,0 0,0,1,0 0,0,0,0 0,1,0,0 0,0,0,0 0,0,0,0 1,1,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,1,0,0 0,1,1,0 0,0,0,0 0,0,0,0 0,0,1,0 0,1,0,0 0,0,0,0 0,0,0,0 0,1,0,1 0,0,0,0 0,0,0,0 0,1,0,0 0,0,0,0 0,0,1,0 1,1,0,0 1,0,0,0 0,0,0,0 0,0,0,0 0,1,0,0 0,1,0,0 0,0,0,0 0,0,1,0 0,0,0,0 0,1,0,0 0,0,0,0 0,0,0,0 0,1,0,0 0,0,0,0 0,1,1,0 0,0,0,0 1,0,1,0 1,0,0,0 0,0,0,0 0,1,0,0 0,0,0,1 0,1,0,0 0,0,0,0 0,0,0,0 0,0,0,1 0,0,0,0 0,0,1,1 0,0,0,0 0,0,1,0 0,0,0,0 0,1,0,0 1,0,0,0 0,1,0,1 0,1,0,0 1,0,0,0 1,0,1,1 0,0,0,0 0,0,0,0 1,1,0,0 0,1,0,0 0,0,0,1 0,0,0,0 0,0,0,0 0,1,0,0 1,0,0,0 0,0,0,0 0,1,0,0 0,0,0,0 0,0,0,0 1,0,0,0 0,1,1,0 0,0,0,0 0,0,0,1 0,0,0,0 1,0,1,0 0,0,0,0 0,0,0,0 0,1,0,0 0,0,1,0 0,0,0,1 0,0,0,0 1,1,0,0 1,0,0,0 0,0,0,0 1,0,0,0 0,1,1,0 0,0,0,0 0,0,0,0 1,0,1,0 0,0,0,0 0,0,0,0 0,0,0,0 0,1,0,0 1,1,0,0 1,0,1,0 0,0,0,0 0,0,0,0 1,1,0,0 0,1,0,0 0,0,0,0 0,1,0,0 1,0,0,0 1,0,0,0 0,1,0,0 0,0,0,0 1,1,0,0 0,0,0,0 0,0,0,1 0,1,0,0 1,0,0,0 0,1,1,0 0,1,0,0 0,0,1,0 1,0,1,0 0,1,0,0 0,0,0,0 0,0,1,0 1,1,0,0 0,0,0,0 0,0,0,0 0,0,0,0 
0,0,1,0 0,0,0,0 0,0,0,0 0,1,0,0 0,0,0,0 1,1,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,1,0,1 0,0,1,0 0,0,1,0 0,0,0,1 0,0,0,0 0,0,0,0 1,0,0,0 1,0,0,0 0,1,0,0 0,0,0,0 0,0,0,0 0,0,0,0 0,0,0,0 1,1,0,0 1,0,0,0 0,1,1,0 0,0,0,0 0,0,0,0 0,0,0,0 1,1,0,0 0,0,0,0 1,0,0,0 0
Answered Same Day Aug 02, 2021

Answer To: """ This code provides a basic skeleton for the stationary bandit code. It should be adapted by the...

Swapnil answered on Aug 03 2021
144 Votes
89040/Bandits.py
import csv, random, sys

class BanditSet(object):
    """Set of arms for a stationary multi-armed bandit problem.

    The set is built once from the rows and column names of the data file.
    It keeps one weight and one selection probability per arm and updates
    both every time a data row is processed.
    """

    def __init__(self, DataRows, ArmNames, ExpRate, DistribParam, DecayRate, RewardWeight):
        """Store the data and parameters and start every arm off equally.

        Args:
            DataRows: Iterable of rows; each row is indexed by arm name to
                obtain that arm's reward (e.g. ``csv.DictReader`` rows).
            ArmNames: Sequence of arm names, used as keys into each row.
            ExpRate: Exploration rate mixed into the selection probability.
            DistribParam: Value the exploration rate is multiplied by when
                a probability is recomputed.
            DecayRate: Factor applied to an arm's previous weight.
            RewardWeight: Factor applied to the observed reward.

        Raises:
            ValueError: If ``ArmNames`` is empty or ``None``.
        """
        if not ArmNames:
            raise ValueError("at least one arm name is required")

        # Store the data for later use.
        self.Data = DataRows

        # Store the parameters.
        self.ExplorationRate = ExpRate
        self.DistributionParameter = DistribParam
        self.DecayRate = DecayRate
        self.RewardWeight = RewardWeight

        # Store the arm names; list positions double as arm indices.
        self.Names = ArmNames

        # Every arm starts with the same probability and the same weight.
        StartProb = 1 / float(len(ArmNames))
        self.Probabilities = [StartProb for _ in ArmNames]
        self.Weights = [StartProb for _ in ArmNames]

        # Running total of the rewards collected by handleRows.
        self.CumulativeReward = 0

    def handleRows(self):
        """Play one round per data row and return the total reward.

        For each row an arm is picked at random according to the current
        probabilities, its reward is read from the row, and the weights and
        probabilities are updated. Progress is printed for every row.

        Returns:
            The cumulative reward over all rows (0 when there are no rows).
        """
        # Each call starts a fresh run.
        self.CumulativeReward = 0

        for CurrRow in self.Data:
            print(CurrRow)
            ArmIndex = self.pickArmIndex()
            RewardValue = self.getReward(ArmIndex, CurrRow)
            self.updateWeight(ArmIndex, RewardValue)
            self.updateProbability(ArmIndex)
            self.normalizeProbabilities(ArmIndex, RewardValue)
            self.CumulativeReward = float(self.CumulativeReward) + float(RewardValue)
            print('choice made: ', ArmIndex)
            print('reward from choice: ', RewardValue)
            print('cumulative reward: ', self.CumulativeReward)

        return self.CumulativeReward

    def pickArmIndex(self):
        """Return a random arm index drawn according to ``self.Probabilities``."""
        return random.choices(range(len(self.Probabilities)), weights=self.Probabilities, k=1)[0]

    def getReward(self, Index, row):
        """Return the reward stored in ``row`` for the arm at ``Index``.

        The value is returned exactly as stored in the row (a string when the
        rows come from ``csv.DictReader``); callers convert it with ``float``.
        """
        return row[self.Names[Index]]

    def updateWeight(self, Index, Reward):
        """Update the chosen arm's weight, then rescale all weights to sum to 1.

        The new weight is ``DecayRate * old_weight + RewardWeight * Reward``.

        Raises:
            ZeroDivisionError: If the weights sum to zero after the update.
        """
        self.Weights[Index] = (
            float(self.DecayRate) * float(self.Weights[Index])
            + float(self.RewardWeight) * float(Reward)
        )

        # Rescale in place so the weights form a distribution again.
        TotalWeight = sum(self.Weights)
        self.Weights[:] = [float(Weight) / float(TotalWeight) for Weight in self.Weights]

    def updateProbability(self, Index):
        """Recompute the probability of the arm at ``Index`` from its weight.

        The probability is
        ``weight * (1 - ExplorationRate) + ExplorationRate * DistributionParameter``.
        Only this one entry changes; call normalizeProbabilities afterwards
        to make the probabilities sum to 1 again.
        """
        ExpRate = float(self.ExplorationRate)
        self.Probabilities[Index] = (
            float(self.Weights[Index]) * float(1 - ExpRate)
            + ExpRate * float(self.DistributionParameter)
        )

    def normalizeProbabilities(self, Index, Reward):
        """Rescale the probabilities in place so that they sum to 1.

        ``Index`` and ``Reward`` are accepted for interface compatibility and
        are not used.

        Raises:
            ZeroDivisionError: If the probabilities sum to zero.
        """
        TotalProbability = sum(self.Probabilities)
        self.Probabilities[:] = [
            float(Probability) / float(TotalProbability)
            for Probability in self.Probabilities
        ]
def main():
    """Run the bandit over the CSV file named on the command line.

    The first command-line argument is the path of a CSV file whose header
    row names the arms; every following row is handed to the bandit as one
    round. The process exits with a message when no path is given or the
    file has no header row.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: Bandits.py <data.csv>")
    fileName = sys.argv[1]

    # Fixed parameters for the run.
    ExpRate = .3
    DistribParam = .1
    DecayRate = .6
    RewardWeight = .9

    with open(fileName, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # Reading fieldnames consumes the header row; the rest are data rows.
        ArmNames = reader.fieldnames
        DataRows = list(reader)

    if not ArmNames:
        sys.exit("no arm names found in the header row of " + fileName)

    bandits = BanditSet(DataRows, ArmNames, ExpRate, DistribParam, DecayRate, RewardWeight)
    bandits.handleRows()
# Only run the simulation when executed as a script, not when imported.
if __name__ == "__main__":
    main()
89040/BanditsData.csv
"Sample A","Sample B","Sample C","Sample...
SOLUTION.PDF

Answer To This Question Is Available To Download

Related Questions & Answers

More Questions »

Submit New Assignment

Copy and Paste Your Assignment Here