
All Samples(11)  |  Call(11)  |  Derive(0)  |  Import(0)

src/r/l/rlpy-HEAD/rlpy/MDPSolvers/ValueIteration.py   rlpy
        converged = False
        iteration = 0
        while self.hasTime() and not converged:
            prev_weight_vec = self.representation.weight_vec.copy()
            # Sweep The State Space
            for i in xrange(0, no_of_states):
                if not self.hasTime():
                    break
                # Sweep The Actions
                for a in actions:
                    if not self.hasTime():
                        break
                    self.BellmanBackup(s, a, ns_samples=self.ns_samples)
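
A minimal, self-contained sketch of the budget check these solvers rely on. hasTime() itself is not shown in the excerpts; the planning_time and start_time attributes below are assumptions about how such a wall-clock check is typically written, not rlpy's code:

    from time import time

    class BudgetedSweep(object):
        def __init__(self, planning_time=60.0, no_of_states=1000):
            self.planning_time = planning_time  # planning budget in seconds (assumed)
            self.no_of_states = no_of_states
            self.start_time = time()            # recorded when solving starts (assumed)

        def hasTime(self):
            # True while the elapsed wall-clock time is still within the budget.
            return time() - self.start_time < self.planning_time

        def sweep(self, backup):
            # Sweep the state space, abandoning the pass once the budget runs out,
            # mirroring the loop structure in the ValueIteration excerpt above.
            for i in range(self.no_of_states):
                if not self.hasTime():
                    break
                backup(i)

Every inner loop in the excerpts repeats the same check, so a single sweep or episode never overruns the planning budget.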

src/r/l/rlpy-HEAD/rlpy/MDPSolvers/PolicyIteration.py   rlpy
            converged = False
            policy_evaluation_iteration = 0
            while not converged and self.hasTime() and policy_evaluation_iteration < self.max_PE_iterations:
                policy_evaluation_iteration += 1
                # Sweep The State Space
                for i in xrange(0, no_of_states):
                    if not self.hasTime():
                        break
            policyChanged = 0
            i = 0
            while i < no_of_states and self.hasTime():
                s = self.representation.stateID2state(i)
                if not self.domain.isTerminal(s) and len(self.domain.possibleActions(s)):
                    for a in self.domain.possibleActions(s):
                        if not self.hasTime():
                            break
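
The second half of this excerpt (policyChanged, the state loop, the action loop) is the policy-improvement step. A sketch of that pattern under the same kind of time budget; the dictionary-based policy and the actions_for, q_value, and has_time callables are illustrative stand-ins, not rlpy's API:

    def improve_policy(policy, states, actions_for, q_value, has_time):
        # policy: dict mapping state -> currently chosen action
        # actions_for(s): iterable of actions available in state s
        # q_value(s, a): action-value estimate under the current value function
        # has_time(): planning-budget check, as in the excerpts
        policy_changed = 0
        for s in states:
            if not has_time():
                break
            best_a, best_q = None, float("-inf")
            for a in actions_for(s):
                if not has_time():
                    break
                q = q_value(s, a)
                if q > best_q:
                    best_a, best_q = a, q
            if best_a is not None and best_a != policy.get(s):
                policy[s] = best_a
                policy_changed += 1
        return policy_changed  # zero changes over a full pass signals a stable policy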

src/r/l/rlpy-HEAD/rlpy/MDPSolvers/TrajectoryBasedPolicyIteration.py   rlpy
            forcedDeterministicAmongBestActions=True)  # Copy the representation so that the weight change during the evaluation does not change the policy
 
        while self.hasTime() and not converged:
            # Policy Evaluation
            PE_iteration = 0
                # Start of this statement is cut off in the excerpt; policy.pi and np.random.rand are assumed.
                a = policy.pi(s, terminal, p_actions) if np.random.rand(
                ) > self.epsilon else randSet(
                    p_actions)
                while not terminal and step < self.domain.episodeCap and self.hasTime():
 
                    # print "Policy Features = %d" % policy.representation.features_num

src/r/l/rlpy-HEAD/rlpy/MDPSolvers/TrajectoryBasedValueIteration.py   rlpy
        # Count consecutive trajectories with a small observed Bellman error
        converged_trajectories = 0
        while self.hasTime() and not converged:
 
            # Generate a new episode e-greedy with the current values
            # Start of this statement is cut off in the excerpt; bestAction and np.random.rand are assumed.
            a = self.representation.bestAction(s, terminal, p_actions) if np.random.rand(
            ) > self.epsilon else randSet(
                p_actions)
            while not terminal and step < self.domain.episodeCap and self.hasTime():
                new_Q = self.representation.Q_oneStepLookAhead(
                    s,
                    a,
                    self.ns_samples)  # remaining arguments are cut off in the excerpt; assumed here
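
The last excerpt backs up each visited state-action pair toward a one-step lookahead target and tracks the Bellman error to detect convergence. A tabular sketch of that update; the dict-based Q table and the model(s, a) sampler returning (reward, next_state, terminal, next_actions) are assumptions for illustration, not rlpy's representation API:

    def lookahead_backup(Q, s, a, model, gamma, ns_samples):
        # Average the one-step target over ns_samples draws of the next state.
        total = 0.0
        for _ in range(ns_samples):
            r, ns, terminal, next_actions = model(s, a)
            best_next = 0.0 if terminal else max(Q.get((ns, na), 0.0) for na in next_actions)
            total += r + gamma * best_next
        new_q = total / ns_samples
        bellman_error = new_q - Q.get((s, a), 0.0)
        Q[(s, a)] = new_q               # back up toward the lookahead target
        return abs(bellman_error)       # a large error resets the converged-trajectories count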