[WIP: June2023] Deep Q-Learning using TorchSharp #710
Replies: 3 comments 6 replies
-
| 
 using Torch;
using System;
namespace DeepQLearning
{
    /// <summary>
    /// Console program that trains a small fully connected Q-network on the
    /// "CartPole-v0" environment with a one-step Q-learning update.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the <c>Torch</c> and <c>Gym</c> types used here are not declared in
    /// this file; the comments below describe only how this code calls them.
    /// </remarks>
    class Program
    {
        /// <summary>Learning rate handed to the Adam optimizer.</summary>
        private const double LearningRate = 0.001;

        /// <summary>Discount factor applied to the best Q-value of the next state.</summary>
        private const double DiscountFactor = 0.99;

        /// <summary>Nominal number of episodes; only used to size the total step budget.</summary>
        private const int EpisodeCount = 1000;

        /// <summary>Maximum number of environment steps taken within one episode.</summary>
        private const int StepsPerEpisode = 200;

        /// <summary>
        /// Builds the network, optimizer and environment, then trains until the total
        /// step budget (<see cref="EpisodeCount"/> * <see cref="StepsPerEpisode"/>) is spent.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Q-network: 4 inputs -> 128 hidden units (ReLU) -> 2 outputs (one Q-value per action).
            var model = new Sequential();
            model.Add(new Linear(4, 128));
            model.Add(new ReLU());
            model.Add(new Linear(128, 2));

            var optimizer = new Adam(model.Parameters(), LearningRate);
            var loss = new MSELoss();
            var env = Gym.Make("CartPole-v0");

            // Training stops on a total step budget, not on an episode count. Episodes that
            // end early consume fewer steps, so more than EpisodeCount episodes can run.
            int maxSteps = EpisodeCount * StepsPerEpisode;
            int stepCount = 0;
            int episode = 0;

            try
            {
                while (stepCount < maxSteps)
                {
                    env.Reset();
                    for (int step = 0; step < StepsPerEpisode; step++)
                    {
                        // Wrap the current observation as a single-row batch.
                        var state = env.Observation;
                        var tensor = new Tensor(state, new[] { 1, state.Length });

                        // Always take the action reported by Max() (no exploration).
                        // NOTE(review): Item2 is presumably the arg-max index - confirm.
                        // NOTE(review): a purely greedy policy never explores; consider
                        // epsilon-greedy action selection.
                        var qValues = model.Forward(tensor);
                        var action = qValues.Max().Item2;

                        // Apply the action and read back the transition.
                        var result = env.Step(action);
                        var reward = result.Reward;
                        var done = result.Done;

                        // The target equals the current prediction everywhere except the
                        // chosen action, so only that entry contributes to the loss.
                        // NOTE(review): if Clone() keeps the autograd graph, gradients also
                        // flow through the target; confirm whether it should be detached.
                        var target = qValues.Clone();
                        if (done)
                        {
                            // Terminal step: no future return to bootstrap from.
                            target[0, action] = reward;
                        }
                        else
                        {
                            // Bootstrap from the best Q-value of the next state.
                            var nextState = result.Observation;
                            var nextTensor = new Tensor(nextState, new[] { 1, nextState.Length });
                            var nextQValues = model.Forward(nextTensor);
                            var maxNextQ = nextQValues.Max().Item1;
                            target[0, action] = reward + DiscountFactor * maxNextQ;
                        }

                        // The input tensor already has shape { 1, state.Length }, so it is fed
                        // to the network as-is (the original discarded a Reshape result here).
                        var output = model.Forward(tensor);
                        optimizer.ZeroGrad();
                        var l = loss.Forward(output, target);
                        l.Backward();
                        optimizer.Step();

                        stepCount++;

                        if (done)
                        {
                            break;
                        }
                    }

                    Console.WriteLine("Episode: " + episode);
                    episode++;
                }
            }
            finally
            {
                // Release the environment even if training throws.
                env.Close();
            }
        }
    }
}
 | 
Beta Was this translation helpful? Give feedback.
-
| I think I reproduced the solution from the lecture in my repo here. | 
Beta Was this translation helpful? Give feedback.
-
| This is cool. I still hope we can build out a gym in .NET and maybe some shareable components for Q-learning. I don't have the expertise or experience to do that, but it'd be very cool. | 
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
June 2023
#981 (comment)
mindmap root((Reinforcement<br/>Learning)) Definitions Interactions Environment Agent Elements State Action Strategy<br/>策略 Deterministic Policy<br/>确定性策略 Stochastic Policy<br/>随机性策略 State transfer probability<br/>状态转移概率 Rewards<br/>即时奖励 Others Episodes Trial Continuing Tasks Policy Policy based learning Value based learning Monte Carlo learning Temporal Difference Learning SARSA<br/>State Action Reward State Action QLearning Dynamic programming learning Policy iteration algorithm Policy Evaluation Policy Improvement Value iteration algorithm Markov Decision Process Markov Decision Process<br/>马尔科夫决策过程 Trajectory<br/>轨迹 Markov Process<br/>马尔科夫过程 Objective Functions
Feb 2023
https://www.youtube.com/watch?v=217tCMsZu0I

Beta Was this translation helpful? Give feedback.
All reactions