/**
 * Simulates a CartPole environment: a cart moving along one axis with a pole
 * attached to it. The state is the 4-element array
 * [cart position, cart velocity, pole angle, pole angular velocity].
 */
class Environment {
  /**
   * Creates an environment with the given physical parameters.
   *
   * Each option falls back to its default only when it is absent
   * (undefined or null); an explicit 0 is honoured.
   *
   * @param {Object} [config] - Optional parameter overrides.
   * @param {Number} [config.gravity=9.8] - Gravitational acceleration.
   * @param {Number} [config.massCart=1.0] - Mass of the cart.
   * @param {Number} [config.massPole=0.1] - Mass of the pole.
   * @param {Number} [config.length=0.5] - Half of the pole's length.
   * @param {Number} [config.force_mag=10.0] - Magnitude of the force applied per action.
   * @param {Number} [config.tau=0.02] - Time interval between state updates.
   * @param {Number} [config.x_threshold=2.4] - Absolute cart position beyond which the episode ends.
   * @param {Number} [config.theta_threshold_radians] - Absolute pole angle (radians) beyond
   *   which the episode ends; defaults to 12 degrees.
   */
  constructor(config = {}) {
    // `value || fallback` would discard a legitimate 0 (e.g. zero gravity),
    // so only substitute the default when the option is really missing.
    const pick = (value, fallback) =>
      (value === undefined || value === null ? fallback : value);

    // Physical constants and configuration parameters.
    this.gravity = pick(config.gravity, 9.8);
    this.massCart = pick(config.massCart, 1.0);
    this.massPole = pick(config.massPole, 0.1);
    this.total_mass = this.massCart + this.massPole;
    this.length = pick(config.length, 0.5); // half the pole's length
    this.polemass_length = this.massPole * this.length;
    this.force_mag = pick(config.force_mag, 10.0);
    this.tau = pick(config.tau, 0.02); // time interval for updates

    // Termination thresholds.
    this.x_threshold = pick(config.x_threshold, 2.4);
    this.theta_threshold_radians = pick(
      config.theta_threshold_radians,
      12 * Math.PI / 180 // 12 degrees in radians
    );

    // No state exists until reset() is called.
    this.state = null;
  }

  /**
   * Resets the environment to a fresh initial state in which every component
   * is drawn uniformly at random from [-0.04, 0.04).
   *
   * @returns {Array} A copy of the initial state
   *   [cart position, cart velocity, pole angle, pole angular velocity].
   */
  reset() {
    const smallRandom = () => (Math.random() * 0.08) - 0.04;
    this.state = [
      smallRandom(), // cart position
      smallRandom(), // cart velocity
      smallRandom(), // pole angle
      smallRandom(), // pole angular velocity
    ];
    // Hand out a copy so callers cannot mutate the internal state by accident.
    return this.state.slice();
  }

  /**
   * Applies an action and advances the simulation by one time step of length
   * `tau` using Euler's method.
   *
   * @param {Number} action - 1 pushes the cart right; any other value pushes it left.
   * @returns {Object} `{ state, reward, done }` where `state` is a copy of the new
   *   state, `done` is true once the cart position or pole angle exceeds its
   *   threshold, and `reward` is 1 while the episode continues and 0 on the
   *   step that terminates it.
   * @throws {Error} If called before reset() has initialized the state.
   */
  step(action = 1) {
    if (this.state === null || this.state === undefined) {
      throw new Error('Environment.step() called before reset(): state is not initialized.');
    }

    // Unpack current state.
    const [x, x_dot, theta, theta_dot] = this.state;

    // Determine the force based on the action.
    const force = (action === 1) ? this.force_mag : -this.force_mag;
    const costheta = Math.cos(theta);
    const sintheta = Math.sin(theta);

    // Compute the accelerations using the standard CartPole dynamics.
    const temp = (force + this.polemass_length * theta_dot * theta_dot * sintheta) / this.total_mass;
    const thetaacc = (this.gravity * sintheta - costheta * temp) /
      (this.length * (4.0 / 3.0 - this.massPole * costheta * costheta / this.total_mass));
    const xacc = temp - this.polemass_length * thetaacc * costheta / this.total_mass;

    // Euler update: positions advance with the velocities from before this step.
    const newState = [
      x + this.tau * x_dot,
      x_dot + this.tau * xacc,
      theta + this.tau * theta_dot,
      theta_dot + this.tau * thetaacc,
    ];
    const [new_x, , new_theta] = newState;

    // The episode ends when the cart or the pole leaves its allowed range.
    const done = (new_x < -this.x_threshold || new_x > this.x_threshold ||
      new_theta < -this.theta_threshold_radians || new_theta > this.theta_threshold_radians);

    // Reward is 1 for each time step until termination.
    const reward = done ? 0 : 1;

    // Update internal state; return a copy so callers cannot corrupt it.
    this.state = newState;
    return { state: newState.slice(), reward, done };
  }
}
// Must export as Environment
export {
Environment
};