// Full CartPole Environment

/**
 * Simulates a CartPole environment: a cart moving along one axis with a pole
 * attached to it. The state is the 4-element array
 * [cart position, cart velocity, pole angle, pole angular velocity].
 *
 * Call reset() to start an episode, then step(action) repeatedly.
 */
class Environment {
    /**
     * @param {Object} [config] - Optional overrides for the defaults below.
     * @param {Number} [config.gravity=9.8]
     * @param {Number} [config.massCart=1.0]
     * @param {Number} [config.massPole=0.1]
     * @param {Number} [config.length=0.5] - Half the pole's length.
     * @param {Number} [config.force_mag=10.0] - Magnitude of the force applied per step.
     * @param {Number} [config.tau=0.02] - Time interval between state updates.
     * @param {Number} [config.x_threshold=2.4] - |cart position| beyond which the episode ends.
     * @param {Number} [config.theta_threshold_radians] - |pole angle| beyond which the
     *     episode ends; defaults to 12 degrees expressed in radians.
     */
    constructor(config = {}) {
        // Tolerate an explicit null config instead of crashing on property access.
        const options = (config === null || config === undefined) ? {} : config;

        // A plain `value || fallback` discards an explicit 0 (e.g. zero gravity).
        // Keep numeric zero; every other falsy value (undefined, null, NaN, '',
        // false) still falls back to the default, exactly as before.
        const pick = (key, fallback) => {
            const value = options[key];
            return (value === 0 || value) ? value : fallback;
        };

        // Physical constants and configuration parameters.
        this.gravity = pick('gravity', 9.8);
        this.massCart = pick('massCart', 1.0);
        this.massPole = pick('massPole', 0.1);
        this.total_mass = this.massCart + this.massPole;
        this.length = pick('length', 0.5); // half the pole's length
        this.polemass_length = this.massPole * this.length;
        this.force_mag = pick('force_mag', 10.0);
        this.tau = pick('tau', 0.02); // time interval for updates

        // Termination thresholds.
        this.x_threshold = pick('x_threshold', 2.4);
        this.theta_threshold_radians = pick('theta_threshold_radians', 12 * Math.PI / 180);

        // No episode is active until reset() is called.
        this.state = null;
    }

    /**
     * Resets the environment to an initial state.
     * Each of the four state components is drawn uniformly from [-0.04, 0.04).
     * @returns {Array} The initial state of the environment.
     */
    reset() {
        const smallPerturbation = () => (Math.random() * 0.08) - 0.04;
        this.state = [
            smallPerturbation(), // cart position
            smallPerturbation(), // cart velocity
            smallPerturbation(), // pole angle
            smallPerturbation()  // pole angular velocity
        ];
        return this.state;
    }

    /**
     * Applies an action and advances the simulation by one time step (tau)
     * using an explicit Euler update.
     * @param {Number} action - 1 pushes the cart right (+force_mag); any other
     *     value pushes it left (-force_mag).
     * @returns {Object} `{ state, reward, done }`: the new state array, a reward
     *     of 1 (0 on the step that terminates the episode), and whether the cart
     *     position or pole angle left its threshold.
     * @throws {Error} If called before reset() has initialised the state.
     */
    step(action = 1) {
        if (this.state === null || this.state === undefined) {
            throw new Error('Environment.step() called before reset(); call reset() first.');
        }

        // Unpack current state.
        const [x, x_dot, theta, theta_dot] = this.state;

        // Determine the force based on the action.
        const force = (action === 1) ? this.force_mag : -this.force_mag;
        const costheta = Math.cos(theta);
        const sintheta = Math.sin(theta);

        // Compute the accelerations; `temp` is the term shared by both equations.
        const temp = (force + this.polemass_length * theta_dot * theta_dot * sintheta) / this.total_mass;
        const thetaacc = (this.gravity * sintheta - costheta * temp) /
            (this.length * (4.0 / 3.0 - this.massPole * costheta * costheta / this.total_mass));
        const xacc = temp - this.polemass_length * thetaacc * costheta / this.total_mass;

        // Euler update: positions advance with the OLD velocities, velocities
        // advance with the accelerations just computed.
        const newState = [
            x + this.tau * x_dot,
            x_dot + this.tau * xacc,
            theta + this.tau * theta_dot,
            theta_dot + this.tau * thetaacc
        ];
        const [new_x, , new_theta] = newState;

        // The episode ends once the cart or the pole leaves its allowed range.
        const done = (new_x < -this.x_threshold || new_x > this.x_threshold ||
            new_theta < -this.theta_threshold_radians || new_theta > this.theta_threshold_radians);

        // Reward is 1 for each time step until termination, 0 on the terminating step.
        const reward = done ? 0 : 1;

        // Update internal state.
        this.state = newState;
        return { state: newState, reward, done };
    }
}

// Named ES-module export. Per the original note, the class must be exported
// under the name `Environment`.
export {
    Environment
};