ObitCC uses WebLLM by MLC for LLM inference support. Most LLM features, including function calling, streaming, and broad model support, work in our runtime. However, there may be bugs or missing features in our implementation: report them to anyone at Obit and we will get them fixed ASAP.
Model Support
When deploying a model for inference on ObitCC, it must be compiled into a specific format to be compatible with Web users and the variety of hardware they have. MLC (the group behind WebLLM) maintains prebuilt models in different sizes, and the HuggingFace community maintains additional ones. If you want to use your own model for inference, check out the documentation on compiling models published by MLC.
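For instance (a minimal sketch; the model IDs shown should be verified against MLC's current prebuilt list, and the payload shape is the one used in the full example below), swapping models only means changing the identifier you pass in the workload payload:

// prebuilt MLC model IDs follow a "<model name>-<quantization>-MLC" pattern.
// verify the exact strings against MLC's prebuilt list before using them.
const tinyModel = "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC"; // the model used in the example below
const otherModel = "Phi-3-mini-4k-instruct-q4f16_1-MLC"; // assumed to be on the prebuilt list

// referenced later in the workload definition as:
//   payload: { model: tinyModel, ... }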
Limitations
We do not allow models with more than 10 GB of weights (due to the excessive bandwidth they consume for users), and it is not advisable to run models that take a long time to generate text (e.g., non-optimized 13B-parameter models).
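As a back-of-the-envelope check (the numbers are rough estimates and ignore tokenizer files, metadata, and quantization overhead), you can estimate weight size from parameter count and quantization width to see whether a model fits under the limit:

// rough sketch: estimate weight size in GB from parameter count and bits per weight.
function estimateWeightsGB(paramsBillions, bitsPerWeight) {
  return (paramsBillions * 1e9 * bitsPerWeight) / 8 / 1e9;
}

console.log(estimateWeightsGB(1.1, 4));  // ~0.55 GB -> fine (e.g. TinyLlama at 4-bit)
console.log(estimateWeightsGB(13, 16));  // ~26 GB   -> rejected, over the 10 GB cap
console.log(estimateWeightsGB(13, 4));   // ~6.5 GB  -> allowed, but likely slow in the browser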
Example
The following example is provided in addition to the tutorial in "Creating your first Workload".
// here we will use WebLLM from MLC.ai to do inference on a small model.
// call any tools functions you like here
import { markAllTasksDone } from "../modules/tools.js";

// note: this model is FROM the webllm repository
// we can enable a mode where you host your own model, or we host weights to your model
const model = "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC";

let prompts = ["What is 2+2?", "What is 3+3?", "What is 4+4?"];
const responses = [];
const defaultPrompt = {
  content: "You are a helpful AI agent helping users.",
  role: "system",
};

// this function will be called when the user requests a shader
function shaderResponse() {
  // return a random prompt. users may not finish generating or there may be errors,
  // so it's fine to return the same prompt to multiple users.
  const prompt = prompts[Math.floor(Math.random() * prompts.length)];
  // because of batching, we have to make this an array of an array
  return [[defaultPrompt, { content: prompt, role: "user" }]];
}

// we recommend calling this when the jobs are done
function handleFinished() {
  // upload to your remote server here
  console.log("All prompts are completed", responses);
  // call this function from tools to declare that all tasks are done
  markAllTasksDone();
  // wait a couple of minutes for the users to get the cleanup packet, then exit
  setTimeout(() => {
    process.exit();
  }, 120000);
}

// this function will be called when the user responds with outputs
function handleOutputs(promptList, outputs) {
  console.log("User responded with response", outputs["choices"][0]["message"]["content"]);
  // remove the prompt from the prompts array as it is completed
  let prompt;
  for (let i = 0; i < promptList.length; i++) {
    const individualPrompt = promptList[i];
    if (individualPrompt["role"] === "user") {
      prompt = individualPrompt["content"];
    }
  }
  prompts = prompts.filter((p) => p !== prompt);
  responses.push(outputs);
  if (prompts.length === 0) {
    handleFinished();
  }
  console.log("Prompts left", prompts);
}

// users must consent to large files being stored on their computer before
// receiving some "type": "webllm" scripts
export default {
  type: "webllm",
  action: "inference",
  officialName: "webllm-testuniversity-test1",
  organization: "Test University",
  hooks: {
    shaderResponse,
    handleOutputs,
  },
  payload: {
    model: model,
    config: {
      temperature: 1.0,
      top_p: 1,
    },
  },
};
Entitlements
You may experience a slight decrease in computing capacity compared to our statistics, because some users (e.g., those on phones or those running low on disk space) can opt out of the Large Storage Entitlement (the permission to use more than 100 MB of storage on the user's device). Most WebLLM projects will require this entitlement, as most weights are over 100 MB.
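As an illustration with made-up numbers (the opt-out rate varies by audience and is only a placeholder here), you can estimate the capacity actually available to a WebLLM workload like this:

// illustrative only: estimating effective capacity when some users opt out
// of the Large Storage Entitlement. both figures below are hypothetical placeholders.
const reportedConcurrentUsers = 1000; // from ObitCC's reported statistics (hypothetical figure)
const assumedOptOutRate = 0.15;       // placeholder: fraction of users declining >100 MB storage

const effectiveUsers = Math.floor(reportedConcurrentUsers * (1 - assumedOptOutRate));
console.log("Users able to run a WebLLM workload:", effectiveUsers); // 850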