Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion website/components/editor.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ const DEFAULTCODE = `function add(num1, num2){

function CodeEditor() {
const [model, setModel] = useState("gpt-3.5-turbo-1106");
const [delay, setDelay] = useState(500);
const [acceptOnClick, setAcceptOnClick] = useState(true);
return (
<>
<div className="flex items-center gap-2">
<label>Model</label>
<Select
value={model}
onValueChange={(value) => {
Expand All @@ -36,6 +39,9 @@ function CodeEditor() {
<SelectItem value="gpt-3.5-turbo-1106">
GPT 3.5 Turbo <Badge variant="secondary">recommended</Badge>
</SelectItem>
<SelectItem value="mixtral-8x7b">
Mixtral MoE 8x7B Instruct <Badge variant="secondary">best open source</Badge>
</SelectItem>
<SelectItem value="codellama">
Code Llama <Badge variant="secondary">buggy</Badge>
</SelectItem>
Expand All @@ -44,6 +50,34 @@ function CodeEditor() {
</SelectItem>
</SelectContent>
</Select>

<label className="ml-2">Delay</label>
<Select
value={delay}
onValueChange={(value) => {
setDelay(value);
clearLocalCache();
}}
>
<SelectTrigger className="w-[180px]">
<SelectValue placeholder="Delay" />
</SelectTrigger>
<SelectContent>
<SelectItem value={500}>
500ms <Badge variant="secondary">recommended</Badge>
</SelectItem>
<SelectItem value={1000}>
1000ms <Badge variant="secondary">comfy</Badge>
</SelectItem>
<SelectItem value={100}>
100ms <Badge variant="destructive">psycho mode</Badge>
</SelectItem>
<SelectItem value={50}>
50ms <Badge variant="destructive">psycho's mom mode</Badge>
</SelectItem>
</SelectContent>
</Select>
</div>
<CodeMirror
style={{
fontSize: "17px",
Expand Down Expand Up @@ -80,7 +114,7 @@ function CodeEditor() {
const { prediction } = await res.json();
return prediction;
},
500,
delay,
acceptOnClick,
),
]}
Expand Down
19 changes: 19 additions & 0 deletions website/lib/backends/llama.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

/**
 * Fill-in-the-middle completion via Code Llama 7B on Cloudflare Workers AI.
 *
 * @param {string} prefix - code before the cursor.
 * @param {string} suffix - code after the cursor.
 * @param {string} [language] - optional language name woven into the prompt.
 * @returns {Promise<string|undefined>} the model's completion, or undefined
 *   when the request fails (errors are logged, not rethrown — best effort).
 */
export async function completionLlama(prefix, suffix, language){
  try {
    const response = await fetch(
      // FIXME: "CLOUDLFARE_ID" is misspelled — kept as-is because the deployed
      // environment variable presumably uses the same spelling; rename both together.
      `https://api.cloudflare.com/client/v4/accounts/${process.env.CLOUDLFARE_ID}/ai/run/@hf/thebloke/codellama-7b-instruct-awq`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${process.env.CLOUDFLARE_KEY}`,
        'Content-Type': 'application/json'
      },
      // <PRE>/<SUF>/<MID> is Code Llama's infill prompt format.
      body: JSON.stringify({ "prompt": `You are a ${language?(language + " "):""}programmer. Do not add any explanation or markdown. <PRE>${prefix}<SUF>${suffix}<MID>`, "max_tokens": 30 })
    });

    if (!response.ok) {
      // Fail loudly on HTTP-level errors instead of on a missing `result` below.
      throw new Error(`Cloudflare AI request failed with status ${response.status}`);
    }

    const data = await response.json();
    // Guard the chain: error payloads may not carry `result.response`.
    return data?.result?.response;
  } catch (error) {
    console.error('Error:', error);
  }
}
122 changes: 122 additions & 0 deletions website/lib/backends/mistral.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@

/**
 * Pull the contents of fenced code blocks out of a markdown string.
 * Strips the backtick fences (and an optional language tag) and trims
 * surrounding whitespace. When the text contains no fenced block at all,
 * the whole input is returned as the single segment.
 *
 * @param {string} markdownText - markdown that may contain ``` blocks.
 * @returns {string[]} one entry per fenced block, or [markdownText].
 */
function extractCodeSegments(markdownText) {
  // Lazily match each fenced block, allowing an optional language specifier.
  const fencedBlockPattern = /```[a-z]*[\s\S]*?```/g;
  const fencedBlocks = markdownText.match(fencedBlockPattern);

  if (!fencedBlocks) {
    // No fences — treat the entire text as the code itself.
    return [markdownText];
  }

  // Drop the opening fence (with its language tag and newline), then the
  // closing fence, then trim what's left.
  return fencedBlocks.map((block) =>
    block.replace(/```[a-z]*\n?/, '').replace(/```/, '').trim()
  );
}
/**
 * Strip the parts of `text` that echo the surrounding prefix and suffix.
 * Finds the LONGEST tail of `prefix` that `text` starts with and the
 * SHORTEST head of `suffix` that `text` ends with, and removes both.
 *
 * @param {string} text - model output expected to repeat prefix/suffix.
 * @param {string} prefix - code that preceded the fill-in point.
 * @param {string} suffix - code that followed the fill-in point.
 * @returns {string} the middle portion of `text`.
 * @throws {Error} when no prefix/suffix overlap exists — callers use this
 *   as a signal that the model output was malformed.
 */
function removeOverlapPrefixSuffix(text, prefix, suffix) {
  // Scan from the full prefix down to its last character, so the first hit
  // is the longest overlapping tail.
  let prefixOverlap = 0;
  for (let start = 0; start < prefix.length; start++) {
    if (text.startsWith(prefix.slice(start))) {
      prefixOverlap = prefix.length - start;
      break;
    }
  }
  if (prefixOverlap === 0) {
    throw new Error("prefix not found");
  }
  let middle = text.slice(prefixOverlap);

  // Scan from a 1-character head of the suffix upward, so the first hit
  // is the shortest overlapping head.
  let suffixOverlap = 0;
  for (let len = 1; len <= suffix.length; len++) {
    if (middle.endsWith(suffix.slice(0, len))) {
      suffixOverlap = len;
      break;
    }
  }
  if (suffixOverlap === 0) {
    throw new Error("suffix not found");
  }

  return middle.slice(0, -suffixOverlap);
}

/**
 * Ask Mixtral 8x7B (via Fireworks) to replace the <FILL_ME> gap between
 * prefix and suffix, then salvage just the middle from its markdown output.
 *
 * @param {string} prefix - code before the cursor.
 * @param {string} suffix - code after the cursor.
 * @param {string} [language] - optional language name woven into the prompt.
 * @param {string} [previousOutput] - a prior malformed answer; when set, it is
 *   fed back once with a "try again" instruction instead of recursing forever.
 * @returns {Promise<string>} the completion, or "" when unusable (the caller
 *   completionMixtralWithRetries loops on "").
 */
async function completionMixtral(prefix, suffix, language, previousOutput) {
  let messages = [
    {
      role: "user",
      content: "You are a " + (language || "") +" programmer that replaces <FILL_ME> part with the right code. ALWAYS INCLUDE PREFIX AND SUFFIX in the completed code.\n Do not format code, leave prefix and suffix as-is, only replace <FILL_ME> part, do not include any code comments. ```\n" + prefix + "<FILL_ME>" + suffix + "\n```" + "\nPut output in markdown\n",
    },
  ];
  if (previousOutput) {
    // Second attempt: show the model its own bad answer and ask for a redo.
    messages.push({
      role: "assistant",
      content: previousOutput,
    });
    messages.push({
      role: "user",
      content: "The previous output was not formatted correctly. Please try again. Output should be in markdown code block and should include prefix and suffix.",
    });
  }
  const response = await fetch(
    `https://api.fireworks.ai/inference/v1/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Accept': 'text/event-stream',
        'Authorization': `Bearer ${process.env.FIREWORKS_API_KEY}`,
      },
      body: JSON.stringify({
        model: "accounts/fireworks/models/mixtral-8x7b-instruct",
        n: 1,
        messages: messages,
        stop: [
          "<|im_start|>",
          "<|im_end|>",
          "<|endoftext|>"
        ],
        top_p: 1,
        top_k: 40,
        presence_penalty: 0,
        frequency_penalty: 0,
        prompt_truncate_len: 1024,
        context_length_exceeded_behavior: "truncate",
        temperature: 0.9,
        max_tokens: 150
      }),
    });

  const wholeOutput = await response.json();
  // Guard the whole chain: API error payloads carry no `choices`, and the
  // previous `wholeOutput?.choices[0]` threw a TypeError outside the retry
  // path below.
  const outputRaw = wholeOutput?.choices?.[0]?.message?.content;
  if (!outputRaw) {
    // Nothing to salvage or feed back — let the outer retry loop try again.
    return "";
  }

  try {
    // extract markdown code part
    const codeItself = extractCodeSegments(outputRaw)[0];
    // check if <FILL_ME> is still there (match is deliberately loose — no
    // closing ">" — to also catch truncated echoes of the placeholder)
    if (codeItself.includes("<FILL_ME")) {
      throw new Error("fill me still there");
    }
    return removeOverlapPrefixSuffix(codeItself, prefix, suffix);
  }
  catch (e) {
    if (!previousOutput) {
      // One feedback round: resend with the malformed answer attached.
      return await completionMixtral(prefix, suffix, language, outputRaw);
    }
    return "";
  }
}

/**
 * Retry wrapper around completionMixtral: keeps asking until a non-empty
 * completion comes back or the attempts run out.
 *
 * NOTE: the API handler invokes every backend as (prefix, suffix, model,
 * language), so the third positional argument here is the model id (unused)
 * and the fourth is the language. The original signature had them swapped,
 * which sent "mixtral-8x7b" into the prompt as the language name.
 *
 * @param {string} prefix - code before the cursor.
 * @param {string} suffix - code after the cursor.
 * @param {string} _model - model id from the request; ignored (always Mixtral).
 * @param {string} [language] - optional language name for the prompt.
 * @param {number} [retries=5] - maximum attempts before giving up.
 * @returns {Promise<string>} the completion, or "" when every attempt failed.
 */
export async function completionMixtralWithRetries(prefix, suffix, _model, language, retries=5){
  while(retries-->0){
    const output = await completionMixtral(prefix, suffix, language);
    if (output) return output;
  }
  return "";
}
20 changes: 20 additions & 0 deletions website/lib/backends/openai.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import OpenAI from "openai";

// Shared client; reads the API key once at module load.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

/**
 * Fill-in-the-middle completion via an OpenAI chat model: the model is told
 * to output only the code that replaces the <FILL_ME> marker placed between
 * prefix and suffix.
 *
 * @param {string} prefix - code before the cursor.
 * @param {string} suffix - code after the cursor.
 * @param {string} [model="gpt-3.5-turbo-1106"] - chat model id.
 * @param {string} [language] - optional language name woven into the prompt.
 * @returns {Promise<string>} the raw completion text.
 */
export async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", language){
  const systemPrompt = `You are a ${language?(language + " "):""}programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown.`;
  const userPrompt = `${prefix}<FILL_ME>${suffix}`;

  const chatCompletion = await openai.chat.completions.create({
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: userPrompt },
    ],
    model,
  });

  return chatCompletion.choices[0].message.content;
}
14 changes: 14 additions & 0 deletions website/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions website/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"@uiw/react-codemirror": "^4.21.21",
"class-variance-authority": "^0.7.0",
"clsx": "^2.0.0",
"extract-json-from-string": "^1.0.1",
"lucide-react": "^0.294.0",
"next": "14.0.4",
"openai": "^4.21.0",
Expand Down
48 changes: 6 additions & 42 deletions website/pages/api/autocomplete.js
Original file line number Diff line number Diff line change
@@ -1,47 +1,11 @@
import OpenAI from "openai";

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

async function completionLlama(prefix, suffix, language){
try {
const response = await fetch(
`https://api.cloudflare.com/client/v4/accounts/${process.env.CLOUDLFARE_ID}/ai/run/@hf/thebloke/codellama-7b-instruct-awq`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${process.env.CLOUDFLARE_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({ "prompt": `You are a ${language?(language + " "):""}programmer. Do not add any explanation or markdown. <PRE>${prefix}<SUF>${suffix}<MID>`, "max_tokens": 30 })
});

const data = await response.json();
return data.result.response;
} catch (error) {
console.error('Error:', error);
}
}

async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", language){
const chatCompletion = await openai.chat.completions.create({
messages: [
{
role: "system",
content: `You are a ${language?(language + " "):""}programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown.`,
},
{ role: "user", content: `${prefix}<FILL_ME>${suffix}` },
],
model,
});

return chatCompletion.choices[0].message.content;
}
import { completionLlama } from "@/lib/backends/llama";
import { completionMixtralWithRetries } from "@/lib/backends/mistral";
import { completionOpenAI } from "@/lib/backends/openai";

/**
 * POST /api/autocomplete — routes a fill-in-the-middle request to the backend
 * matching the requested model and returns { prediction }.
 *
 * Body: { prefix, suffix, model, language }. Every backend is invoked with
 * the same positional contract: (prefix, suffix, model, language).
 */
export default async function handler(req, res) {
  const { prefix, suffix, model, language } = req.body;
  // Pick the backend for the requested model; OpenAI handles everything else.
  const completionMethod =
    model === "codellama"
      ? completionLlama
      : model === "mixtral-8x7b"
        ? completionMixtralWithRetries
        : completionOpenAI;
  try {
    const prediction = await completionMethod(prefix, suffix, model, language);
    console.log(model, prediction);
    res.status(200).json({ prediction });
  } catch (error) {
    // A backend failure should surface as a JSON 500, not an unhandled crash.
    console.error(model, error);
    res.status(500).json({ error: "completion failed" });
  }
}