mindspore-lab
diff --git a/official/cv/segment-anything/README.md
Lines changed: 1 addition & 1 deletion b/official/cv/segment-anything/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/official/cv/segment-anything/images/blip2-text-prompt-wheel.jpg
-44.8 KB b/official/cv/segment-anything/images/blip2-text-prompt-wheel.jpg
-44.8 KB
diff --git a/official/cv/segment-anything/images/blip2-text-prompt-wheel.png
269 KB b/official/cv/segment-anything/images/blip2-text-prompt-wheel.png
269 KB
diff --git a/official/cv/segment-anything/text_inference.py
Lines changed: 1 addition & 4 deletions b/official/cv/segment-anything/text_inference.py
Lines changed: 1 addition & 4 deletions
@@ -116,7 +116,7 @@ python text_inference.py --checkpoint=your/path/to/ckpt
 
 Below is an experimental result prompted with `wheels`. _Note that the model is trained with limited data and the smallest SAM type `vit_b`._ 
 <div align="center">
-    <img alt="img.png" src="images/blip2-text-prompt-wheel.jpg" width="600"/>
+    <img alt="img.png" src="images/blip2-text-prompt-wheel.png" width="600"/>
 </div>
 
 ## Demo
 
@@ -65,9 +65,6 @@ def infer(args):
         print(f'prompt is: {args.text_prompt}')
         mask_logits = network(image, text_ids=input_ids)[0]   # (1, 1, 1024, 1024)
 
-    with Timer('Second time inference'):
-        mask_logits = network(image, text_ids=input_ids)[0]  # (1, 1, 1024, 1024)
-
     # Step3: post-process
     with Timer('post-process'):
         mask_logits = mask_logits.asnumpy()[0, 0] > 0.0
@@ -86,7 +83,7 @@ def infer(args):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description=("Runs inference on one image"))
-    parser.add_argument("--image_path", type=str, default='./datasets/sa-1b/sa_000000/sa_1.jpg', help="Path to an input image.")
+    parser.add_argument("--image_path", type=str, default='./images/truck.jpg', help="Path to an input image.")
     parser.add_argument(
         "--model-type",
         type=str,