-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModel.py
More file actions
136 lines (109 loc) · 4.53 KB
/
Model.py
File metadata and controls
136 lines (109 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Load a pre-trained YOLOv5 model via torch.hub from the 'ultralytics/yolov5' repository.
# This runs once at module level; every detection call below reuses `model_yolo`.
# The 'yolov5x' variant is chosen deliberately: a larger model for better accuracy.
# NOTE(review): the original comment says the weights are pre-trained on COCO — confirm.
# NOTE(review): torch.hub.load may fetch the repo/weights over the network on first use.
model_yolo = torch.hub.load('ultralytics/yolov5', 'yolov5x')
# JavaScript to capture image from webcam
def get_webcam_image():
    """Inject the browser-side ``takePhoto()`` helper into the notebook output.

    The JavaScript defines an async ``takePhoto`` function that:
      * opens the webcam with ``getUserMedia`` and plays it in a ``<video>``,
      * adds a "Take Photo" button and waits for a click,
      * draws the current frame onto a canvas, stops the video track,
        removes the temporary elements, and
      * resolves to a JPEG data URL (quality 0.8).

    This function only displays the script; it does not call ``takePhoto``.

    Returns:
        The ``Javascript`` display object that was shown.
    """
    # The script text is kept exactly as authored; it runs in the browser, not in Python.
    photo_js_source = '''
async function takePhoto() {
const div = document.createElement('div');
const video = document.createElement('video');
const stream = await navigator.mediaDevices.getUserMedia({video: true});
document.body.appendChild(div);
div.appendChild(video);
video.srcObject = stream;
await video.play();
google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
const button = document.createElement('button');
div.appendChild(button);
button.textContent = 'Take Photo';
await new Promise((resolve) => button.onclick = resolve);
const canvas = document.createElement('canvas');
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
canvas.getContext('2d').drawImage(video, 0, 0);
stream.getVideoTracks()[0].stop();
div.remove();
return canvas.toDataURL('image/jpeg', 0.8);
}
'''
    capture_script = Javascript(photo_js_source)
    display(capture_script)
    return capture_script
# Function to extract object contours and generate point cloud
def contour_to_pointcloud(img, bbox):
    """Turn the edge contours inside a bounding box into a pseudo-3D point cloud.

    The box region is cropped, converted to grayscale, blurred, and run through
    Canny edge detection; the external contours of the edge map supply the
    (x, y) samples. No depth information is available, so each point receives
    a random z drawn uniformly from [0, 1).

    Args:
        img: Image array indexed as ``img[row, col]`` and convertible with
            ``cv2.COLOR_BGR2GRAY`` (i.e. a BGR image).
        bbox: Four numbers ``(x1, y1, x2, y2)``; each is truncated with
            ``int``. Parts of the box outside the image are clipped away.

    Returns:
        ``numpy.ndarray`` of shape ``(N, 3)`` and dtype float64 whose columns
        are x, y (in full-image coordinates) and the random z. ``N`` is 0 when
        the clipped box is empty or no contours are found.
    """
    x1, y1, x2, y2 = map(int, bbox)

    # Clamp to the image bounds: a negative index would silently wrap the
    # slice to the opposite edge, and an empty crop makes cv2.cvtColor raise.
    height, width = img.shape[:2]
    x1, x2 = max(x1, 0), min(x2, width)
    y1, y2 = max(y1, 0), min(y2, height)
    if x2 <= x1 or y2 <= y1:
        return np.empty((0, 3), dtype=np.float64)

    cropped_img = img[y1:y2, x1:x2]  # Object region only
    gray = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)  # Suppress noise before edge detection
    edged = cv2.Canny(blurred, 30, 150)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; index -2 is the contours in both.
    contours = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return np.empty((0, 3), dtype=np.float64)

    # Each contour has shape (n_i, 1, 2); stack them all into one (N, 2) array.
    xy = np.vstack(contours).reshape(-1, 2).astype(np.float64)
    # Shift from crop-local coordinates back to original image coordinates.
    xy[:, 0] += x1
    xy[:, 1] += y1

    # Placeholder depth: random values (replace with real depth data if available).
    z = np.random.uniform(0, 1, size=len(xy))
    return np.column_stack((xy, z))
# Function to visualize the point cloud using Plotly
def visualize_pointcloud(pointcloud, label):
    """Show a point cloud as an interactive 3-D scatter plot using Plotly.

    Args:
        pointcloud: Array-like whose rows are points and whose first three
            columns are x, y and z. Lists are accepted as well as NumPy
            arrays. An empty input prints a notice and nothing is plotted.
        label: Text inserted into the plot title
            (``"Point Cloud for {label}"``).

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Coerce first so plain lists (including []) work; `.size` exists only on ndarrays.
    pointcloud = np.asarray(pointcloud)
    if pointcloud.size == 0:
        print("No point cloud to visualize.")
        return

    # One marker per point.
    trace = go.Scatter3d(
        x=pointcloud[:, 0],
        y=pointcloud[:, 1],
        z=pointcloud[:, 2],
        mode='markers',
        marker=dict(
            size=4,
            # Colours are random noise: they only help tell neighbouring
            # markers apart and carry no information about the points.
            color=np.random.randn(len(pointcloud)),
            colorscale='Viridis',
            opacity=0.8,
        ),
    )

    layout = go.Layout(
        title=f"Point Cloud for {label}",
        scene=dict(
            xaxis=dict(title='X'),
            yaxis=dict(title='Y'),
            zaxis=dict(title='Z'),
        ),
    )

    fig = go.Figure(data=[trace], layout=layout)
    fig.show()
# Function to capture the image, perform detection, and display one object as a point cloud
def webcam_object_detection():
    """Capture a webcam photo, detect objects, and plot the best one as a point cloud.

    Pipeline:
      1. Display the ``takePhoto`` JavaScript helper and evaluate it, which
         waits for the user to click "Take Photo" and yields a JPEG data URL.
      2. Decode the data URL into an OpenCV (BGR) image, resize it to
         640x480, and save it as ``webcam.jpg``.
      3. Run ``model_yolo`` on an RGB copy of the frame and show the
         annotated result.
      4. Select the detection with the highest confidence, build a point
         cloud from its contours, and visualize it.

    Prints a message and returns early when the image cannot be decoded or
    nothing is detected.

    Returns:
        None.
    """
    # Only the side effect (injecting the script) is needed; the handle is unused.
    get_webcam_image()
    data = eval_js('takePhoto()')

    # `data` is a data URL ("<header>,<base64 payload>"); keep the payload only.
    binary = b64decode(data.split(',')[1])
    jpg = np.frombuffer(binary, dtype=np.uint8)

    # cv2.imdecode returns None (no exception) when the bytes are not a valid image.
    img = cv2.imdecode(jpg, cv2.IMREAD_COLOR)
    if img is None:
        print("Could not decode the captured image.")
        return

    img_resized = cv2.resize(img, (640, 480))  # Smaller frame for faster inference
    cv2.imwrite('webcam.jpg', img_resized)  # Keep a copy on disk; detection uses the in-memory array

    # OpenCV decodes to BGR, but the YOLOv5 hub model expects RGB for NumPy
    # input. Feeding BGR degrades detection and swaps colours in results.show().
    img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
    results = model_yolo(img_rgb)

    labels = results.names  # class-id -> class-name lookup
    # Move the detections tensor to the CPU before converting to NumPy.
    # Each row unpacks below as (xmin, ymin, xmax, ymax, confidence, class_id).
    detected_objects = results.xyxy[0].cpu().numpy()

    results.show()

    if len(detected_objects) == 0:
        print("No objects detected.")
        return

    # Column 4 holds the confidence; keep the single most confident detection.
    best_object = detected_objects[np.argmax(detected_objects[:, 4])]
    xmin, ymin, xmax, ymax, confidence, class_id = best_object
    class_name = labels[int(class_id)]
    print(f"Detected: {class_name} with confidence {confidence:.2f}")

    # Contour extraction converts from BGR itself, so pass the BGR frame here.
    points = contour_to_pointcloud(img_resized, [xmin, ymin, xmax, ymax])
    print(f"Generating point cloud for: {class_name} with {len(points)} points")

    visualize_pointcloud(points, class_name)
# Script entry point: run the capture -> detect -> visualize pipeline once,
# immediately when this code is executed (there is no __main__ guard).
webcam_object_detection()