Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added DL_Object Recognition System.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions Infosys_object_detection_in_google_colab.ipynb

Large diffs are not rendered by default.

143 changes: 143 additions & 0 deletions annotation_convertion.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "73f058fb-154c-482e-97c7-31475291bdf7",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "4f002fe0-7cbb-42ec-9821-bf78e81a60ef",
"metadata": {},
"outputs": [],
"source": [
"coco_ann_file = \"C:/infosys internship/coco2017_subset/annotation_subset/instances_val2017_subset.json\"\n",
"output_dir = \"C:/infosys internship/coco2017_subset/val2017_labels\"\n",
"\n",
"os.makedirs(output_dir, exist_ok=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f7b471a6-137c-4305-b664-465ca5e350eb",
"metadata": {},
"outputs": [],
"source": [
"# Load COCO annotations\n",
"with open(coco_ann_file, 'r') as f:\n",
" coco_data = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f2165d89-6d9e-4123-9cdc-6b16abc127b0",
"metadata": {},
"outputs": [],
"source": [
"# Create mapping: COCO category id -> new id (0-based contiguous indices)\n",
"# For example, if the COCO category ids are 1, 2, 3, 5, 6, ... they are remapped to 0, 1, 2, 3, 4, ...\n",
"coco_categories = sorted([cat['id'] for cat in coco_data['categories']])\n",
"id_mapping = {orig_id: new_id for new_id, orig_id in enumerate(coco_categories)}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "8f38b511-f624-4f5f-80a9-8f0308897e41",
"metadata": {},
"outputs": [],
"source": [
"# Build a mapping from image_id to image info\n",
"img_info = {img['id']: img for img in coco_data['images']}"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "9ed5f07a-08ee-4573-ae8c-8256bd95c422",
"metadata": {},
"outputs": [],
"source": [
"# Process annotations per image\n",
"annotations_by_image = {}\n",
"for ann in coco_data['annotations']:\n",
" img_id = ann['image_id']\n",
" img = img_info[img_id]\n",
" img_width, img_height = img['width'], img['height']\n",
"\n",
" x, y, w, h = ann['bbox']\n",
" # Convert COCO bbox [x, y, w, h] to YOLO format [x_center, y_center, width, height] normalized\n",
" x_center = (x + w / 2) / img_width\n",
" y_center = (y + h / 2) / img_height\n",
" norm_w = w / img_width\n",
" norm_h = h / img_height\n",
"\n",
" # Remap the category id\n",
" orig_cat = ann['category_id']\n",
" if orig_cat not in id_mapping:\n",
" continue # skip if the category is not in the mapping\n",
" new_cat = id_mapping[orig_cat]\n",
"\n",
" if img_id not in annotations_by_image:\n",
" annotations_by_image[img_id] = []\n",
" annotations_by_image[img_id].append(f\"{new_cat} {x_center:.6f} {y_center:.6f} {norm_w:.6f} {norm_h:.6f}\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "d420d94a-3e54-4893-a800-28292749d233",
"metadata": {},
"outputs": [],
"source": [
"# Write YOLO label files for each image\n",
"for img_id, ann_list in annotations_by_image.items():\n",
" file_name = img_info[img_id]['file_name']\n",
" base_name = os.path.splitext(file_name)[0]\n",
" # Write the label file into output_dir, named after the image (same base name, .txt extension)\n",
" out_file = os.path.join(output_dir, base_name + '.txt')\n",
" with open(out_file, 'w') as f:\n",
" for line in ann_list:\n",
" f.write(line + '\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d56b0b41-9890-40fe-8dfa-30b0feda93e3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
81 changes: 81 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import os
import tempfile
import time

import cv2
import numpy as np
import streamlit as st
from ultralytics import YOLO

# Path to the YOLO weights (adjust the path as needed).
MODEL_PATH = r"C:\Users\jerin\yolo11n.pt"
# Pause between displayed frames, in seconds, to keep the UI responsive.
FRAME_DELAY_S = 0.03


@st.cache_resource
def load_model(path):
    """Load the YOLO model once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    caching avoids reloading the weights from disk each time.
    """
    return YOLO(path)


def annotate(frame, conf):
    """Run detection on one BGR frame and return the annotated BGR frame."""
    results = model.predict(source=frame, conf=conf, show=False)
    return results[0].plot()


def remove_quietly(path):
    """Best-effort removal of a temporary file."""
    try:
        os.remove(path)
    except OSError:
        # The file may still be locked by the OS; leaving a stray temp file
        # behind is preferable to crashing the app.
        pass


model = load_model(MODEL_PATH)

# Sidebar configuration for confidence threshold and mode selection
confidence_threshold = st.sidebar.slider("Confidence Threshold", 0.0, 1.0, 0.5, 0.05)
mode = st.sidebar.selectbox("Select Mode", ["Image", "Video", "Webcam"])

st.title("YOLO Object Detection with Streamlit")

if mode == "Image":
    st.header("Image Detection")
    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
    if uploaded_image is not None:
        # Decode the uploaded bytes into a BGR NumPy array.
        file_bytes = np.frombuffer(uploaded_image.read(), dtype=np.uint8)
        image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
        if image is None:
            # imdecode returns None when the data is not a readable image.
            st.error("Could not decode the uploaded file as an image.")
        else:
            st.image(image, channels="BGR", caption="Original Image")
            annotated_image = annotate(image, confidence_threshold)
            st.image(annotated_image, channels="BGR", caption="Detection Result")

elif mode == "Video":
    st.header("Video Detection")
    uploaded_video = st.file_uploader("Upload a Video", type=["mp4", "mov", "avi"])
    if uploaded_video is not None:
        # Persist the upload to disk because OpenCV reads videos by path.
        # The file is closed (and therefore flushed) before OpenCV opens it,
        # and keeps the original extension to help container detection.
        suffix = os.path.splitext(uploaded_video.name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tfile:
            tfile.write(uploaded_video.read())
        video_path = tfile.name

        cap = cv2.VideoCapture(video_path)
        stframe = st.empty()  # Placeholder to update video frames
        try:
            if not cap.isOpened():
                st.error("Could not open the uploaded video.")
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                stframe.image(annotate(frame, confidence_threshold), channels="BGR")
                time.sleep(FRAME_DELAY_S)
        finally:
            # Runs even if Streamlit interrupts the script mid-loop.
            cap.release()
            remove_quietly(video_path)

elif mode == "Webcam":
    st.header("Webcam Live Detection (OpenCV Method)")

    # Initialize webcam run state in session state
    if "run_webcam" not in st.session_state:
        st.session_state.run_webcam = False

    # Buttons to start and stop webcam capture
    start_button = st.button("Start Webcam")
    stop_button = st.button("Stop Webcam")
    if start_button:
        st.session_state.run_webcam = True
    if stop_button:
        st.session_state.run_webcam = False

    webcam_placeholder = st.empty()  # Placeholder for webcam frames

    if st.session_state.run_webcam:
        cap = cv2.VideoCapture(0)  # Open default webcam
        try:
            if not cap.isOpened():
                st.error("Could not open the webcam.")
                st.session_state.run_webcam = False
            else:
                st.write("Starting webcam...")
            while st.session_state.run_webcam and cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    st.write("Failed to grab frame")
                    break
                webcam_placeholder.image(
                    annotate(frame, confidence_threshold), channels="BGR"
                )
                time.sleep(FRAME_DELAY_S)
        finally:
            # Pressing a button makes Streamlit interrupt this run; releasing
            # here ensures the camera is freed in that case as well.
            cap.release()
        st.write("Webcam stopped.")
83 changes: 83 additions & 0 deletions classes_train&val.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "21a45fde-b895-4915-982e-bb3751c3539f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of Classes: 80\n",
"Class IDs: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]\n",
"Class IDs saved to 'classes.txt'\n"
]
}
],
"source": [
"import os\n",
"\n",
"# Paths to label folders (update if necessary)\n",
"train_label_path = r\"C:/infosys internship/coco2017_subset/train2017_labels\"\n",
"val_label_path = r\"C:/infosys internship/coco2017_subset/val2017_labels\"\n",
"\n",
"# Store unique class IDs\n",
"unique_classes = set()\n",
"\n",
"# Function to process label files in a folder\n",
"def extract_classes(label_path):\n",
" for file in os.listdir(label_path):\n",
" if file.endswith(\".txt\"):\n",
" with open(os.path.join(label_path, file), \"r\") as f:\n",
" for line in f:\n",
" class_id = int(line.split()[0]) # Extract class ID\n",
" unique_classes.add(class_id)\n",
"\n",
"# Process both train and val folders\n",
"extract_classes(train_label_path)\n",
"extract_classes(val_label_path)\n",
"\n",
"# Print results\n",
"print(f\"Number of Classes: {len(unique_classes)}\")\n",
"print(f\"Class IDs: {sorted(unique_classes)}\")\n",
"\n",
"# (Optional) Save to a text file\n",
"with open(\"classes.txt\", \"w\") as f:\n",
" f.write(\"\\n\".join(map(str, sorted(unique_classes))))\n",
"\n",
"print(\"Class IDs saved to 'classes.txt'\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d43cb6f-bff8-432a-a4c9-96ebe029c96c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading