Add YOLOv3 webcam object-detection demo (main.py, coco.names, yolov3.cfg)

2025-04-09 11:53:36 -05:00 · 2025-04-09 11:53:36 -05:00 · d597d1941c
commit d597d1941c
parent 79d7a21bec
3 changed files with 1009 additions and 0 deletions
--- a/Object-Recognision/coco.names
+++ b/Object-Recognision/coco.names
@ -0,0 +1,80 @@
 person
 bicycle
 car
 motorbike
 aeroplane
 bus
 train
 truck
 boat
 traffic light
 fire hydrant
 stop sign
 parking meter
 bench
 bird
 cat
 dog
 horse
 sheep
 cow
 elephant
 bear
 zebra
 giraffe
 backpack
 umbrella
 handbag
 tie
 suitcase
 frisbee
 skis
 snowboard
 sports ball
 kite
 baseball bat
 baseball glove
 skateboard
 surfboard
 tennis racket
 bottle
 wine glass
 cup
 fork
 knife
 spoon
 bowl
 banana
 apple
 sandwich
 orange
 broccoli
 carrot
 hot dog
 pizza
 donut
 cake
 chair
 sofa
 pottedplant
 bed
 diningtable
 toilet
 tvmonitor
 laptop
 mouse
 remote
 keyboard
 cell phone
 microwave
 oven
 toaster
 sink
 refrigerator
 book
 clock
 vase
 scissors
 teddy bear
 hair drier
 toothbrush
--- a/Object-Recognision/main.py
+++ b/Object-Recognision/main.py
@ -0,0 +1,140 @@
 import cv2
 import numpy as np
 import time
 # ----------------------------
 # Configuration and Setup
 # ----------------------------
 # YOLO model files; these relative paths are resolved against the current
 # working directory, so adjust them if the files live somewhere else.
 config_path = './yolov3.cfg'
 weights_path = './yolov3.weights'
 names_path = './coco.names'
 # One label per line of the names file; a label's list position is its class ID.
 with open(names_path, 'r') as names_file:
    classes = [raw_line.strip() for raw_line in names_file]
 # Build the network from the Darknet config and the trained weights.
 net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
 # Run on OpenCV's own backend on the CPU. A CUDA-enabled OpenCV build would
 # need DNN_BACKEND_CUDA together with DNN_TARGET_CUDA here instead.
 net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
 net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
 # Names of every layer, used to look up the output layers below.
 layer_names = net.getLayerNames()
 # getUnconnectedOutLayers() reports 1-based layer indices and its array shape
 # differs between OpenCV versions; flattening gives a plain 1D sequence.
 out_layer_indices = net.getUnconnectedOutLayers().flatten()
 output_layers = [layer_names[idx - 1] for idx in out_layer_indices]
 # Detection thresholds
 conf_threshold = 0.5  # Detections scoring at or below this are discarded
 nms_threshold = 0.4   # Overlap threshold passed to non-maximum suppression
 # One fixed pseudo-random color per class (seeded so colors are stable across runs)
 np.random.seed(42)
 colors = np.random.randint(0, 255, size=(len(classes), 3), dtype='uint8')
 # ----------------------------
 # Object Detection Function
 # ----------------------------
 def detect_objects(frame):
    """
    Run YOLO on one frame and return the detections that survive NMS.

    Args:
        frame: Image array whose first two dimensions are (height, width).
            It is passed to cv2.dnn.blobFromImage with swapRB=True, so it is
            presumably a BGR image as delivered by cv2.VideoCapture.

    Returns:
        A tuple (boxes, confidences, class_ids) of three parallel lists.
        Each box is [x, y, w, h] in pixels of the input frame with (x, y)
        the top-left corner (coordinates are not clamped to the frame),
        confidences are floats, and class_ids index into the class list.
    """
    height, width = frame.shape[:2]
    # Scale pixels to [0, 1], resize to the 416x416 network input and swap
    # the R and B channels, then run a forward pass through the output layers.
    blob = cv2.dnn.blobFromImage(frame, scalefactor=1/255.0, size=(416, 416),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    # One 2D array per output layer; each row is one candidate detection laid
    # out as [center_x, center_y, width, height, objectness, class scores...].
    detections = net.forward(output_layers)
    boxes = []
    confidences = []
    class_ids = []
    for output in detections:
        # Pick the best class per row and apply the confidence threshold in
        # NumPy, so the Python loop below only visits rows that pass.
        scores = output[:, 5:]
        best_ids = np.argmax(scores, axis=1)
        best_scores = scores[np.arange(scores.shape[0]), best_ids]
        for row in np.flatnonzero(best_scores > conf_threshold):
            detection = output[row]
            # Box values are relative to the frame; scale them to pixels.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Convert the box center to its top-left corner.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(best_scores[row]))
            class_ids.append(best_ids[row])
    # Non-max suppression drops boxes that overlap a higher-scoring box.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    # NMSBoxes may return an empty tuple, an Nx1 array or a 1D array depending
    # on the OpenCV version and result; normalise all of them to a flat list.
    kept = np.asarray(indices, dtype=int).reshape(-1).tolist()
    final_boxes = [boxes[i] for i in kept]
    final_confidences = [confidences[i] for i in kept]
    final_class_ids = [class_ids[i] for i in kept]
    return final_boxes, final_confidences, final_class_ids
 # ----------------------------
 # Main Function: Real-Time Object Detection
 # ----------------------------
 def main():
    """
    Show live webcam frames annotated with YOLO detections until 'q' is pressed.

    Opens capture device 0, runs detect_objects on every frame, draws a
    labelled rectangle per detection and displays the result in a window.
    Returns early with a message if the webcam cannot be opened, and stops
    when a frame cannot be read.
    """
    cap = cv2.VideoCapture(0)  # Start the webcam
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab a frame.")
                break
            # Detect objects in the frame
            boxes, confidences, class_ids = detect_objects(frame)
            # Draw bounding boxes and labels on the frame
            for (x, y, w, h), confidence, class_id in zip(boxes, confidences, class_ids):
                # cv2 drawing calls want plain Python ints, not NumPy scalars
                color = [int(c) for c in colors[class_id]]
                label = f"{classes[class_id]}: {confidence:.2f}"
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, color, 2)
            # Display the frame
            cv2.imshow("Real-Time Object Detection", frame)
            # Exit on pressing 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if detection or
        # drawing raised, or the loop was interrupted with Ctrl-C.
        cap.release()
        cv2.destroyAllWindows()
 # Start the webcam demo only when this file is run as a script, not on import.
 if __name__ == "__main__":
    main()
--- a/Object-Recognision/yolov3.cfg
+++ b/Object-Recognision/yolov3.cfg
@ -0,0 +1,789 @@
 [net]
 # Testing
 # batch=1
 # subdivisions=1
 # Training
 batch=64
 subdivisions=16
 width=608
 height=608
 channels=3
 momentum=0.9
 decay=0.0005
 angle=0
 saturation = 1.5
 exposure = 1.5
 hue=.1
 learning_rate=0.001
 burn_in=1000
 max_batches = 500200
 policy=steps
 steps=400000,450000
 scales=.1,.1
 [convolutional]
 batch_normalize=1
 filters=32
 size=3
 stride=1
 pad=1
 activation=leaky
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=32
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=64
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 ######################
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=1024
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=1024
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=1024
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=255
 activation=linear
 [yolo]
 mask = 6,7,8
 anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
 classes=80
 num=9
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1
 [route]
 layers = -4
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [upsample]
 stride=2
 [route]
 layers = -1, 61
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=512
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=512
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=512
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=255
 activation=linear
 [yolo]
 mask = 3,4,5
 anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
 classes=80
 num=9
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1
 [route]
 layers = -4
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [upsample]
 stride=2
 [route]
 layers = -1, 36
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=256
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=256
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=256
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=255
 activation=linear
 [yolo]
 mask = 0,1,2
 anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
 classes=80
 num=9
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1