akt

2025-12-18 10:40:08 +01:00
parent f2754cba35
commit 321a7e5b39
21 changed files with 1569 additions and 0 deletions
--- a/dzien_05/przyklady/yolo_test/funkcje/init.py
+++ b/dzien_05/przyklady/yolo_test/funkcje/init.py
--- a/dzien_05/przyklady/yolo_test/funkcje/obraz.py
+++ b/dzien_05/przyklady/yolo_test/funkcje/obraz.py
@@ -0,0 +1,470 @@
+import cv2 as cv
+import numpy as np
+import os
+try:
+    from ultralytics import YOLO
+    YOLO_AVAILABLE = True
+except ImportError:
+    YOLO_AVAILABLE = False
+    print("Uwaga: Biblioteka ultralytics (YOLO) nie jest zainstalowana.")
+    print("Aby używać detekcji YOLO, zainstaluj: pip install ultralytics")
+
+class Obraz:
+    def __init__(self, filename: str, mode: int = cv.IMREAD_COLOR_BGR):
+        """
+        Set up an image wrapper; the file itself is not read here.
+
+        Args:
+            filename (str): Path to the image file; stored as an absolute path.
+            mode (int): Read flag, later passed to cv.imread as ``flags``.
+        """
+        # Nothing has been read or transformed yet.
+        self.img = None
+        self.img_other = None
+        self.loaded = False
+        self.transform_name = "NOTHING"
+        # Where and how the image will be read by load_image().
+        self.mode = mode
+        self.filename = os.path.abspath(filename)
+
+    def load_image(self):
+        """
+        Read the image from ``self.filename`` into ``self.img``.
+
+        The file is read with ``cv.imread`` using ``self.mode`` as flags.
+        ``cv.imread`` reports an unreadable/undecodable file by returning
+        None instead of raising, so the result is checked explicitly and
+        ``self.loaded`` is set only when an image was actually obtained.
+
+        Returns:
+            True if the image was already loaded by an earlier call;
+            otherwise the freshly read image (numpy.ndarray), or None when
+            reading failed.
+
+        Raises:
+            FileNotFoundError: If ``self.filename`` does not exist.
+        """
+        if self.loaded:
+            return True
+        if not os.path.exists(self.filename):
+            raise FileNotFoundError(f"Nie udało się wczytać pliku: {self.filename}")
+
+        try:
+            img = cv.imread(self.filename, flags=self.mode)
+        except Exception as e:
+            # Best effort, as before: report the problem and stay unloaded.
+            print(e)
+            return self.img
+
+        if img is None:
+            # The file exists but could not be decoded as an image.
+            print(f"Nie udało się zdekodować obrazu: {self.filename}")
+
+        self.img = img
+        self.loaded = img is not None
+        return self.img
+
+    def show_image(self, window_name = "Window"):
+        """
+        Show the loaded image in a window and block until a key is pressed.
+
+        The window title is ``window_name`` followed by the file's base name
+        and its full path. All OpenCV windows are destroyed afterwards.
+
+        Args:
+            window_name (str): Prefix of the window title.
+
+        Raises:
+            ValueError: If the image has not been loaded.
+        """
+        if not self.loaded:
+            raise ValueError("Obraz nie został wczytany")
+
+        base_name = os.path.basename(self.filename)
+        title = window_name + f"-{base_name} ({self.filename})"
+
+        cv.imshow(title, self.img)
+        cv.waitKey(0)  # wait for any key press
+        cv.destroyAllWindows()
+
+    def show_side_by_side(self, window_name="Original vs Transformed",
+                          screen_size=(1920, 1080), screen_fraction=0.8):
+        """
+        Show the original and the transformed image next to each other.
+
+        Both images are converted to 3-channel BGR copies, the transformed
+        one is resized to the original's size if they differ, each half is
+        labelled, and the combined picture is scaled down when it exceeds
+        ``screen_fraction`` of ``screen_size``. The call blocks until a key
+        is pressed, then destroys all OpenCV windows.
+
+        Args:
+            window_name (str): Name of the window.
+            screen_size (tuple): Assumed screen (width, height) in pixels.
+            screen_fraction (float): Largest share of the screen the
+                combined picture may occupy in each dimension.
+
+        Raises:
+            ValueError: If the image has not been loaded or no
+                transformation result is available.
+            Exception: Any error raised while preparing or displaying the
+                images is printed and re-raised unchanged.
+        """
+        if not self.loaded:
+            raise ValueError("Obraz nie został wczytany")
+        if self.img_other is None:
+            raise ValueError("Nie wykonano jeszcze żadnej transformacji")
+
+        def as_bgr(image):
+            # hconcat needs both halves to have the same channel count, so
+            # grayscale and BGRA inputs are normalised to 3-channel BGR.
+            # A new array is always returned so labels never touch the source.
+            if len(image.shape) == 2 or image.shape[2] == 1:
+                return cv.cvtColor(image, cv.COLOR_GRAY2BGR)
+            if image.shape[2] == 4:
+                return cv.cvtColor(image, cv.COLOR_BGRA2BGR)
+            return image.copy()
+
+        try:
+            img_display = as_bgr(self.img)
+            img_other_display = as_bgr(self.img_other)
+
+            # Make the transformed image the same size as the original.
+            if img_display.shape != img_other_display.shape:
+                img_other_display = cv.resize(
+                    img_other_display, (img_display.shape[1], img_display.shape[0])
+                )
+
+            # Label both halves.
+            cv.putText(img_display, "Original", (10, 30), cv.FONT_HERSHEY_SIMPLEX,
+                       1, (0, 255, 0), 2, cv.LINE_AA)
+            cv.putText(img_other_display, f"{self.transform_name}", (10, 30),
+                       cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv.LINE_AA)
+
+            combined = cv.hconcat([img_display, img_other_display])
+
+            # Shrink the combined picture if it would not fit on the screen.
+            screen_width, screen_height = screen_size
+            max_width = int(screen_width * screen_fraction)
+            max_height = int(screen_height * screen_fraction)
+            h, w = combined.shape[:2]
+            if w > max_width or h > max_height:
+                scale = min(max_width / w, max_height / h)
+                # Clamp to 1 px so an extreme aspect ratio never yields size 0.
+                new_width = max(1, int(w * scale))
+                new_height = max(1, int(h * scale))
+                combined = cv.resize(combined, (new_width, new_height))
+
+            cv.namedWindow(window_name, cv.WINDOW_AUTOSIZE)
+            cv.imshow(window_name, combined)
+
+            # Give the window a moment to render before blocking on a key.
+            cv.waitKey(1)
+
+            print("Wyświetlono obraz. Naciśnij dowolny klawisz w oknie obrazu, aby zamknąć...")
+            cv.waitKey(0)
+            cv.destroyAllWindows()
+
+        except Exception as e:
+            print(f"Błąd podczas wyświetlania obrazów: {str(e)}")
+            raise
+
+        return None
+
+    def transform(self, transform_name: str, show_side_by_side=True):
+        """
+        Apply the named transformation to the loaded image.
+
+        The result is stored in ``self.img_other`` and the name in
+        ``self.transform_name``; ``self.img`` itself is not modified.
+        Supported names: NOTHING, GRAY, BLUR_GAUSSIAN, BLUR_MEDIAN,
+        BLUR_BILATERAL, CANNY, THRESHOLD, ADAPTIVE, SOBEL, LAPLACIAN,
+        FACE_DETECTION, EDGE_DETECTION, CONTOURS, SHARPEN, EMBOSS,
+        YOLO_DETECT, YOLO_DETECT_MEDIUM, YOLO_DETECT_LARGE, YOLO_SEGMENT.
+
+        Parameters
+        ----------
+        transform_name : str
+            Name of the transformation to apply.
+        show_side_by_side : bool
+            Whether to display original and result side by side afterwards.
+
+        Returns
+        -------
+        numpy.ndarray
+            The transformed image.
+
+        Raises
+        -------
+        ValueError
+            If the image has not been loaded, the transformation is not
+            supported, or a YOLO transformation is requested while the
+            ultralytics package is unavailable.
+        """
+        if not self.loaded:
+            raise ValueError("Obraz nie został wczytany")
+
+        # Drop the previous result before computing the new one.
+        self.img_other = None
+        self.transform_name = transform_name
+
+        def grayscale():
+            # Every gray-based transform goes through here so that an image
+            # that is already single-channel is accepted as well. The
+            # returned array is only ever read by the callers below.
+            if len(self.img.shape) == 3:
+                return cv.cvtColor(self.img, cv.COLOR_BGR2GRAY)
+            return self.img
+
+        # YOLO detection variants and the weight-size letter each one loads.
+        yolo_detect_sizes = {
+            "YOLO_DETECT": 'n',
+            "YOLO_DETECT_MEDIUM": 's',
+            "YOLO_DETECT_LARGE": 'm',
+        }
+
+        if transform_name == "NOTHING":
+            self.img_other = self.img.copy()
+
+        elif transform_name == "GRAY":
+            if len(self.img.shape) == 3:
+                self.img_other = cv.cvtColor(self.img, cv.COLOR_BGR2GRAY)
+            else:
+                # Already grayscale: hand back a copy, never an alias.
+                self.img_other = self.img.copy()
+
+        elif transform_name == "BLUR_GAUSSIAN":
+            self.img_other = cv.GaussianBlur(self.img, (15, 15), 0)
+
+        elif transform_name == "BLUR_MEDIAN":
+            self.img_other = cv.medianBlur(self.img, 5)
+
+        elif transform_name == "BLUR_BILATERAL":
+            # Bilateral filtering smooths while keeping edges.
+            self.img_other = cv.bilateralFilter(self.img, 9, 75, 75)
+
+        elif transform_name == "CANNY":
+            self.img_other = cv.Canny(grayscale(), 100, 200)
+
+        elif transform_name == "THRESHOLD":
+            self.img_other = cv.threshold(grayscale(), 127, 255, cv.THRESH_BINARY)[1]
+
+        elif transform_name == "ADAPTIVE":
+            self.img_other = cv.adaptiveThreshold(
+                grayscale(), 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 11, 2
+            )
+
+        elif transform_name == "SOBEL":
+            gray = grayscale()
+            sobelx = cv.Sobel(gray, cv.CV_64F, 1, 0, ksize=5)
+            sobely = cv.Sobel(gray, cv.CV_64F, 0, 1, ksize=5)
+            magnitude = cv.magnitude(sobelx, sobely)
+            # Saturate to 0..255; a bare uint8 cast wraps values above 255.
+            self.img_other = np.clip(magnitude, 0, 255).astype(np.uint8)
+
+        elif transform_name == "LAPLACIAN":
+            laplacian = cv.Laplacian(grayscale(), cv.CV_64F)
+            # Saturate to 0..255; a bare uint8 cast wraps values above 255.
+            self.img_other = np.clip(np.absolute(laplacian), 0, 255).astype(np.uint8)
+
+        elif transform_name == "FACE_DETECTION":
+            # Face detection with the frontal-face Haar cascade shipped with OpenCV.
+            self.img_other = self.img.copy()
+            cascade_path = cv.data.haarcascades + 'haarcascade_frontalface_default.xml'
+            face_cascade = cv.CascadeClassifier(cascade_path)
+            faces = face_cascade.detectMultiScale(
+                grayscale(), scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
+            )
+            # Mark every detected face with a rectangle and a label.
+            for (x, y, w, h) in faces:
+                cv.rectangle(self.img_other, (x, y), (x+w, y+h), (0, 255, 0), 2)
+                cv.putText(self.img_other, "Face", (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+        elif transform_name == "EDGE_DETECTION":
+            # Canny on a blurred grayscale image.
+            blurred = cv.GaussianBlur(grayscale(), (5, 5), 0)
+            self.img_other = cv.Canny(blurred, 50, 150)
+
+        elif transform_name == "CONTOURS":
+            # External contours of the Canny edges, drawn on a copy of the image.
+            self.img_other = self.img.copy()
+            blurred = cv.GaussianBlur(grayscale(), (5, 5), 0)
+            edges = cv.Canny(blurred, 50, 150)
+            contours, _ = cv.findContours(edges, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
+            cv.drawContours(self.img_other, contours, -1, (0, 255, 0), 2)
+
+        elif transform_name == "SHARPEN":
+            kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
+            self.img_other = cv.filter2D(self.img, -1, kernel)
+
+        elif transform_name == "EMBOSS":
+            kernel = np.array([[-2,-1,0], [-1,1,1], [0,1,2]])
+            self.img_other = cv.filter2D(self.img, -1, kernel)
+
+        elif transform_name in yolo_detect_sizes or transform_name == "YOLO_SEGMENT":
+            if not YOLO_AVAILABLE:
+                raise ValueError("YOLO nie jest dostępne. Zainstaluj: pip install ultralytics")
+
+            if transform_name == "YOLO_SEGMENT":
+                self.img_other = self._yolo_segmentation()
+            else:
+                self.img_other = self._yolo_detection(
+                    model_size=yolo_detect_sizes[transform_name]
+                )
+
+        else:
+            raise ValueError(f"Brak obsługiwanej metody transformacji: {transform_name}")
+
+        # Optionally show original and result next to each other.
+        if show_side_by_side and self.img_other is not None:
+            self.show_side_by_side()
+
+        return self.img_other
+    
+    def _yolo_detection(self, model_size='n', conf_threshold=0.25):
+        """
+        Run YOLO object detection on the loaded image.
+
+        The weights file ``yolov8{model_size}.pt`` is loaded on every call.
+        Each detection is drawn on a copy of ``self.img`` as a rectangle
+        with a "class: confidence" label, and a summary is printed.
+
+        Args:
+            model_size (str): Model size letter ('n', 's', 'm', 'l', 'x').
+            conf_threshold (float): Detection confidence threshold (0.0-1.0).
+
+        Returns:
+            numpy.ndarray: Copy of the image with the detections drawn on it.
+        """
+        print(f"\nŁadowanie modelu YOLO (yolov8{model_size})...")
+
+        model = YOLO(f'yolov8{model_size}.pt')
+
+        print("Wykonywanie detekcji obiektów...")
+
+        results = model(self.img, conf=conf_threshold, verbose=False)
+
+        # Draw on a copy so the original image stays untouched.
+        img_result = self.img.copy()
+
+        boxes = results[0].boxes if len(results) > 0 else None
+        if boxes is None or len(boxes) == 0:
+            # Also covers a result object that carries no boxes at all.
+            print("\n✗ Nie wykryto żadnych obiektów.")
+            return img_result
+
+        detected_objects = []
+
+        for box in boxes:
+            x1, y1, x2, y2 = map(int, box.xyxy[0])
+
+            conf = float(box.conf[0])
+            cls = int(box.cls[0])
+            class_name = model.names[cls]
+
+            detected_objects.append((class_name, conf))
+
+            # Same class -> same color.
+            color = self._get_color_for_class(cls)
+
+            cv.rectangle(img_result, (x1, y1), (x2, y2), color, 2)
+
+            label = f"{class_name}: {conf:.2f}"
+            (text_width, text_height), baseline = cv.getTextSize(
+                label, cv.FONT_HERSHEY_SIMPLEX, 0.6, 2
+            )
+
+            # The label normally sits on top of the box; when the box touches
+            # the top edge it is moved just inside the box so it stays visible.
+            label_height = text_height + baseline + 5
+            label_top = y1 - label_height
+            if label_top < 0:
+                label_top = y1
+            label_bottom = label_top + label_height
+
+            # Filled background for the text.
+            cv.rectangle(
+                img_result,
+                (x1, label_top),
+                (x1 + text_width, label_bottom),
+                color,
+                -1
+            )
+
+            cv.putText(
+                img_result,
+                label,
+                (x1, label_bottom - baseline - 5),
+                cv.FONT_HERSHEY_SIMPLEX,
+                0.6,
+                (255, 255, 255),
+                2
+            )
+
+        # Console summary.
+        print(f"\n✓ Wykryto {len(detected_objects)} obiektów:")
+        for obj_name, obj_conf in detected_objects:
+            print(f"  - {obj_name}: {obj_conf:.2%}")
+
+        return img_result
+    
+    def _yolo_segmentation(self, model_size='n', conf_threshold=0.25):
+        """
+        Run YOLO instance segmentation on the loaded image.
+
+        The weights file ``yolov8{model_size}-seg.pt`` is loaded on every
+        call. Each mask is tinted, outlined and labelled with
+        "class: confidence" on a copy of ``self.img``, and a summary is
+        printed.
+
+        Args:
+            model_size (str): Model size letter ('n', 's', 'm', 'l', 'x').
+            conf_threshold (float): Detection confidence threshold (0.0-1.0).
+
+        Returns:
+            numpy.ndarray: Copy of the image with the segmentation drawn on it.
+        """
+        print(f"\nŁadowanie modelu YOLO Segmentation (yolov8{model_size}-seg)...")
+
+        model = YOLO(f'yolov8{model_size}-seg.pt')
+
+        print("Wykonywanie segmentacji obiektów...")
+
+        results = model(self.img, conf=conf_threshold, verbose=False)
+
+        # Draw on a copy so the original image stays untouched.
+        img_result = self.img.copy()
+
+        if len(results) == 0:
+            print("\n✗ Nie wykryto żadnych obiektów.")
+            return img_result
+
+        result = results[0]
+        if result.masks is None:
+            print("\n✗ Nie wykryto żadnych obiektów do segmentacji.")
+            return img_result
+
+        masks = result.masks.data.cpu().numpy()
+        boxes = result.boxes
+
+        detected_objects = []
+
+        for mask, box in zip(masks, boxes):
+            conf = float(box.conf[0])
+            cls = int(box.cls[0])
+            class_name = model.names[cls]
+
+            detected_objects.append((class_name, conf))
+
+            # Same class -> same color.
+            color = self._get_color_for_class(cls)
+
+            # Bring the mask to the image size and binarise it.
+            mask_resized = cv.resize(mask, (img_result.shape[1], img_result.shape[0]))
+            mask_bool = mask_resized > 0.5
+
+            # The overlay is black outside the mask, so adding it with
+            # weight 0.4 tints only the masked pixels.
+            colored_mask = np.zeros_like(img_result)
+            colored_mask[mask_bool] = color
+            img_result = cv.addWeighted(img_result, 1.0, colored_mask, 0.4, 0)
+
+            # Outline of the mask.
+            contours, _ = cv.findContours(
+                mask_bool.astype(np.uint8),
+                cv.RETR_EXTERNAL,
+                cv.CHAIN_APPROX_SIMPLE
+            )
+            cv.drawContours(img_result, contours, -1, color, 2)
+
+            # Label above the box's top-left corner; moved below the top
+            # edge when there is no room above, so the text stays visible.
+            x1, y1, _, _ = map(int, box.xyxy[0])
+            label = f"{class_name}: {conf:.2f}"
+            (_, text_height), _ = cv.getTextSize(
+                label, cv.FONT_HERSHEY_SIMPLEX, 0.6, 2
+            )
+            label_y = y1 - 10
+            if label_y < text_height:
+                label_y = y1 + text_height + 10
+            cv.putText(
+                img_result,
+                label,
+                (x1, label_y),
+                cv.FONT_HERSHEY_SIMPLEX,
+                0.6,
+                color,
+                2
+            )
+
+        # Console summary.
+        print(f"\n✓ Zsegmentowano {len(detected_objects)} obiektów:")
+        for obj_name, obj_conf in detected_objects:
+            print(f"  - {obj_name}: {obj_conf:.2%}")
+
+        return img_result
+    
+    def _get_color_for_class(self, class_id):
+        """
+        Pick a drawing color for a class ID from a fixed ten-color palette.
+
+        The palette is indexed by ``class_id`` modulo its length, so IDs
+        that differ by a multiple of ten share a color.
+
+        Args:
+            class_id (int): Class ID.
+
+        Returns:
+            tuple: Color as a (B, G, R) tuple.
+        """
+        # Color names below describe the values read in BGR order.
+        palette = (
+            (255, 0, 0),      # blue
+            (0, 255, 0),      # green
+            (0, 0, 255),      # red
+            (255, 255, 0),    # cyan
+            (255, 0, 255),    # magenta
+            (0, 255, 255),    # yellow
+            (128, 0, 128),    # purple
+            (255, 128, 0),    # light blue
+            (0, 128, 255),    # orange
+            (128, 255, 0),    # spring green
+        )
+
+        slot = class_id % len(palette)
+        return palette[slot]
+