Mirko Trasciatti committed
Commit cf42079 · 1 Parent(s): a9a341a

REVERT to propagate_in_video_iterator - it DOES support bidirectional propagation!

Files changed (1)
  1. app.py +15 -37
app.py CHANGED
@@ -486,47 +486,25 @@ def segment_video_multi(video_file, objects_json):
     # Skip initial model inference - go straight to propagation
     # The propagation loop will handle init_frame when it reaches it
 
-    # Propagate through ALL frames explicitly (frame-by-frame)
-    # This ensures bidirectional propagation from init_frame
-    # Based on: https://huggingface.co/spaces/yonigozlan/Segment-Anything-2-video-tracking
+    # Use propagate_in_video_iterator for BIDIRECTIONAL propagation
+    # According to SAM2 docs, this should propagate both forward AND backward
+    # from the annotated frame (init_frame)
     video_segments = {}
     confidence_scores = []
 
-    print(f"  Propagating masks through all frames (0 → {len(video_frames)-1})...")
-    print(f"  Annotation at frame {init_frame} will guide propagation")
-
-    with torch.inference_mode():
-        # Process ALL frames in sequential order (0 → N)
-        # SAM2's temporal model expects sequential processing
-        # The annotated frame (init_frame) is already in processed_frames
-        for frame_idx in range(len(video_frames)):
-            frame_pil = video_frames[frame_idx]
-            pixel_values = None
-
-            # Check if this frame was already processed (e.g., the annotated frame)
-            if inference_session.processed_frames is None or frame_idx not in inference_session.processed_frames:
-                pixel_values = processor(images=frame_pil, device=device, return_tensors="pt").pixel_values[0]
-
-            # Call model - it will use the annotation if frame_idx == init_frame
-            sam2_output = model(
-                inference_session=inference_session,
-                frame=pixel_values,
-                frame_idx=frame_idx
-            )
-
-            # Post-process masks
-            H = inference_session.video_height
-            W = inference_session.video_width
-            pred_masks = sam2_output.pred_masks.detach().cpu()
-            video_res_masks = processor.post_process_masks(
-                [pred_masks],
-                original_sizes=[[H, W]],
-                binarize=False
-            )[0]
-
-            video_segments[frame_idx] = video_res_masks
-            mask_float = video_res_masks.float() if video_res_masks.dtype == torch.bool else video_res_masks
-            confidence_scores.append(float(mask_float.mean()))
+    print(f"  Using propagate_in_video_iterator for bidirectional propagation from frame {init_frame}...")
+
+    for sam2_output in model.propagate_in_video_iterator(inference_session):
+        video_res_masks = processor.post_process_masks(
+            [sam2_output.pred_masks],
+            original_sizes=[[inference_session.video_height, inference_session.video_width]],
+            binarize=False
+        )[0]
+        video_segments[sam2_output.frame_idx] = video_res_masks
+
+        # Calculate confidence
+        mask_float = video_res_masks.float() if video_res_masks.dtype == torch.bool else video_res_masks
+        confidence_scores.append(float(mask_float.mean()))
 
     print(f"  ✅ Got masks for {len(video_segments)} frames (init_frame was {init_frame})")
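
For context, here is a minimal sketch of how the loop in the new code is typically driven end to end with the Hugging Face transformers SAM2 video API. Everything outside the diff above is an assumption: the checkpoint name, the frame-loading placeholder, the session setup via init_video_session / add_inputs_to_inference_session, and the click coordinates are based on the transformers documentation, not on this commit.

import glob
import torch
from PIL import Image
from transformers import Sam2VideoModel, Sam2VideoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
model = Sam2VideoModel.from_pretrained("facebook/sam2.1-hiera-tiny").to(device)  # assumed checkpoint
processor = Sam2VideoProcessor.from_pretrained("facebook/sam2.1-hiera-tiny")

# Placeholder frame source and annotated-frame index
video_frames = [Image.open(p) for p in sorted(glob.glob("frames/*.jpg"))]
init_frame = 0

inference_session = processor.init_video_session(video=video_frames, inference_device=device)

# Register a positive click on the target object at the annotated frame
# (placeholder coordinates; nesting is [batch][object][point][x, y])
processor.add_inputs_to_inference_session(
    inference_session=inference_session,
    frame_idx=init_frame,
    obj_ids=1,
    input_points=[[[[320, 240]]]],
    input_labels=[[[1]]],  # 1 = positive click
)

# Same consumption pattern as the new code in the diff
video_segments, confidence_scores = {}, []
for sam2_output in model.propagate_in_video_iterator(inference_session):
    video_res_masks = processor.post_process_masks(
        [sam2_output.pred_masks],
        original_sizes=[[inference_session.video_height, inference_session.video_width]],
        binarize=False,
    )[0]
    video_segments[sam2_output.frame_idx] = video_res_masks
    mask_float = video_res_masks.float() if video_res_masks.dtype == torch.bool else video_res_masks
    confidence_scores.append(float(mask_float.mean()))

Compared to the reverted frame-by-frame loop, the iterator keeps frame preprocessing and memory-bank bookkeeping inside the model, so the caller only consumes (frame_idx, pred_masks) pairs.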