前两个月更新的PICO Unity Integration SDK3.4.0版本支持在消费版PICO 4 Ultra设备上使用 PXR_CameraImage 类提供的接口来管理相机图像数据(但还是不能访问深度传感器)

相机图像数据(用户设备) | PICO 开发者平台 https://developer-cn.picoxr.com/document/unity/camera-image-data-management/PXR_CameraImage | PICO 开发者平台 https://developer-cn.picoxr.com/reference/unity/client-api/PXR_CameraImage/#02a2ba70参照上述说明文档和api手册,实现双路相机 → PC 预览

首先要权限声明,在AndroidManifest.xml 中声明相机权限。

<uses-permission android:name="android.permission.CAMERA" />

写一个测试代码:

using UnityEngine;
using Unity.XR.PXR; 
public class testCamera : MonoBehaviour
{
    // Start is called once before the first execution of Update after the MonoBehaviour is created
    void Start()
    {
        PxrResult ret = PXR_CameraImage.GetAvailableCameras(out XrCameraIdPICO[] cameraIds);
        if (ret == PxrResult.SUCCESS)
        {
            // 打印总数
            Debug.Log("[Camera] GetAvailableCameras total:" + cameraIds.Length);
            foreach (var cameraId in cameraIds)
            {
                Debug.Log("[Camera] GetAvailableCameras cameraId:" + cameraId);
            }
        }
        else{
            Debug.Log("[Camera] GetAvailableCameras failed:" + ret);
        }
    }
}

ADB输出日志应看到类似内容,证明成功。

04-29 17:41:17.900 17199 18485 I Unity   : [Camera] GetAvailableCameras total:2
04-29 17:41:17.900 17199 18485 I Unity   : [Camera] GetAvailableCameras cameraId:XR_CAMERA_ID_RGB_LEFT_PICO
04-29 17:41:17.900 17199 18485 I Unity   : [Camera] GetAvailableCameras cameraId:XR_CAMERA_ID_RGB_RIGHT_PICO

双路相机画面推送到局域网的电脑上:

先用 ZeroMQ(NetMQ + pyzmq),模式为 PUSH(头显)/ PULL(PC),比 WebRTC 轻(后续可能填一下坑),适合原始 RGBA 大包调试。

架构:

头显(Unity)

  1. 使用 PICO SDK 的 PXR_CameraImage:枚举相机 → CreateCameraDeviceAsync / CreateCameraCaptureSessionAsync → BeginCameraCapture → 每帧 AcquireCameraImage → GetCameraImageData → 拷成紧凑 RGBA → ReleaseCameraImage。
  2. 主线程采集,单独线程里 NetMQ PushSocket.Connect("tcp://PC_IP:5555"),multipart 发送:[固定头 25B][payload]。
  3. 头里含:左右眼编号、宽高、stride、bpp、时间戳等,与 Python struct.unpack 对齐。

PC(Python)

  1. PULL.bind("tcp://0.0.0.0:5555"),recv_multipart,numpy + OpenCV 显示
  2. 透视朝向与屏幕习惯可能不一致,所以做 180° 旋转 + 水平镜像
     
using System;
using System.Collections;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using NetMQ;
using NetMQ.Sockets;
using UnityEngine;
using Unity.XR.PXR;
#if UNITY_ANDROID && !UNITY_EDITOR
using UnityEngine.Android;
#endif

/// <summary>
/// 从 PICO 左右 RGB 相机取帧,经 ZMQ PUSH 发往 PC;PC 端用 PULL + Python 脚本接收。
/// 需在 Inspector 填写本机局域网 IP(运行 zmq_camera_viewer 的机器)
/// </summary>
public class PicoCameraZmqStreamer : MonoBehaviour
{
    const int HeaderBytes = 25; // 见 Python 端 struct.unpack

    [SerializeField] string serverHost = "192.168.1.100";
    [SerializeField] int serverPort = 5555;
    [SerializeField] int targetWidth = 640;
    [SerializeField] int targetHeight = 480;
    [SerializeField] XrCameraImageFpsPICO targetFps = XrCameraImageFpsPICO.XR_CAMERA_IMAGE_FPS_30_PICO;
    [Tooltip("When the network cannot keep up, drop older frames and send only the latest per eye (lower latency, live preview). Off = FIFO queue (higher latency if backlog).")]
    [SerializeField] bool dropStaleFrames = true;
    [Tooltip("ZMQ outbound queue cap (small = lower latency, may block if PC is very slow).")]
    [SerializeField] int sendSocketHighWaterMark = 4;

    readonly List<XrCameraIdPICO> _activeCameras = new List<XrCameraIdPICO>(2);
    readonly Dictionary<XrCameraIdPICO, long> _lastCaptureTime = new Dictionary<XrCameraIdPICO, long>();
    readonly ConcurrentQueue<FrameEnvelope> _sendQueue = new ConcurrentQueue<FrameEnvelope>();

    Thread _netThread;
    volatile bool _netRunning;
    bool _pipelineReady;
    int _diagEnqueueLogged;
    long _diagSentTotal;
#if UNITY_ANDROID && !UNITY_EDITOR
    bool _androidPermissionsGranted;
    const string PvrSpatialDataPermission = "com.picovr.permission.SPATIAL_DATA";
#endif

    struct FrameEnvelope
    {
        public byte[] Header;
        public byte[] Payload;
    }

    void Start()
    {
#if UNITY_ANDROID && !UNITY_EDITOR
        StartCoroutine(StartAfterAndroidPermissions());
#else
        StartCoroutine(StartPipelineCoroutine());
#endif
    }

#if UNITY_ANDROID && !UNITY_EDITOR
    IEnumerator StartAfterAndroidPermissions()
    {
        yield return EnsureAndroidCameraPermissions();
        if (!_androidPermissionsGranted)
        {
            Debug.LogError(
                "[ZmqCamera] Camera and SPATIAL_DATA (MR) permissions required. ");
            yield break;
        }

        yield return StartPipelineCoroutine();
    }

    IEnumerator EnsureAndroidCameraPermissions()
    {
        var need = new List<string>(2);
        if (!Permission.HasUserAuthorizedPermission(Permission.Camera))
            need.Add(Permission.Camera);
        if (!Permission.HasUserAuthorizedPermission(PvrSpatialDataPermission))
            need.Add(PvrSpatialDataPermission);

        if (need.Count == 0)
        {
            _androidPermissionsGranted = true;
            yield break;
        }

        Debug.Log("[ZmqCamera] Requesting permissions: " + string.Join(", ", need));

        var settled = false;
        var callbacks = new PermissionCallbacks();
        callbacks.PermissionGranted += _ =>
        {
            if (Permission.HasUserAuthorizedPermission(Permission.Camera) &&
                Permission.HasUserAuthorizedPermission(PvrSpatialDataPermission))
            {
                _androidPermissionsGranted = true;
                settled = true;
            }
        };
        callbacks.PermissionDenied += name =>
        {
            Debug.LogWarning("[ZmqCamera] Permission denied: " + name);
            settled = true;
        };
        callbacks.PermissionDeniedAndDontAskAgain += name =>
        {
            Debug.LogWarning("[ZmqCamera] Permission denied (don't ask again): " + name);
            settled = true;
        };

        Permission.RequestUserPermissions(need.ToArray(), callbacks);

        var waited = 0f;
        const float timeout = 120f;
        while (!settled && waited < timeout)
        {
            waited += Time.unscaledDeltaTime;
            yield return null;
        }

        if (!_androidPermissionsGranted && !settled)
            Debug.LogError("[ZmqCamera] Permission request timed out.");
    }
#endif

    IEnumerator StartPipelineCoroutine()
    {
        var task = InitializePipelineAsync();
        while (!task.IsCompleted)
            yield return null;
        if (task.IsFaulted && task.Exception != null)
            Debug.LogError("[ZmqCamera] Init failed: " + task.Exception.GetBaseException());
    }

    void Update()
    {
        if (!_pipelineReady)
            return;

        foreach (var cam in _activeCameras)
        {
            byte eyeIndex = cam == XrCameraIdPICO.XR_CAMERA_ID_RGB_LEFT_PICO ? (byte)0 : (byte)1;
            TryEnqueueFrame(cam, eyeIndex);
        }
    }

    void OnDestroy()
    {
        _netRunning = false;
        try
        {
            _netThread?.Join(4000);
        }
        catch
        {
            // ignored
        }

        foreach (var cam in _activeCameras)
        {
            PXR_CameraImage.EndCameraCapture(cam);
            PXR_CameraImage.DestroyCameraCaptureSession(cam);
            PXR_CameraImage.DestroyCameraDevice(cam);
        }

        _activeCameras.Clear();
    }

    async Task InitializePipelineAsync()
    {
        var retEnum = PXR_CameraImage.GetAvailableCameras(out var cameraIds);
        if (retEnum != PxrResult.SUCCESS || cameraIds == null || cameraIds.Length == 0)
        {
            Debug.LogError("[ZmqCamera] GetAvailableCameras failed: " + retEnum);
            return;
        }

        var tryIds = new[] { XrCameraIdPICO.XR_CAMERA_ID_RGB_LEFT_PICO, XrCameraIdPICO.XR_CAMERA_ID_RGB_RIGHT_PICO };
        foreach (var camId in tryIds)
        {
            if (Array.IndexOf(cameraIds, camId) < 0)
                continue;

            var devRet = await PXR_CameraImage.CreateCameraDeviceAsync(camId);
            if (devRet != PxrResult.SUCCESS)
            {
                Debug.LogWarning("[ZmqCamera] CreateCameraDeviceAsync " + camId + " -> " + devRet);
                continue;
            }

            if (!TryPickResolution(camId, out var w, out var h))
            {
                Debug.LogWarning("[ZmqCamera] No resolution for " + camId);
                PXR_CameraImage.DestroyCameraDevice(camId);
                continue;
            }

            var sessRet = await PXR_CameraImage.CreateCameraCaptureSessionAsync(
                camId,
                w,
                h,
                targetFps,
                XrCameraImageFormatPICO.XR_CAMERA_IMAGE_FORMAT_RGBA_8888_PICO,
                XrCameraDataTransferTypePICO.XR_CAMERA_DATA_TRANSFER_TYPE_RAW_BUFFER_PICO,
                XrCameraModelPICO.XR_CAMERA_MODEL_PINHOLE_PICO);

            if (sessRet != PxrResult.SUCCESS)
            {
                Debug.LogWarning("[ZmqCamera] CreateCameraCaptureSessionAsync " + camId + " -> " + sessRet);
                PXR_CameraImage.DestroyCameraDevice(camId);
                continue;
            }

            var beginRet = PXR_CameraImage.BeginCameraCapture(camId);
            if (beginRet != PxrResult.SUCCESS)
            {
                Debug.LogWarning("[ZmqCamera] BeginCameraCapture " + camId + " -> " + beginRet);
                PXR_CameraImage.DestroyCameraCaptureSession(camId);
                PXR_CameraImage.DestroyCameraDevice(camId);
                continue;
            }

            _activeCameras.Add(camId);
            _lastCaptureTime[camId] = 0;
            Debug.Log("[ZmqCamera] Streaming " + camId + " at " + w + "x" + h);
        }

        if (_activeCameras.Count == 0)
        {
            Debug.LogError("[ZmqCamera] No camera started.");
            return;
        }

        _netRunning = true;
        _netThread = new Thread(NetThreadMain)
        {
            IsBackground = true,
            Name = "NetMQ-Push"
        };
        _netThread.Start();
        _pipelineReady = true;
    }

    bool TryPickResolution(XrCameraIdPICO camId, out int w, out int h)
    {
        w = targetWidth;
        h = targetHeight;
        var ret = PXR_CameraImage.GetCameraImageResolutionCapability(camId, out var res);
        if (ret != PxrResult.SUCCESS || res == null || res.Length == 0)
            return true;

        for (var i = 0; i < res.Length; i++)
        {
            if (res[i].width == targetWidth && res[i].height == targetHeight)
            {
                w = res[i].width;
                h = res[i].height;
                return true;
            }
        }

        PxrExtent2Di best = res[0];
        var bestDist = int.MaxValue;
        foreach (var r in res)
        {
            var d = Math.Abs(r.width - targetWidth) + Math.Abs(r.height - targetHeight);
            if (d < bestDist)
            {
                bestDist = d;
                best = r;
            }
        }

        w = best.width;
        h = best.height;
        return true;
    }

    void TryEnqueueFrame(XrCameraIdPICO camId, byte eyeIndex)
    {
        var last = _lastCaptureTime[camId];
        var acq = PXR_CameraImage.AcquireCameraImage(camId, last, out var imageId, out var captureTime);
        if (acq != PxrResult.SUCCESS)
            return;

        try
        {
            var dataRet = PXR_CameraImage.GetCameraImageData(camId, imageId, out var raw);
            if (dataRet != PxrResult.SUCCESS || raw.buffer == IntPtr.Zero || raw.bufferSize == 0)
                return;

            var tight = CopyTightRgba(ref raw);
            var header = new byte[HeaderBytes];
            header[0] = eyeIndex;
            BitConverter.GetBytes((uint)raw.width).CopyTo(header, 1);
            BitConverter.GetBytes((uint)raw.height).CopyTo(header, 5);
            BitConverter.GetBytes((uint)raw.stride).CopyTo(header, 9);
            BitConverter.GetBytes((uint)raw.bytesPerPixel).CopyTo(header, 13);
            BitConverter.GetBytes(captureTime).CopyTo(header, 17);

            _sendQueue.Enqueue(new FrameEnvelope { Header = header, Payload = tight });
            _lastCaptureTime[camId] = captureTime;
            if (_diagEnqueueLogged < 5)
            {
                _diagEnqueueLogged++;
                Debug.Log("[ZmqCamera] enqueue sample " + _diagEnqueueLogged + " eye=" + eyeIndex + " payload=" +
                          tight.Length + " bytes");
            }
        }
        finally
        {
            PXR_CameraImage.ReleaseCameraImage(camId, imageId);
        }
    }

    static byte[] CopyTightRgba(ref XrCameraImageDataRawBuffer raw)
    {
        var w = (int)raw.width;
        var h = (int)raw.height;
        var bpp = (int)raw.bytesPerPixel;
        var stride = (int)raw.stride;
        var rowTight = w * bpp;
        var dst = new byte[rowTight * h];
        for (var y = 0; y < h; y++)
        {
            var row = IntPtr.Add(raw.buffer, y * stride);
            Marshal.Copy(row, dst, y * rowTight, rowTight);
        }

        return dst;
    }

    bool TryDrainLatestPerEye(out bool hasLeft, out FrameEnvelope left, out bool hasRight, out FrameEnvelope right)
    {
        left = right = default;
        hasLeft = hasRight = false;
        if (!_sendQueue.TryDequeue(out var first))
            return false;

        var hl = false;
        var hr = false;
        var el = default(FrameEnvelope);
        var er = default(FrameEnvelope);

        void Take(FrameEnvelope e)
        {
            if (e.Header == null || e.Header.Length == 0)
                return;
            var eye = e.Header[0];
            if (eye == 0)
            {
                el = e;
                hl = true;
            }
            else
            {
                er = e;
                hr = true;
            }
        }

        Take(first);
        while (_sendQueue.TryDequeue(out var more))
            Take(more);

        hasLeft = hl;
        hasRight = hr;
        left = el;
        right = er;
        return true;
    }

    void SendOne(PushSocket push, ref FrameEnvelope env)
    {
        push.SendMoreFrame(env.Header);
        push.SendFrame(env.Payload);
        _diagSentTotal++;
        if (_diagSentTotal <= 3L || _diagSentTotal % 120L == 0L)
            Debug.Log("[ZmqCamera] frames sent to PC=" + _diagSentTotal);
    }

    void NetThreadMain()
    {
        try
        {
            AsyncIO.ForceDotNet.Force();
        }
        catch (Exception e)
        {
            Debug.LogError("[ZmqCamera] AsyncIO.ForceDotNet: " + e);
            return;
        }

        try
        {
            using (var push = new PushSocket())
            {
                var host = (serverHost ?? string.Empty).Trim();
                if (host.Length == 0)
                {
                    Debug.LogError("[ZmqCamera] serverHost is empty");
                    return;
                }

                push.Options.Linger = TimeSpan.Zero;
                push.Options.SendHighWatermark = Mathf.Max(1, sendSocketHighWaterMark);

                var endpoint = "tcp://" + host + ":" + serverPort;
                push.Connect(endpoint);
                Debug.Log("[ZmqCamera] NetMQ PUSH -> " + endpoint +
                          " | start Python PULL first; allow inbound TCP " + serverPort + " on PC firewall");
                Thread.Sleep(250);

                while (_netRunning)
                {
                    if (dropStaleFrames)
                    {
                        if (!TryDrainLatestPerEye(out var hasL, out var envL, out var hasR, out var envR))
                        {
                            Thread.Sleep(1);
                            continue;
                        }

                        try
                        {
                            if (hasL)
                                SendOne(push, ref envL);
                            if (hasR)
                                SendOne(push, ref envR);
                        }
                        catch (Exception ex)
                        {
                            Debug.LogError("[ZmqCamera] Send failed: " + ex.Message);
                            Thread.Sleep(50);
                        }
                    }
                    else
                    {
                        if (!_sendQueue.TryDequeue(out var env))
                        {
                            Thread.Sleep(1);
                            continue;
                        }

                        try
                        {
                            SendOne(push, ref env);
                        }
                        catch (Exception ex)
                        {
                            Debug.LogError("[ZmqCamera] Send failed: " + ex.Message);
                            Thread.Sleep(50);
                        }
                    }
                }
            }
        }
        catch (Exception e)
        {
            Debug.LogError("[ZmqCamera] Net thread: " + e);
        }
        finally
        {
            try
            {
                NetMQConfig.Cleanup(false);
            }
            catch
            {
                // ignored
            }
        }
    }
}

挂到场景中,并修改server host为电脑的ip,然后build和install

# 依赖: pip install pyzmq opencv-python numpy
#
# 用法: python zmq_camera_viewer.py
# 默认 bind tcp://*:5555 ,与 Unity PicoCameraZmqStreamer 中端口一致。
# 防火墙需放行该端口。
# OpenCV 窗口获得焦点时按 Esc(27) 退出;无视频流时也可退出(接收超时后会轮询按键)。

import struct
import sys
import time
from typing import Optional, Tuple

import cv2
import numpy as np
import zmq

HEADER_FMT = "<BIIIIq"  # eye, w, h, stride, bpp(uint32), capture_time(int64);25 字节.
HEADER_SIZE = struct.calcsize(HEADER_FMT)


def parse_header(blob: bytes) -> Optional[Tuple[int, int, int, int, int]]:
    if len(blob) < HEADER_SIZE:
        return None
    eye, w, h, stride, bpp, _cap_t = struct.unpack(HEADER_FMT, blob[:HEADER_SIZE])
    return eye, w, h, stride, bpp


def main():
    port = 5555
    if len(sys.argv) > 1:
        port = int(sys.argv[1])

    ctx = zmq.Context.instance()
    sock = ctx.socket(zmq.PULL)
    sock.setsockopt(zmq.RCVTIMEO, 250)
    bind_addr = f"tcp://0.0.0.0:{port}"
    sock.bind(bind_addr)
    print(f"PULL bound {bind_addr}, waiting for PICO... (Esc to quit)")
    print(
        "若一直无画面: 1) 用「管理员 PowerShell」放行: "
        "netsh advfirewall firewall add rule name=\"ZMQ PICO\" dir=in action=allow protocol=TCP localport="
        f"{port} 2) 确认头显里 server IP 是本机当前网卡 IP 3) 尽量先开本脚本再进 VR"
    )

    cv2.namedWindow("_zmq_viewer_status", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("_zmq_viewer_status", 400, 48)
    status_img = np.zeros((48, 400, 3), dtype=np.uint8)
    cv2.putText(
        status_img,
        "Esc = quit | waiting for stream...",
        (8, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.5,
        (220, 220, 220),
        1,
        cv2.LINE_AA,
    )
    cv2.imshow("_zmq_viewer_status", status_img)

    windows = {"0": "pico_left", "1": "pico_right"}
    n_recv = 0
    t_hb = time.monotonic()

    def esc_pressed() -> bool:
        return (cv2.waitKey(1) & 0xFF) == 27

    while True:
        try:
            parts = sock.recv_multipart()
        except zmq.Again:
            now = time.monotonic()
            if now - t_hb >= 2.0:
                print(
                    f"... 尚无数据包 (已收 {n_recv} 帧). 检查防火墙入站 {port} / 头显 IP / 先开 viewer 再开 App"
                )
                t_hb = now
            if esc_pressed():
                break
            continue
        except KeyboardInterrupt:
            break

        if len(parts) != 2:
            print("unexpected multipart count:", len(parts))
            if esc_pressed():
                break
            continue

        header_blob, payload = parts[0], parts[1]
        n_recv += 1
        if n_recv <= 3:
            print(
                f"收到帧 #{n_recv}: header={len(header_blob)} payload={len(payload)}"
            )
        parsed = parse_header(header_blob)
        if not parsed:
            if esc_pressed():
                break
            continue
        eye, w, h, stride, bpp = parsed
        need = w * h * bpp
        if len(payload) < need:
            print(f"short payload got {len(payload)} need ~{need}")
            if esc_pressed():
                break
            continue

        rgba = np.frombuffer(payload, dtype=np.uint8, count=need).reshape((h, w, 4))
        bgr = cv2.cvtColor(rgba, cv2.COLOR_RGBA2BGR)
        # PICO 透视相机朝向与屏幕预期不一致时校正:先 180° 再左右镜像
        bgr = cv2.rotate(bgr, cv2.ROTATE_180)
        bgr = cv2.flip(bgr, 1)
        win = windows.get(str(eye), f"pico_cam_{eye}")
        cv2.imshow(win, bgr)
        if esc_pressed():
            break

    cv2.destroyAllWindows()
    sock.close(0)
    ctx.term()


if __name__ == "__main__":
    main()

电脑端需防火墙放行端口

netsh advfirewall firewall add rule name=\"ZMQ PICO\" dir=in action=allow protocol=TCP localport=5555

运行结果:

之后可能集成一些深度学习模型算法做视觉检测?似乎这个比较流行。

 

Logo

AtomGit 是由开放原子开源基金会联合 CSDN 等生态伙伴共同推出的新一代开源与人工智能协作平台。平台坚持“开放、中立、公益”的理念,把代码托管、模型共享、数据集托管、智能体开发体验和算力服务整合在一起,为开发者提供从开发、训练到部署的一站式体验。

更多推荐