FaceAccess/VocieProcess/api/rtp_headers.h
2024-09-06 18:26:45 +08:00

209 lines
7.3 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_RTP_HEADERS_H_
#define API_RTP_HEADERS_H_
#include <stddef.h>
#include <stdint.h>
#include <string>
#include "absl/types/optional.h"
#include "api/units/timestamp.h"
#include "api/video/color_space.h"
#include "api/video/video_content_type.h"
#include "api/video/video_rotation.h"
#include "api/video/video_timing.h"
#include "rtc_base/checks.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
struct FeedbackRequest {
// Determines whether the recv delta as specified in
// https://tools.ietf.org/html/draft-holmer-rmcat-transport-wide-cc-extensions-01
// should be included.
bool include_timestamps;
// Include feedback of received packets in the range [sequence_number -
// sequence_count + 1, sequence_number]. That is, no feedback will be sent if
// sequence_count is zero.
int sequence_count;
};
// The Absolute Capture Time extension is used to stamp RTP packets with a NTP
// timestamp showing when the first audio or video frame in a packet was
// originally captured. The intent of this extension is to provide a way to
// accomplish audio-to-video synchronization when RTCP-terminating intermediate
// systems (e.g. mixers) are involved. See:
// http://www.webrtc.org/experiments/rtp-hdrext/abs-capture-time
struct AbsoluteCaptureTime {
// Absolute capture timestamp is the NTP timestamp of when the first frame in
// a packet was originally captured. This timestamp MUST be based on the same
// clock as the clock used to generate NTP timestamps for RTCP sender reports
// on the capture system.
//
// Its not always possible to do an NTP clock readout at the exact moment of
// when a media frame is captured. A capture system MAY postpone the readout
// until a more convenient time. A capture system SHOULD have known delays
// (e.g. from hardware buffers) subtracted from the readout to make the final
// timestamp as close to the actual capture time as possible.
//
// This field is encoded as a 64-bit unsigned fixed-point number with the high
// 32 bits for the timestamp in seconds and low 32 bits for the fractional
// part. This is also known as the UQ32.32 format and is what the RTP
// specification defines as the canonical format to represent NTP timestamps.
uint64_t absolute_capture_timestamp;
// Estimated capture clock offset is the senders estimate of the offset
// between its own NTP clock and the capture systems NTP clock. The sender is
// here defined as the system that owns the NTP clock used to generate the NTP
// timestamps for the RTCP sender reports on this stream. The sender system is
// typically either the capture system or a mixer.
//
// This field is encoded as a 64-bit twos complement signed fixed-point
// number with the high 32 bits for the seconds and low 32 bits for the
// fractional part. Its intended to make it easy for a receiver, that knows
// how to estimate the sender systems NTP clock, to also estimate the capture
// systems NTP clock:
//
// Capture NTP Clock = Sender NTP Clock + Capture Clock Offset
absl::optional<int64_t> estimated_capture_clock_offset;
};
// The audio level extension is used to indicate the voice activity and the
// audio level of the payload in the RTP stream. See:
// https://tools.ietf.org/html/rfc6464#section-3.
class AudioLevel {
public:
AudioLevel();
AudioLevel(bool voice_activity, int audio_level);
AudioLevel(const AudioLevel& other) = default;
AudioLevel& operator=(const AudioLevel& other) = default;
// Flag indicating whether the encoder believes the audio packet contains
// voice activity.
bool voice_activity() const { return voice_activity_; }
// Audio level in -dBov. Values range from 0 to 127, representing 0 to -127
// dBov. 127 represents digital silence.
int level() const { return audio_level_; }
private:
bool voice_activity_;
int audio_level_;
};
inline bool operator==(const AbsoluteCaptureTime& lhs,
const AbsoluteCaptureTime& rhs) {
return (lhs.absolute_capture_timestamp == rhs.absolute_capture_timestamp) &&
(lhs.estimated_capture_clock_offset ==
rhs.estimated_capture_clock_offset);
}
inline bool operator!=(const AbsoluteCaptureTime& lhs,
const AbsoluteCaptureTime& rhs) {
return !(lhs == rhs);
}
struct RTPHeaderExtension {
RTPHeaderExtension();
RTPHeaderExtension(const RTPHeaderExtension& other);
RTPHeaderExtension& operator=(const RTPHeaderExtension& other);
static constexpr int kAbsSendTimeFraction = 18;
Timestamp GetAbsoluteSendTimestamp() const {
RTC_DCHECK(hasAbsoluteSendTime);
RTC_DCHECK(absoluteSendTime < (1ul << 24));
return Timestamp::Micros((absoluteSendTime * 1000000ll) /
(1 << kAbsSendTimeFraction));
}
bool hasTransmissionTimeOffset;
int32_t transmissionTimeOffset;
bool hasAbsoluteSendTime;
uint32_t absoluteSendTime;
absl::optional<AbsoluteCaptureTime> absolute_capture_time;
bool hasTransportSequenceNumber;
uint16_t transportSequenceNumber;
absl::optional<FeedbackRequest> feedback_request;
// Audio Level includes both level in dBov and voiced/unvoiced bit. See:
// https://tools.ietf.org/html/rfc6464#section-3
absl::optional<AudioLevel> audio_level() const { return audio_level_; }
void set_audio_level(absl::optional<AudioLevel> audio_level) {
audio_level_ = audio_level;
}
// For Coordination of Video Orientation. See
// http://www.etsi.org/deliver/etsi_ts/126100_126199/126114/12.07.00_60/
// ts_126114v120700p.pdf
bool hasVideoRotation;
VideoRotation videoRotation;
// TODO(ilnik): Refactor this and one above to be absl::optional() and remove
// a corresponding bool flag.
bool hasVideoContentType;
VideoContentType videoContentType;
bool has_video_timing;
VideoSendTiming video_timing;
VideoPlayoutDelay playout_delay;
// For identification of a stream when ssrc is not signaled. See
// https://tools.ietf.org/html/rfc8852
std::string stream_id;
std::string repaired_stream_id;
// For identifying the media section used to interpret this RTP packet. See
// https://tools.ietf.org/html/rfc8843
std::string mid;
absl::optional<ColorSpace> color_space;
private:
absl::optional<AudioLevel> audio_level_;
};
enum { kRtpCsrcSize = 15 }; // RFC 3550 page 13
struct RTC_EXPORT RTPHeader {
RTPHeader();
RTPHeader(const RTPHeader& other);
RTPHeader& operator=(const RTPHeader& other);
bool markerBit;
uint8_t payloadType;
uint16_t sequenceNumber;
uint32_t timestamp;
uint32_t ssrc;
uint8_t numCSRCs;
uint32_t arrOfCSRCs[kRtpCsrcSize];
size_t paddingLength;
size_t headerLength;
RTPHeaderExtension extension;
};
// RTCP mode to use. Compound mode is described by RFC 4585 and reduced-size
// RTCP mode is described by RFC 5506.
enum class RtcpMode { kOff, kCompound, kReducedSize };
enum NetworkState {
kNetworkUp,
kNetworkDown,
};
} // namespace webrtc
#endif // API_RTP_HEADERS_H_