聊聊flink的AsyncWaitOperator


本文主要研究一下flink的AsyncWaitOperator

AsyncWaitOperator
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/AsyncWaitOperator.java

@Internal
public class AsyncWaitOperator<IN, OUT>
extends AbstractUdfStreamOperator<OUT, AsyncFunction<IN, OUT>>
implements OneInputStreamOperator<IN, OUT>, OperatorActions {
private static final long serialVersionUID = 1L;

private static final String STATE_NAME = "_async_wait_operator_state_";

/** Capacity of the stream element queue. */
private final int capacity;

/** Output mode for this operator. */
private final AsyncDataStream.OutputMode outputMode;

/** Timeout for the async collectors. */
private final long timeout;

protected transient Object checkpointingLock;

/** {@link TypeSerializer} for inputs while making snapshots. */
private transient StreamElementSerializer<IN> inStreamElementSerializer;

/** Recovered input stream elements. */
private transient ListState<StreamElement> recoveredStreamElements;

/** Queue to store the currently in-flight stream elements into. */
private transient StreamElementQueue queue;

/** Pending stream element which could not yet added to the queue. */
private transient StreamElementQueueEntry<?> pendingStreamElementQueueEntry;

private transient ExecutorService executor;

/** Emitter for the completed stream element queue entries. */
private transient Emitter<OUT> emitter;

/** Thread running the emitter. */
private transient Thread emitterThread;

public AsyncWaitOperator(
AsyncFunction<IN, OUT> asyncFunction,
long timeout,
int capacity,
AsyncDataStream.OutputMode outputMode) {
super(asyncFunction);
chainingStrategy = ChainingStrategy.ALWAYS;

Preconditions.checkArgument(capacity > 0, "The number of concurrent async operation should be greater than 0.");
this.capacity = capacity;

this.outputMode = Preconditions.checkNotNull(outputMode, "outputMode");

this.timeout = timeout;
}

@Override
public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) {
super.setup(containingTask, config, output);

this.checkpointingLock = getContainingTask().getCheckpointLock();

this.inStreamElementSerializer = new StreamElementSerializer<>(
getOperatorConfig().<IN>getTypeSerializerIn1(getUserCodeClassloader()));

// create the operators executor for the complete operations of the queue entries
this.executor = Executors.newSingleThreadExecutor();

switch (outputMode) {
case ORDERED:
queue = new OrderedStreamElementQueue(
capacity,
executor,
this);
break;
case UNORDERED:
queue = new UnorderedStreamElementQueue(
capacity,
executor,
this);
break;
default:
throw new IllegalStateException("Unknown async mode: " + outputMode + ‘.‘);
}
}

@Override
public void open() throws Exception {
super.open();

// create the emitter
this.emitter = new Emitter<>(checkpointingLock, output, queue, this);

// start the emitter thread
this.emitterThread = new Thread(emitter, "AsyncIO-Emitter-Thread (" + getOperatorName() + ‘)‘);
emitterThread.setDaemon(true);
emitterThread.start();

// process stream elements from state, since the Emit thread will start as soon as all
// elements from previous state are in the StreamElementQueue, we have to make sure that the
// order to open all operators in the operator chain proceeds from the tail operator to the
// head operator.
if (recoveredStreamElements != null) {
for (StreamElement element : recoveredStreamElements.get()) {
if (element.isRecord()) {
processElement(element.<IN>asRecord());
}
else if (element.isWatermark()) {
processWatermark(element.asWatermark());
}
else if (element.isLatencyMarker()) {
processLatencyMarker(element.asLatencyMarker());
}
else {
throw new IllegalStateException("Unknown record type " + element.getClass() +
" encountered while opening the operator.");
}
}
recoveredStreamElements = null;
}

}

@Override
public void processElement(StreamRecord<IN> element) throws Exception {
final StreamRecordQueueEntry<OUT> streamRecordBufferEntry = new StreamRecordQueueEntry<>(element);

if (timeout >www.yongshiyule178.com 0L) {
// register a timeout for this AsyncStreamRecordBufferEntry
long timeoutTimestamp = timeout + getProcessingTimeService().getCurrentProcessingTime();

final ScheduledFuture<?> timerFuture = getProcessingTimeService().registerTimer(
timeoutTimestamp,
new ProcessingTimeCallback() {
@Override
public void onProcessingTime(long timestamp) throws Exception {
userFunction.timeout(element.getValue(), streamRecordBufferEntry);
}
});

// Cancel the timer once we‘ve completed the stream record buffer entry. This will remove
// the register trigger task
streamRecordBufferEntry.onComplete(
(StreamElementQueueEntry<www.gxgjpt1.com Collection<OUT>> value) -> {
timerFuture.cancel(true);
},
executor);
}

addAsyncBufferEntry(streamRecordBufferEntry);

userFunction.asyncInvoke(element.getValue(), streamRecordBufferEntry);
}

@Override
public void processWatermark(Watermark mark) throws Exception {
WatermarkQueueEntry watermarkBufferEntry = new WatermarkQueueEntry(mark);

addAsyncBufferEntry(watermarkBufferEntry);
}

@Override
public void snapshotState(StateSnapshotContext www.boyunylpt1.com context) throws Exception {
super.snapshotState(context);

ListState<StreamElement> partitionableState =
getOperatorStateBackend().getListState(new ListStateDescriptor<>(STATE_NAME, inStreamElementSerializer));
partitionableState.clear();

Collection<StreamElementQueueEntry<www.gcyL157.com>> values = queue.values();

try {
for (StreamElementQueueEntry<?> value : values) {
partitionableState.add(value.getStreamElement());
}

// add the pending stream element queue entry if the stream element queue is currently full
if (pendingStreamElementQueueEntry != null) {
partitionableState.add(pendingStreamElementQueueEntry.getStreamElement());
}
} catch (Exception e) {
partitionableState.clear(www.mingrenf178.com );

throw new Exception("Could not add stream element queue entries to operator state " +
"backend of operator " + getOperatorName() + ‘.‘, e);
}
}

@Override
public void initializeState(StateInitializationContext context) throws Exception {
super.initializeState(context);
recoveredStreamElements = context
.getOperatorStateStore()
.getListState(new ListStateDescriptor<>(STATE_NAME, inStreamElementSerializer));

}

@Override
public void close() throws Exception {
try {
assert(Thread.holdsLock(checkpointingLock));

while (!queue.isEmpty()) {
// wait for the emitter thread to output the remaining elements
// for that he needs the checkpointing lock and thus we have to free it
checkpointingLock.wait();
}
}
finally {
Exception exception = null;

try {
super.close();
} catch (InterruptedException interrupted) {
exception = interrupted;

Thread.currentThread().interrupt();
} catch (Exception e) {
exception = e;
}

try {
// terminate the emitter, the emitter thread and the executor
stopResources(true);
} catch (InterruptedException interrupted) {
exception = ExceptionUtils.firstOrSuppressed(interrupted, exception);

Thread.currentThread().interrupt();
} catch (Exception e) {
exception = ExceptionUtils.firstOrSuppressed(e, exception);
}

if (exception != null) {
LOG.warn("Errors occurred while closing the AsyncWaitOperator.", exception);
}
}
}

@Override
public void dispose() throws Exception {
Exception exception = null;

try {
super.dispose();
} catch (InterruptedException interrupted) {
exception = interrupted;

Thread.currentThread().interrupt();
} catch (Exception e) {
exception = e;
}

try {
stopResources(false);
} catch (InterruptedException interrupted) {
exception = ExceptionUtils.firstOrSuppressed(interrupted, exception);

Thread.currentThread().interrupt();
} catch (Exception e) {
exception = ExceptionUtils.firstOrSuppressed(e, exception);
}

if (exception != null) {
throw exception;
}
}

private void stopResources(boolean waitForShutdown) throws InterruptedException {
emitter.stop();
emitterThread.interrupt();

executor.shutdown();

if (waitForShutdown) {
try {
if (!executor.awaitTermination(365L, TimeUnit.DAYS)) {
executor.shutdownNow();
}
} catch (InterruptedException e) {
executor.shutdownNow();

Thread.currentThread().interrupt();
}

/*
* FLINK-5638: If we have the checkpoint lock we might have to free it for a while so
* that the emitter thread can complete/react to the interrupt signal.
*/
if (Thread.holdsLock(checkpointingLock)) {
while (emitterThread.isAlive()) {
checkpointingLock.wait(100L);
}
}

emitterThread.join();
} else {
executor.shutdownNow();
}
}

private <T> void addAsyncBufferEntry(StreamElementQueueEntry<T> streamElementQueueEntry) throws InterruptedException {
assert(Thread.holdsLock(checkpointingLock));

pendingStreamElementQueueEntry = streamElementQueueEntry;

while (!queue.tryPut(streamElementQueueEntry)) {
// we wait for the emitter to notify us if the queue has space left again
checkpointingLock.wait();
}

pendingStreamElementQueueEntry = null;
}

@Override
public void failOperator(Throwable throwable) {
getContainingTask().getEnvironment().failExternally(throwable);
}
}
AsyncWaitOperator继承了AbstractUdfStreamOperator,覆盖了AbstractUdfStreamOperator的setup、open、initializeState、close、dispose方法;实现了OneInputStreamOperator接口定义的processElement、processWatermark、processLatencyMarker方法;实现了OperatorActions定义的failOperator方法
setup方法使用Executors.newSingleThreadExecutor()创建了ExecutorService,之后根据不同的outputMode创建不同的StreamElementQueue(OrderedStreamElementQueue或者UnorderedStreamElementQueue);open方法使用Emitter创建并启动AsyncIO-Emitter-Thread,另外就是处理recoveredStreamElements,根据不同的类型分别调用processElement、processWatermark、processLatencyMarker方法
processElement方法首先根据timeout注册一个timer,在ProcessingTimeCallback的onProcessingTime方法里头执行userFunction.timeout,之后将StreamRecordQueueEntry添加到StreamElementQueue中,最后触发userFunction.asyncInvoke;close和dispose方法会调用stopResources方法来关闭资源,不同的是waitForShutdown参数传值不同,close方法传true,而dispose方法传false
Emitter
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/Emitter.java

@Internal
public class Emitter<OUT> implements Runnable {

    private static final Logger LOG = LoggerFactory.getLogger(Emitter.class);

    /** Lock to hold before outputting; shared with the operator's checkpointing. */
    private final Object checkpointLock;

    /** Output for the watermark elements. */
    private final Output<StreamRecord<OUT>> output;

    /** Queue to consume the completed async results from. */
    private final StreamElementQueue streamElementQueue;

    /** Callback used to fail the owning operator on unexpected errors. */
    private final OperatorActions operatorActions;

    /** Output collector for stream records. */
    private final TimestampedCollector<OUT> timestampedCollector;

    /** Cleared by {@link #stop()}; volatile because it is written from another thread. */
    private volatile boolean running;

    public Emitter(
            final Object checkpointLock,
            final Output<StreamRecord<OUT>> output,
            final StreamElementQueue streamElementQueue,
            final OperatorActions operatorActions) {

        this.checkpointLock = Preconditions.checkNotNull(checkpointLock, "checkpointLock");
        this.output = Preconditions.checkNotNull(output, "output");
        this.streamElementQueue = Preconditions.checkNotNull(streamElementQueue, "streamElementQueue");
        this.operatorActions = Preconditions.checkNotNull(operatorActions, "operatorActions");

        this.timestampedCollector = new TimestampedCollector<>(this.output);
        this.running = true;
    }

    @Override
    public void run() {
        try {
            while (running) {
                LOG.debug("Wait for next completed async stream element result.");
                // blocks until the head of the queue has completed
                AsyncResult streamElementEntry = streamElementQueue.peekBlockingly();

                output(streamElementEntry);
            }
        } catch (InterruptedException e) {
            if (running) {
                operatorActions.failOperator(e);
            } else {
                // Thread got interrupted which means that it should shut down
                LOG.debug("Emitter thread got interrupted, shutting down.");
            }
        } catch (Throwable t) {
            operatorActions.failOperator(new Exception("AsyncWaitOperator's emitter caught an " +
                "unexpected throwable.", t));
        }
    }

    /**
     * Emits the given async result (watermark or record collection) under the checkpoint
     * lock, removes it from the queue and notifies the main thread that queue space freed up.
     */
    private void output(AsyncResult asyncResult) throws InterruptedException {
        if (asyncResult.isWatermark()) {
            synchronized (checkpointLock) {
                AsyncWatermarkResult asyncWatermarkResult = asyncResult.asWatermark();

                LOG.debug("Output async watermark.");
                output.emitWatermark(asyncWatermarkResult.getWatermark());

                // remove the peeked element from the async collector buffer so that it is no longer
                // checkpointed
                streamElementQueue.poll();

                // notify the main thread that there is again space left in the async collector
                // buffer
                checkpointLock.notifyAll();
            }
        } else {
            AsyncCollectionResult<OUT> streamRecordResult = asyncResult.asResultCollection();

            if (streamRecordResult.hasTimestamp()) {
                timestampedCollector.setAbsoluteTimestamp(streamRecordResult.getTimestamp());
            } else {
                timestampedCollector.eraseTimestamp();
            }

            synchronized (checkpointLock) {
                LOG.debug("Output async stream element collection result.");

                try {
                    Collection<OUT> resultCollection = streamRecordResult.get();

                    if (resultCollection != null) {
                        for (OUT result : resultCollection) {
                            timestampedCollector.collect(result);
                        }
                    }
                } catch (Exception e) {
                    operatorActions.failOperator(
                        new Exception("An async function call terminated with an exception. " +
                            "Failing the AsyncWaitOperator.", e));
                }

                // remove the peeked element from the async collector buffer so that it is no longer
                // checkpointed
                streamElementQueue.poll();

                // notify the main thread that there is again space left in the async collector
                // buffer
                checkpointLock.notifyAll();
            }
        }
    }

    /** Signals the emitter loop to stop after the current iteration. */
    public void stop() {
        running = false;
    }
}
Emitter实现了Runnable接口,它主要负责从StreamElementQueue取出element,然后输出到TimestampedCollector
Emitter的run方法就是不断循环调用streamElementQueue.peekBlockingly()阻塞获取AsyncResult,获取到之后就调用output方法将result输出出去
Emitter的output方法根据asyncResult是否是watermark做不同处理,不是watermark的话,就会将result通过timestampedCollector.collect输出,如果出现异常则调用operatorActions.failOperator传递异常,最后调用streamElementQueue.poll()来移除队首的元素
StreamElementQueue
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueue.java

@Internal
/**
 * Blocking queue of in-flight stream elements used by the AsyncWaitOperator.
 * Implementations decide the order in which completed entries become visible
 * (see OrderedStreamElementQueue and UnorderedStreamElementQueue).
 */
public interface StreamElementQueue {

// Inserts the entry, blocking until capacity is available.
<T> void put(StreamElementQueueEntry<T> streamElementQueueEntry) throws InterruptedException;

// Inserts the entry if capacity is available; returns false (without blocking) otherwise.
<T> boolean tryPut(StreamElementQueueEntry<T> streamElementQueueEntry) throws InterruptedException;

// Blocks until a completed entry is available at the head, then returns it without removing it.
AsyncResult peekBlockingly() throws InterruptedException;

// Blocks until a completed entry is available at the head, then removes and returns it.
AsyncResult poll() throws InterruptedException;

// Returns all entries currently in the queue (completed and uncompleted), e.g. for snapshots.
Collection<StreamElementQueueEntry<?>> values() throws InterruptedException;

boolean isEmpty();

int size();
}
StreamElementQueue接口主要定义了AsyncWaitOperator所要用的blocking stream element queue的接口;它定义了put、tryPut、peekBlockingly、poll、values、isEmpty、size方法;StreamElementQueue接口有两个子类分别是UnorderedStreamElementQueue及OrderedStreamElementQueue;队列元素类型为StreamElementQueueEntry
UnorderedStreamElementQueue
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/queue/UnorderedStreamElementQueue.java

@Internal
public class UnorderedStreamElementQueue implements StreamElementQueue {

private static final Logger LOG = LoggerFactory.getLogger(UnorderedStreamElementQueue.class);

/** Capacity of this queue. */
private final int capacity;

/** Executor to run the onComplete callbacks. */
private final Executor executor;

/** OperatorActions to signal the owning operator a failure. */
private final OperatorActions operatorActions;

/** Queue of uncompleted stream element queue entries segmented by watermarks. */
private final ArrayDeque<Set<StreamElementQueueEntry<?>>> uncompletedQueue;

/** Queue of completed stream element queue entries. */
private final ArrayDeque<StreamElementQueueEntry<?>> completedQueue;

/** First (chronologically oldest) uncompleted set of stream element queue entries. */
private Set<StreamElementQueueEntry<?>> firstSet;

// Last (chronologically youngest) uncompleted set of stream element queue entries. New
// stream element queue entries are inserted into this set.
private Set<StreamElementQueueEntry<?>> lastSet;
private volatile int numberEntries;

/** Locks and conditions for the blocking queue. */
private final ReentrantLock lock;
private final Condition notFull;
private final Condition hasCompletedEntries;

public UnorderedStreamElementQueue(
int capacity,
Executor executor,
OperatorActions operatorActions) {

Preconditions.checkArgument(capacity > 0, "The capacity must be larger than 0.");
this.capacity = capacity;

this.executor = Preconditions.checkNotNull(executor, "executor");

this.operatorActions = Preconditions.checkNotNull(operatorActions, "operatorActions");

this.uncompletedQueue = new ArrayDeque<>(capacity);
this.completedQueue = new ArrayDeque<>(capacity);

this.firstSet = new HashSet<>(capacity);
this.lastSet = firstSet;

this.numberEntries = 0;

this.lock = new ReentrantLock();
this.notFull = lock.newCondition();
this.hasCompletedEntries = lock.newCondition();
}

@Override
public <T> void put(StreamElementQueueEntry<T> streamElementQueueEntry) throws InterruptedException {
lock.lockInterruptibly();

try {
while (numberEntries >= capacity) {
notFull.await();
}

addEntry(streamElementQueueEntry);
} finally {
lock.unlock();
}
}

@Override
public <T> boolean tryPut(StreamElementQueueEntry<T> streamElementQueueEntry) throws InterruptedException {
lock.lockInterruptibly();

try {
if (numberEntries < capacity) {
addEntry(streamElementQueueEntry);

LOG.debug("Put element into unordered stream element queue. New filling degree " +
"({}/{}).", numberEntries, capacity);

return true;
} else {
LOG.debug("Failed to put element into unordered stream element queue because it " +
"was full ({}/{}).", numberEntries, capacity);

return false;
}
} finally {
lock.unlock();
}
}

@Override
public AsyncResult peekBlockingly() throws InterruptedException {
lock.lockInterruptibly();

try {
while (completedQueue.isEmpty()) {
hasCompletedEntries.await();
}

LOG.debug("Peeked head element from unordered stream element queue with filling degree " +
"({}/{}).", numberEntries, capacity);

return completedQueue.peek();
} finally {
lock.unlock();
}
}

@Override
public AsyncResult poll() throws InterruptedException {
lock.lockInterruptibly();

try {
while (completedQueue.isEmpty()) {
hasCompletedEntries.await();
}

numberEntries--;
notFull.signalAll();

LOG.debug("Polled element from unordered stream element queue. New filling degree " +
"({}/{}).", numberEntries, capacity);

return completedQueue.poll();
} finally {
lock.unlock();
}
}

@Override
public Collection<StreamElementQueueEntry<?>> values() throws InterruptedException {
lock.lockInterruptibly();

try {
StreamElementQueueEntry<?>[] array = new StreamElementQueueEntry[numberEntries];

array = completedQueue.toArray(array);

int counter = completedQueue.size();

for (StreamElementQueueEntry<?> entry: firstSet) {
array[counter] = entry;
counter++;
}

for (Set<StreamElementQueueEntry<?>> asyncBufferEntries : uncompletedQueue) {

for (StreamElementQueueEntry<?> streamElementQueueEntry : asyncBufferEntries) {
array[counter] = streamElementQueueEntry;
counter++;
}
}

return Arrays.asList(array);
} finally {
lock.unlock();
}
}

@Override
public boolean isEmpty() {
return numberEntries == 0;
}

@Override
public int size() {
return numberEntries;
}

public void onCompleteHandler(StreamElementQueueEntry<?> streamElementQueueEntry) throws InterruptedException {
lock.lockInterruptibly();

try {
if (firstSet.remove(streamElementQueueEntry)) {
completedQueue.offer(streamElementQueueEntry);

while (firstSet.isEmpty() && firstSet != lastSet) {
firstSet = uncompletedQueue.poll();

Iterator<StreamElementQueueEntry<?>> it = firstSet.iterator();

while (it.hasNext()) {
StreamElementQueueEntry<?> bufferEntry = it.next();

if (bufferEntry.isDone()) {
completedQueue.offer(bufferEntry);
it.remove();
}
}
}

LOG.debug("Signal unordered stream element queue has completed entries.");
hasCompletedEntries.signalAll();
}
} finally {
lock.unlock();
}
}

private <T> void addEntry(StreamElementQueueEntry<T> streamElementQueueEntry) {
assert(lock.isHeldByCurrentThread());

if (streamElementQueueEntry.isWatermark()) {
lastSet = new HashSet<>(capacity);

if (firstSet.isEmpty()) {
firstSet.add(streamElementQueueEntry);
} else {
Set<StreamElementQueueEntry<?>> watermarkSet = new HashSet<>(1);
watermarkSet.add(streamElementQueueEntry);
uncompletedQueue.offer(watermarkSet);
}
uncompletedQueue.offer(lastSet);
} else {
lastSet.add(streamElementQueueEntry);
}

streamElementQueueEntry.onComplete(
(StreamElementQueueEntry<T> value) -> {
try {
onCompleteHandler(value);
} catch (InterruptedException e) {
// The accept executor thread got interrupted. This is probably cause by
// the shutdown of the executor.
LOG.debug("AsyncBufferEntry could not be properly completed because the " +
"executor thread has been interrupted.", e);
} catch (Throwable t) {
operatorActions.failOperator(new Exception("Could not complete the " +
"stream element queue entry: " + value + ‘.‘, t));
}
},
executor);

numberEntries++;
}
}
UnorderedStreamElementQueue实现了StreamElementQueue接口,它emit结果的顺序是无序的,其内部使用了两个ArrayDeque,一个是uncompletedQueue,一个是completedQueue
peekBlockingly方法首先判断completedQueue是否有元素,没有的话则执行hasCompletedEntries.await(),有则执行completedQueue.peek();put及tryPut都会调用addEntry方法,该方法会往uncompletedQueue队列新增元素,然后同时给每个streamElementQueueEntry的onComplete方法注册一个onCompleteHandler
onCompleteHandler方法会将执行完成的streamElementQueueEntry从uncompletedQueue移除,然后添加到completedQueue
OrderedStreamElementQueue
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/queue/OrderedStreamElementQueue.java

@Internal
public class OrderedStreamElementQueue implements StreamElementQueue {

private static final Logger LOG = LoggerFactory.getLogger(OrderedStreamElementQueue.class);

/** Capacity of this queue. */
private final int capacity;

/** Executor to run the onCompletion callback. */
private final Executor executor;

/** Operator actions to signal a failure to the operator. */
private final OperatorActions operatorActions;

/** Lock and conditions for the blocking queue. */
private final ReentrantLock lock;
private final Condition notFull;
private final Condition headIsCompleted;

/** Queue for the inserted StreamElementQueueEntries. */
private final ArrayDeque<StreamElementQueueEntry<?>> queue;

public OrderedStreamElementQueue(
int capacity,
Executor executor,
OperatorActions operatorActions) {

Preconditions.checkArgument(capacity > 0, "The capacity must be larger than 0.");
this.capacity = capacity;

this.executor = Preconditions.checkNotNull(executor, "executor");

this.operatorActions = Preconditions.checkNotNull(operatorActions, "operatorActions");

this.lock = new ReentrantLock(false);
this.headIsCompleted = lock.newCondition();
this.notFull = lock.newCondition();

this.queue = new ArrayDeque<>(capacity);
}

@Override
public AsyncResult peekBlockingly() throws InterruptedException {
lock.lockInterruptibly();

try {
while (queue.isEmpty() || !queue.peek().isDone()) {
headIsCompleted.await();
}

LOG.debug("Peeked head element from ordered stream element queue with filling degree " +
"({}/{}).", queue.size(), capacity);

return queue.peek();
} finally {
lock.unlock();
}
}

@Override
public AsyncResult poll() throws InterruptedException {
lock.lockInterruptibly();

try {
while (queue.isEmpty() || !queue.peek().isDone()) {
headIsCompleted.await();
}

notFull.signalAll();

LOG.debug("Polled head element from ordered stream element queue. New filling degree " +
"({}/{}).", queue.size() - 1, capacity);

return queue.poll();
} finally {
lock.unlock();
}
}

@Override
public Collection<StreamElementQueueEntry<?>> values() throws InterruptedException {
lock.lockInterruptibly();

try {
StreamElementQueueEntry<?>[] array = new StreamElementQueueEntry[queue.size()];

array = queue.toArray(array);

return Arrays.asList(array);
} finally {
lock.unlock();
}
}

@Override
public boolean isEmpty() {
return queue.isEmpty();
}

@Override
public int size() {
return queue.size();
}

@Override
public <T> void put(StreamElementQueueEntry<T> streamElementQueueEntry) throws InterruptedException {
lock.lockInterruptibly();

try {
while (queue.size() >= capacity) {
notFull.await();
}

addEntry(streamElementQueueEntry);
} finally {
lock.unlock();
}
}

@Override
public <T> boolean tryPut(StreamElementQueueEntry<T> streamElementQueueEntry) throws InterruptedException {
lock.lockInterruptibly();

try {
if (queue.size() < capacity) {
addEntry(streamElementQueueEntry);

LOG.debug("Put element into ordered stream element queue. New filling degree " +
"({}/{}).", queue.size(), capacity);

return true;
} else {
LOG.debug("Failed to put element into ordered stream element queue because it " +
"was full ({}/{}).", queue.size(), capacity);

return false;
}
} finally {
lock.unlock();
}
}

private <T> void addEntry(StreamElementQueueEntry<T> streamElementQueueEntry) {
assert(lock.isHeldByCurrentThread());

queue.addLast(streamElementQueueEntry);

streamElementQueueEntry.onComplete(
(StreamElementQueueEntry<T> value) -> {
try {
onCompleteHandler(value);
} catch (InterruptedException e) {
// we got interrupted. This indicates a shutdown of the executor
LOG.debug("AsyncBufferEntry could not be properly completed because the " +
"executor thread has been interrupted.", e);
} catch (Throwable t) {
operatorActions.failOperator(new Exception("Could not complete the " +
"stream element queue entry: " + value + ‘.‘, t));
}
},
executor);
}

private void onCompleteHandler(StreamElementQueueEntry<?> streamElementQueueEntry) throws InterruptedException {
lock.lockInterruptibly();

try {
if (!queue.isEmpty() && queue.peek().isDone()) {
LOG.debug("Signal ordered stream element queue has completed head element.");
headIsCompleted.signalAll();
}
} finally {
lock.unlock();
}
}
}
OrderedStreamElementQueue实现了StreamElementQueue接口,它有序地emit结果,它内部有一个ArrayDeque类型的queue
peekBlockingly方法首先判断queue是否有元素而且是执行完成的,没有就执行headIsCompleted.await(),有则执行queue.peek();put及tryPut都会调用addEntry方法,该方法会执行queue.addLast(streamElementQueueEntry),然后同时给每个streamElementQueueEntry的onComplete方法注册一个onCompleteHandler
onCompleteHandler方法会检测执行完成的元素是否是队列的第一个元素,如果是则执行headIsCompleted.signalAll()
AsyncResult
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/queue/AsyncResult.java

@Internal
/**
 * Result of an asynchronous operation stored in a StreamElementQueue. A result is
 * either a watermark or a collection of output records; exactly one of the two
 * is-checks returns true, and the matching as-cast may then be used.
 */
public interface AsyncResult {

// True if this result wraps a watermark.
boolean isWatermark();

// True if this result wraps a collection of output records.
boolean isResultCollection();

// Casts this result to a watermark result; only valid if isWatermark() is true.
AsyncWatermarkResult asWatermark();

// Casts this result to a collection result; only valid if isResultCollection() is true.
<T> AsyncCollectionResult<T> asResultCollection();
}
AsyncResult接口定义了StreamElementQueue的元素异步返回的结果要实现的方法,该async result可能是watermark,可能是真正的结果
StreamElementQueueEntry
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueueEntry.java

@Internal
/**
 * Base class for entries of a {@link StreamElementQueue}. Wraps the original
 * {@link StreamElement} and exposes the future that completes when the
 * asynchronous operation for this element has finished.
 */
public abstract class StreamElementQueueEntry<T> implements AsyncResult {

    /** The wrapped input element (record or watermark). */
    private final StreamElement streamElement;

    public StreamElementQueueEntry(StreamElement streamElement) {
        this.streamElement = Preconditions.checkNotNull(streamElement);
    }

    public StreamElement getStreamElement() {
        return streamElement;
    }

    /** True once the underlying future has finished, normally or exceptionally. */
    public boolean isDone() {
        return getFuture().isDone();
    }

    /**
     * Registers the given callback to run on the given executor when this entry
     * completes. The callback fires for normal as well as exceptional completion
     * (see FLINK-6435).
     */
    public void onComplete(
            final Consumer<StreamElementQueueEntry<T>> completeFunction,
            Executor executor) {
        getFuture().whenCompleteAsync(
            (ignoredResult, ignoredError) -> completeFunction.accept(this),
            executor);
    }

    /** The future tracking completion of the async operation; supplied by subclasses. */
    protected abstract CompletableFuture<T> getFuture();

    @Override
    public final boolean isWatermark() {
        return this instanceof AsyncWatermarkResult;
    }

    @Override
    public final boolean isResultCollection() {
        return this instanceof AsyncCollectionResult;
    }

    @Override
    public final AsyncWatermarkResult asWatermark() {
        return (AsyncWatermarkResult) this;
    }

    @Override
    public final <R> AsyncCollectionResult<R> asResultCollection() {
        return (AsyncCollectionResult<R>) this;
    }
}
StreamElementQueueEntry实现了AsyncResult接口,它定义了onComplete方法用于结果完成时的回调处理,同时它还定义了抽象方法getFuture供子类实现;它有两个子类,分别是WatermarkQueueEntry及StreamRecordQueueEntry
WatermarkQueueEntry
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/queue/WatermarkQueueEntry.java

@Internal
/**
 * Queue entry for a watermark. A watermark needs no asynchronous work, so its
 * future is completed immediately at construction time and the entry is "done"
 * as soon as it enters the queue.
 */
public class WatermarkQueueEntry extends StreamElementQueueEntry<Watermark> implements AsyncWatermarkResult {

// Pre-completed future carrying the watermark itself.
private final CompletableFuture<Watermark> future;

public WatermarkQueueEntry(Watermark watermark) {
super(watermark);

this.future = CompletableFuture.completedFuture(watermark);
}

@Override
public Watermark getWatermark() {
// The wrapped stream element is always the watermark passed to the constructor.
return (Watermark) getStreamElement();
}

@Override
protected CompletableFuture<Watermark> getFuture() {
return future;
}
}
WatermarkQueueEntry继承了StreamElementQueueEntry,其元素类型为Watermark,同时实现了AsyncWatermarkResult接口
StreamRecordQueueEntry
flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/operators/async/queue/StreamRecordQueueEntry.java

@Internal
/**
 * Queue entry for a stream record. Acts as the {@link ResultFuture} handed to the
 * user's AsyncFunction: the function calls complete()/completeExceptionally(),
 * which completes the underlying future and thereby marks this entry as done.
 */
public class StreamRecordQueueEntry<OUT> extends StreamElementQueueEntry<Collection<OUT>>
implements AsyncCollectionResult<OUT>, ResultFuture<OUT> {

/** Timestamp information, captured from the input record at construction time. */
private final boolean hasTimestamp;
private final long timestamp;

/** Future containing the collection result. */
private final CompletableFuture<Collection<OUT>> resultFuture;

public StreamRecordQueueEntry(StreamRecord<?> streamRecord) {
super(streamRecord);

hasTimestamp = streamRecord.hasTimestamp();
timestamp = streamRecord.getTimestamp();

resultFuture = new CompletableFuture<>();
}

@Override
public boolean hasTimestamp() {
return hasTimestamp;
}

@Override
public long getTimestamp() {
return timestamp;
}

@Override
public Collection<OUT> get() throws Exception {
// Blocks until the async function completes this entry; rethrows its failure.
return resultFuture.get();
}

@Override
protected CompletableFuture<Collection<OUT>> getFuture() {
return resultFuture;
}

@Override
public void complete(Collection<OUT> result) {
// Called by the user's AsyncFunction on success.
resultFuture.complete(result);
}

@Override
public void completeExceptionally(Throwable error) {
// Called by the user's AsyncFunction on failure.
resultFuture.completeExceptionally(error);
}
}
StreamRecordQueueEntry继承了StreamElementQueueEntry,同时实现了AsyncCollectionResult、ResultFuture接口
小结
AsyncWaitOperator继承了AbstractUdfStreamOperator,覆盖了AbstractUdfStreamOperator的setup、open、initializeState、close、dispose方法;实现了OneInputStreamOperator接口定义的processElement、processWatermark、processLatencyMarker方法;实现了OperatorActions定义的failOperator方法;open方法使用Emitter创建并启动AsyncIO-Emitter-Thread
Emitter实现了Runnable接口,它主要负责从StreamElementQueue取出element,然后输出到TimestampedCollector;其run方法就是不断循环调用streamElementQueue.peekBlockingly()阻塞获取AsyncResult,获取到之后就调用output方法将result输出出去
StreamElementQueue接口主要定义了AsyncWaitOperator所要用的blocking stream element queue的接口;它定义了put、tryPut、peekBlockingly、poll、values、isEmpty、size方法;StreamElementQueue接口有两个子类分别是UnorderedStreamElementQueue及OrderedStreamElementQueue;队列元素类型为StreamElementQueueEntry,StreamElementQueueEntry实现了AsyncResult接口,它定义了onComplete方法用于结果完成时的回调处理,同时它还定义了抽象方法getFuture供子类实现;它有两个子类,分别是WatermarkQueueEntry及StreamRecordQueueEntry

原文地址:https://www.cnblogs.com/qwangxiao/p/10294404.html

时间: 2024-10-10 17:25:14

聊聊flink的AsyncWaitOperator的相关文章

聊聊flink的log.file配置

本文主要研究一下flink的log.file配置 log4j.properties flink-release-1.6.2/flink-dist/src/main/flink-bin/conf/log4j.properties # This affects logging for both user code and Flink log4j.rootLogger=INFO, file # Uncomment this if you want to _only_ change Flink's lo

聊聊flink的CsvTableSource

序 本文主要研究一下flink的CsvTableSource TableSource flink-table_2.11-1.7.1-sources.jar!/org/apache/flink/table/sources/TableSource.scala trait TableSource[T] { /** Returns the [[TypeInformation]] for the return type of the [[TableSource]]. * The fields of the

Flink与Spark Streaming在与kafka结合的区别!

本文主要是想聊聊flink与kafka结合.当然,单纯的介绍flink与kafka的结合呢,比较单调,也没有可对比性,所以的准备顺便帮大家简单回顾一下Spark Streaming与kafka的结合. 看懂本文的前提是首先要熟悉kafka,然后了解spark Streaming的运行原理及与kafka结合的两种形式,然后了解flink实时流的原理及与kafka结合的方式. kafka kafka作为一个消息队列,在企业中主要用于缓存数据,当然,也有人用kafka做存储系统,比如存最近七天的数据.

Apache流处理框架对比

分布式流处理,类似于MapReduce这样的通用计算模型,但是却要求它能够在毫秒级别或者秒级别完成响应.这些系统可以用DAG表示流处理的拓扑. Points of Interest 在比较不同系统是,可以参照如下几点 Runtime and Programming model(运行与编程模型) 一个平台提供的编程模型往往会决定很多它的特性,并且这个编程模型应该足够处理所有可能的用户案例. Functional Primitives(函数式单元) 一个合格的处理平台应该能够提供丰富的能够在独立信息

Flink - FLIP

https://cwiki.apache.org/confluence/display/FLINK/Flink+Improvement+Proposals   FLIP-1 : Fine Grained Recovery from Task Failures   When a task fails during execution, Flink currently resets the entire execution graph and triggers complete re-executi

深度剖析阿里巴巴对Apache Flink的优化与改进

本文主要从两个层面深度剖析:阿里巴巴对Flink究竟做了哪些优化? 取之开源,用之开源 一.SQL层 为了能够真正做到用户根据自己的业务逻辑开发一套代码,能够同时运行在多种不同的场景,Flink首先需要给用户提供一个统一的API.在经过一番调研之后,阿里巴巴实时计算认为SQL是一个非常适合的选择.在批处理领域,SQL已经经历了几十年的考验,是公认的经典.在流计算领域,近年来也不断有流表二象性.流是表的ChangeLog等理论出现.在这些理论基础之上,阿里巴巴提出了动态表的概念,使得流计算也可以像

Flink异步IO-实现原理(外文)

原文转至:https://docs.google.com/document/d/1Lr9UYXEz6s6R_3PWg3bZQLF3upGaNEkc0rQCFSzaYDI/edit# Asynchronous I/O Design and Implementation Motivation I/O access, for the most case, is a time-consuming process, making the TPS for single operator much lower

Flink 原理(六)——异步I/O(asynchronous I/O)

1.前言 本文是基于Flink官网上Asynchronous  I/O的介绍结合自己的理解写成的,若有不正确的欢迎大伙留言交流,谢谢! 2.Asynchronous  I/O简介 将Flink用于流计算时,若涉及到和外部系统进行交互,如利用Flink从数据库中读取数据,这种需要获取I/O的场景时,我们需要考虑交互所带来的时延问题. 为分析如何减少时延,我们先来分析一下,Flink以同步的形式方法外部系统(以MapFunction中和数据库交互为例)的过程,若图1虚线左侧所示,请求a发送到data

Flink资料(4) -- 类型抽取和序列化

类型抽取和序列化 本文翻译自Type Extraction and Serialization Flink处理类型的方式比较特殊,包括它自己的类型描述,一般类型抽取和类型序列化框架.该文档描述这些概念并解释其机理. Java API和Scala API处理类型信息的方式有根本性的区别,所以本文描述的问题仅与其中一种API相关 一.Flink中对类型的处理 一般处理类型时,我们并不干涉,而是让编程语言和序列化框架来自动处理类型.与之相反的,Flink想要尽可能掌握进出用户函数的数据类型的信息. 1