-
Notifications
You must be signed in to change notification settings - Fork 64
[NEMO-472] Implement Intermediate Combine #318
base: master
Are you sure you want to change the base?
Changes from 12 commits
28adec8
fb1ca4f
c97de05
f5b139d
60537ba
2e871b2
0a3f0ab
9fe50a0
c5d6f9c
32ebe2c
4f137a5
1392950
347aa77
551c159
db409f5
5fc6867
4fbf06b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.nemo.common.ir.vertex.executionproperty; | ||
|
|
||
| import org.apache.nemo.common.ir.executionproperty.VertexExecutionProperty; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.HashSet; | ||
|
|
||
| /** | ||
| * List of sets of node names to which the scheduling of the vertex's tasks is limited while shuffling. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This explanation is not clear to me. Does this property set the destination executor for the output of the intermediate vertex? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It limits the sources of the data that each task reads from, depending on where the task is located. I'll add the explanation. |
||
| */ | ||
| public final class ShuffleExecutorSetProperty extends VertexExecutionProperty<ArrayList<HashSet<String>>> { | ||
wonook marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
wonook marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| /** | ||
| * Default constructor. | ||
| * @param value value of the execution property. | ||
| */ | ||
| private ShuffleExecutorSetProperty(final ArrayList<HashSet<String>> value) { | ||
wonook marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| super(value); | ||
| } | ||
|
|
||
| /** | ||
| * Static method for constructing {@link ShuffleExecutorSetProperty}. | ||
| * | ||
| * @param setsOfExecutors the sets of executors to schedule the tasks of the vertex on. | ||
| * Leave empty for the property to have no effect. | ||
| * @return the new execution property | ||
| */ | ||
| public static ShuffleExecutorSetProperty of(final HashSet<HashSet<String>> setsOfExecutors) { | ||
wonook marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return new ShuffleExecutorSetProperty(new ArrayList<>(setsOfExecutors)); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,9 +45,9 @@ | |
| * @param <InputT> input type | ||
| * @param <OutputT> output type | ||
| */ | ||
| public final class GBKTransform<K, InputT, OutputT> | ||
| public final class CombineTransform<K, InputT, OutputT> | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you explain why the class name was changed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's because GroupByKey Transform in Beam semantics can be represented as Combine PerKey Transform, and I thought that this class represents Combine PerKey rather than GroupByKey. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As far as I know, GroupByKey Transform cannot always be represented as Combine PerKey Transform, so changing the name is confusing to me. For instance, CoGroupByKey is not combining, but it is also represented as GroupByKey as far as I know. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We actually have a separate GroupByKeyTransform. We have renamed the GBKTransform since it actually works as a CombineTransform. It was misnamed in the first place. |
||
| extends AbstractDoFnTransform<KV<K, InputT>, KeyedWorkItem<K, InputT>, KV<K, OutputT>> { | ||
| private static final Logger LOG = LoggerFactory.getLogger(GBKTransform.class.getName()); | ||
| private static final Logger LOG = LoggerFactory.getLogger(CombineTransform.class.getName()); | ||
| private final SystemReduceFn reduceFn; | ||
| private transient InMemoryTimerInternalsFactory<K> inMemoryTimerInternalsFactory; | ||
| private transient InMemoryStateInternalsFactory<K> inMemoryStateInternalsFactory; | ||
|
|
@@ -57,16 +57,31 @@ public final class GBKTransform<K, InputT, OutputT> | |
| private boolean dataReceived = false; | ||
| private transient OutputCollector originOc; | ||
| private final boolean isPartialCombining; | ||
| private final CombineTransform intermediateCombine; | ||
|
|
||
| public GBKTransform(final Coder<KV<K, InputT>> inputCoder, | ||
| final Map<TupleTag<?>, Coder<?>> outputCoders, | ||
| final TupleTag<KV<K, OutputT>> mainOutputTag, | ||
| final WindowingStrategy<?, ?> windowingStrategy, | ||
| final PipelineOptions options, | ||
| final SystemReduceFn reduceFn, | ||
| final DoFnSchemaInformation doFnSchemaInformation, | ||
| final DisplayData displayData, | ||
| final boolean isPartialCombining) { | ||
| public CombineTransform(final Coder<KV<K, InputT>> inputCoder, | ||
| final Map<TupleTag<?>, Coder<?>> outputCoders, | ||
| final TupleTag<KV<K, OutputT>> mainOutputTag, | ||
| final WindowingStrategy<?, ?> windowingStrategy, | ||
| final PipelineOptions options, | ||
| final SystemReduceFn reduceFn, | ||
| final DoFnSchemaInformation doFnSchemaInformation, | ||
| final DisplayData displayData, | ||
| final boolean isPartialCombining) { | ||
| this(inputCoder, outputCoders, mainOutputTag, windowingStrategy, options, reduceFn, | ||
| doFnSchemaInformation, displayData, isPartialCombining, null); | ||
| } | ||
|
|
||
| public CombineTransform(final Coder<KV<K, InputT>> inputCoder, | ||
| final Map<TupleTag<?>, Coder<?>> outputCoders, | ||
| final TupleTag<KV<K, OutputT>> mainOutputTag, | ||
| final WindowingStrategy<?, ?> windowingStrategy, | ||
| final PipelineOptions options, | ||
| final SystemReduceFn reduceFn, | ||
| final DoFnSchemaInformation doFnSchemaInformation, | ||
| final DisplayData displayData, | ||
| final boolean isPartialCombining, | ||
| final CombineTransform intermediateCombine) { | ||
| super(null, | ||
| inputCoder, | ||
| outputCoders, | ||
|
|
@@ -80,6 +95,7 @@ public GBKTransform(final Coder<KV<K, InputT>> inputCoder, | |
| Collections.emptyMap()); /* does not have side inputs */ | ||
| this.reduceFn = reduceFn; | ||
| this.isPartialCombining = isPartialCombining; | ||
| this.intermediateCombine = intermediateCombine; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -272,6 +288,13 @@ public boolean getIsPartialCombining() { | |
| return isPartialCombining; | ||
| } | ||
|
|
||
| /** | ||
| * Get the intermediate combine transform if possible. | ||
| * @return the intermediate transform if possible. | ||
| */ | ||
| public Optional<CombineTransform> getIntermediateCombine() { | ||
| return Optional.ofNullable(intermediateCombine); | ||
| } | ||
|
|
||
| /** Wrapper class for {@link OutputCollector}. */ | ||
| public class GBKOutputCollector implements OutputCollector<WindowedValue<KV<K, OutputT>>> { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.