This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 1f7cbd1

Separius authored and afrozenator committed
Merge of PR #1724
PiperOrigin-RevId: 281772522
1 parent c57ccbb commit 1f7cbd1

7 files changed: 15 additions, 157 deletions

tensor2tensor/data_generators/text_problems.py

Lines changed: 0 additions & 104 deletions
@@ -17,7 +17,6 @@
 
 * Text2TextProblem: input=text, target=text.
 * Text2ClassProblem: input=text, target=class.
-* Text2RealProblem: input=text, target=float.
 * Text2SelfProblem (for language modeling): target=text
 * QuestionAndContext2TextProblem: input=text, context=text, target=text.
 
@@ -606,94 +605,6 @@ def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):
       yield {"inputs": inputs, "targets": [label]}
 
 
-class Text2RealProblem(Text2TextProblem):
-  """Base class for text regression problems with one or more tasks.
-  Suitable for text-based problems where targets are continuous, real values.
-  When ntasks = 1, each text example is mapped to a single scalar value. When
-  ntasks > 1, each text example is mapped to a 1-d vector of length ntasks.
-  """
-
-  @property
-  def ntasks(self):
-    """Set to n > 1 for multitask regression."""
-    return 1
-
-  def generate_samples(self, data_dir, tmp_dir, dataset_split):
-    """Generate samples of text and real-valued target pairs.
-    Each yielded dict will be a single example. The inputs should be raw text.
-    The target should be a list containing ntasks floats.
-    Args:
-      data_dir: final data directory. Typically only used in this method to copy
-        over user-supplied vocab files (for example, if vocab_type ==
-        VocabType.TOKEN).
-      tmp_dir: temporary directory that you can use for downloading and scratch.
-      dataset_split: problem.DatasetSplit, which data split to generate samples
-        for (for example, training and evaluation).
-    Yields:
-      {"inputs": text, "targets": [x1, x2, ..., xN]} where N is ntasks
-    """
-    raise NotImplementedError()
-
-  def generate_text_for_vocab(self, data_dir, tmp_dir):
-    for i, sample in enumerate(
-        self.generate_samples(data_dir, tmp_dir, problem.DatasetSplit.TRAIN)):
-      yield sample["inputs"]
-      if self.max_samples_for_vocab and (i + 1) >= self.max_samples_for_vocab:
-        break
-
-  def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):
-    generator = self.generate_samples(data_dir, tmp_dir, dataset_split)
-    encoder = self.get_or_create_vocab(data_dir, tmp_dir)
-    for sample in generator:
-      inputs = encoder.encode(sample["inputs"])
-      inputs.append(text_encoder.EOS_ID)
-      yield {"inputs": inputs, "targets": sample["targets"]}
-
-  def feature_encoders(self, data_dir):
-    encoder = self.get_or_create_vocab(data_dir, None, force_get=True)
-
-    return {
-        "inputs": encoder,
-        "targets": text_encoder.RealEncoder(),
-    }
-
-  def hparams(self, defaults, unused_model_hparams):
-    p = defaults
-    p.modality = {
-        "inputs": modalities.ModalityType.SYMBOL,
-        "targets": modalities.ModalityType.REAL_L2_LOSS,
-    }
-    p.vocab_size = {
-        "inputs": self._encoders["inputs"].vocab_size,
-        "targets": self.ntasks
-    }
-    p.target_space_id = problem.SpaceID.REAL
-    p.add_hparam("regression_targets", True)
-
-  def max_length(self, model_hparams):
-    return model_hparams.batch_size * self.ntasks
-
-  def preprocess_example(self, example, unused_mode, unused_hparams):
-    example = problem.preprocess_example_common(example, unused_mode,
-                                                unused_hparams)
-    example["targets"] = tf.reshape(example["targets"], [1, 1, self.ntasks])
-    return example
-
-  def example_reading_spec(self):
-    data_fields = {
-        "inputs": tf.VarLenFeature(tf.int64),
-        "targets": tf.FixedLenFeature([self.ntasks], tf.float32),
-    }
-    data_items_to_decoders = None
-    return (data_fields, data_items_to_decoders)
-
-  def eval_metrics(self):
-    metrics_list = [metrics.Metrics.RMSE]
-    if self.ntasks == 1:
-      metrics_list.append(metrics.Metrics.PEARSON)
-    return metrics_list
-
-
 def txt_line_iterator(txt_path):
   """Iterate through lines of file."""
   with tf.gfile.Open(txt_path) as f:
@@ -781,21 +692,6 @@ def text2class_txt_iterator(source_txt_path, label_txt_path, class_strs=None):
     yield {"inputs": inputs, "label": label}
 
 
-def text2real_txt_iterator(source_txt_path, target_txt_path):
-  """Yield dicts for Text2RealProblem.generate_samples from lines of files.
-  Args:
-    source_txt_path: txt file with record per line.
-    target_txt_path: txt file with float (or space-separated float list for
-      multitask) per line.
-  Yields:
-    {"inputs": inputs, "targets": targets}
-  """
-  for inputs, targets in zip(
-      txt_line_iterator(source_txt_path), txt_line_iterator(target_txt_path)):
-    targets = [float(x) for x in targets.split(" ")]
-    yield {"inputs": inputs, "targets": targets}
-
-
 def text2text_txt_tab_iterator(txt_path):
   """Yield dicts for Text2TextProblem.generate_samples from lines of txt_path.
 
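What this file loses is the whole text-regression API. For orientation, a minimal sketch of how a dataset plugged into the removed base class before this commit; the subclass name and file paths are hypothetical, and both helpers it relies on (Text2RealProblem and text2real_txt_iterator) are exactly the ones deleted above:

    from tensor2tensor.data_generators import text_problems
    from tensor2tensor.utils import registry


    @registry.register_problem
    class SentenceScoresRegression(text_problems.Text2RealProblem):
      """Hypothetical problem mapping each sentence to two real-valued scores."""

      @property
      def ntasks(self):
        return 2  # each target is a 2-element float vector, e.g. [1.23, 2.34]

      def generate_samples(self, data_dir, tmp_dir, dataset_split):
        del data_dir, tmp_dir, dataset_split  # sketch: same files for every split
        # Pairs one line of raw text with one line of space-separated floats.
        return text_problems.text2real_txt_iterator(
            "/tmp/regr/inputs.txt", "/tmp/regr/targets.txt")

As the deleted hparams and feature_encoders methods show, such problems trained with the REAL_L2_LOSS target modality and decoded targets with text_encoder.RealEncoder.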

tensor2tensor/data_generators/text_problems_test.py

Lines changed: 0 additions & 17 deletions
@@ -94,13 +94,6 @@ def setUpClass(cls):
     tf.gfile.Copy(cls.targets_file, os.path.join(cls.tmp_dir,
                                                  "targets.eval.txt"))
 
-    cls.targets_regr = [[1.23, 2.34], [4.56, 5.67]]
-    cls.targets_regr_file = os.path.join(cls.tmp_dir, "targets_regr.train.txt")
-    with tf.gfile.Open(cls.targets_regr_file, "w") as f:
-      for targets in cls.targets_regr:
-        f.write(" ".join([str(x) for x in targets]) + "\n")
-
-
   def testTxtLineIterator(self):
     lines = [line for line in text_problems.txt_line_iterator(self.inputs_file)]
     self.assertEqual(lines, self.inputs)
@@ -143,16 +136,6 @@ def testText2ClassTxtIteratorWithStrs(self):
     self.assertEqual(inputs, self.inputs)
     self.assertEqual(labels, self.labels)
 
-  def testText2RealTxtIterator(self):
-    inputs = []
-    targets = []
-    for entry in text_problems.text2real_txt_iterator(self.inputs_file,
-                                                      self.targets_regr_file):
-      inputs.append(entry["inputs"])
-      targets.append(entry["targets"])
-    self.assertEqual(inputs, self.inputs)
-    self.assertEqual(targets, self.targets_regr)
-
   def testText2TextTxtTabIterator(self):
     inputs = []
     targets = []
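The deleted fixture and test pinned down the parsing contract of text2real_txt_iterator: each target line is split on single spaces and passed through float(), so the fixture values round-trip exactly. A one-line illustration of that parsing:

    line = "1.23 2.34"  # one line of the removed targets_regr.train.txt fixture
    assert [float(x) for x in line.split(" ")] == [1.23, 2.34]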

tensor2tensor/models/image_transformer_2d_test.py

Lines changed: 2 additions & 2 deletions
@@ -35,8 +35,8 @@ def _test_img2img_transformer(self, net):
     hparams = image_transformer_2d.img2img_transformer2d_tiny()
     hparams.data_dir = ""
     p_hparams = registry.problem("image_celeba").get_hparams(hparams)
-    inputs = np.random.randint(256, size=(3, 4, 4, 3))
-    targets = np.random.randint(256, size=(3, 8, 8, 3))
+    inputs = np.random.randint(256, size=(batch_size, 4, 4, 3))
+    targets = np.random.randint(256, size=(batch_size, 8, 8, 3))
     with self.test_session() as session:
       features = {
           "inputs": tf.constant(inputs, dtype=tf.int32),

tensor2tensor/models/research/universal_transformer.py

Lines changed: 2 additions & 0 deletions
@@ -458,6 +458,7 @@ def universal_transformer_base():
 @registry.register_hparams
 def universal_transformer_base_tpu():
   hparams = universal_transformer_base()
+  hparams = update_hparams_for_universal_transformer(hparams)
   transformer.update_hparams_for_tpu(hparams)
   hparams.add_step_timing_signal = False
   return hparams
@@ -466,6 +467,7 @@ def universal_transformer_base_tpu():
 @registry.register_hparams
 def universal_transformer_big():
   hparams = universal_transformer_base()
+  hparams = update_hparams_for_universal_transformer(hparams)
   hparams.hidden_size = 2048
   hparams.filter_size = 8192
   return hparams
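Both additions apply update_hparams_for_universal_transformer to the hparams returned by universal_transformer_base() before layering on variant-specific overrides. A further variant would presumably follow the same corrected pattern; a sketch with a hypothetical name, assuming this module's existing imports and helpers:

    @registry.register_hparams
    def universal_transformer_big_tpu():  # hypothetical; not part of this commit
      hparams = universal_transformer_base()
      # Apply the Universal Transformer-specific settings first, as above.
      hparams = update_hparams_for_universal_transformer(hparams)
      transformer.update_hparams_for_tpu(hparams)
      hparams.hidden_size = 2048
      hparams.filter_size = 8192
      return hparams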

tensor2tensor/models/transformer.py

Lines changed: 7 additions & 12 deletions
@@ -462,8 +462,7 @@ def _fast_decode_tpu(self,
 
     if self.has_input:
       inputs_shape = common_layers.shape_list(features["inputs"])
-      if (target_modality == modalities.ModalityType.CLASS_LABEL or
-          self._problem_hparams.get("regression_targets")):
+      if target_modality == modalities.ModalityType.CLASS_LABEL:
         decode_length = 1
       else:
         decode_length = (
@@ -705,8 +704,7 @@ def _fast_decode(self,
           " of the dataset when decoding.")
     if self.has_input:
       inputs_shape = common_layers.shape_list(features["inputs"])
-      if (target_modality == modalities.ModalityType.CLASS_LABEL or
-          self._problem_hparams.get("regression_targets")):
+      if target_modality == modalities.ModalityType.CLASS_LABEL:
         decode_length = 1
       else:
         decode_length = (
@@ -865,15 +863,9 @@ def symbols_to_logits_fn(ids, i, cache):
         vocab_size = tf.shape(ret)[1]
 
         def forced_logits():
-          # Workaround for: tf.one_hot(
-          #     tf.repeat(partial_targets[:, i], [beam_size]), vocab_size, 0.0,
-          #     -1e9)
-          # Can be replaced by the above in future versions (from tf 1.15).
           return tf.one_hot(
-              tf.reshape(tf.tile(
-                  tf.reshape(partial_targets[:, i], [-1, 1]),
-                  [1, beam_size]), [-1]),
-              vocab_size, 0.0, -1e9)
+              tf.tile(partial_targets[:, i], [beam_size]), vocab_size, 0.0,
+              -1e9)
 
         ret = tf.cond(
             tf.less(i, partial_targets_length), forced_logits, lambda: ret)
@@ -1176,6 +1168,9 @@ def fast_decode(encoder_output,
         "scores": decoding log probs from the beam search,
             None if using greedy decoding (beam_size=1)
     }
+
+  Raises:
+    NotImplementedError: If beam size > 1 with partial targets.
   """
   if encoder_output is not None:
     batch_size = common_layers.shape_list(encoder_output)[0]
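The simplified forced_logits builds, for decode step i, a logits tensor that is 0.0 at the forced token id and -1e9 everywhere else, so the next decoded symbol must match the partial target. One caveat the commit accepts: tf.tile(x, [beam_size]) repeats the whole batch ([b0, b1, b0, b1]) rather than interleaving per example ([b0, b0, b1, b1]) as the deleted reshape/tile dance did, so the two are equivalent only when beam_size == 1; that matches the Raises note added to fast_decode, which rules out beam search with partial targets. A NumPy sketch of the forcing, for illustration only:

    import numpy as np

    def forced_logits_np(forced_ids, vocab_size, beam_size=1):
      """Mimics the simplified forced_logits for a single decode step."""
      tiled = np.tile(forced_ids, beam_size)  # tf.tile(partial_targets[:, i], [beam_size])
      logits = np.full((tiled.shape[0], vocab_size), -1e9)
      logits[np.arange(tiled.shape[0]), tiled] = 0.0  # tf.one_hot(..., 0.0, -1e9)
      return logits

    # Batch of two sequences, forcing token ids 5 and 2 at this step:
    print(forced_logits_np(np.array([5, 2]), vocab_size=8).argmax(axis=-1))  # [5 2]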

tensor2tensor/utils/decoding.py

Lines changed: 0 additions & 14 deletions
@@ -927,13 +927,6 @@ def _interactive_input_tensor_to_features_dict(feature_map, hparams):
   features["decode_length"] = (
       IMAGE_DECODE_LENGTH if input_is_image else inputs[1])
   features["inputs"] = x
-  # Save inputs to "partial_targets" when prepending inputs to targets. Also
-  # keep "inputs" as some models crash if they don't exist.
-  if getattr(hparams, "prepend_mode", "none") != "none":
-    shape = tf.shape(x)
-    partial_targets = tf.reshape(x, [shape[0], shape[1]])
-    partial_targets = tf.pad(partial_targets, [[0, 0], [0, 1]])
-    features["partial_targets"] = partial_targets
   return features
 
 
@@ -964,13 +957,6 @@ def _decode_input_tensor_to_features_dict(feature_map, hparams):
   features["decode_length"] = (
       IMAGE_DECODE_LENGTH if input_is_image else tf.shape(x)[1] + 50)
   features["inputs"] = x
-  # Save inputs to "partial_targets" when prepending inputs to targets. Also
-  # keep "inputs" as some models crash if they don't exist.
-  if getattr(hparams, "prepend_mode", "none") != "none":
-    shape = tf.shape(x)
-    partial_targets = tf.reshape(x, [shape[0], shape[1]])
-    partial_targets = tf.pad(partial_targets, [[0, 0], [0, 1]])
-    features["partial_targets"] = partial_targets
   return features
 
 
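For reference, the identical block deleted from both helpers flattened the inputs to [batch, length] and appended one trailing pad position, exposing the result as features["partial_targets"] for the prefix-forcing path shown in transformer.py above. A NumPy sketch of the removed shape manipulation, assuming the usual [batch, length, 1, 1] layout of decode inputs:

    import numpy as np

    x = np.array([3, 4, 5]).reshape(1, 3, 1, 1)  # token ids as fed to the model
    shape = x.shape                              # tf.shape(x)
    partial = x.reshape(shape[0], shape[1])      # tf.reshape(x, [shape[0], shape[1]])
    partial = np.pad(partial, [(0, 0), (0, 1)])  # tf.pad(..., [[0, 0], [0, 1]])
    print(partial)                               # [[3 4 5 0]] (trailing 0 is the pad id)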

tensor2tensor/utils/t2t_model.py

Lines changed: 4 additions & 8 deletions
@@ -806,10 +806,8 @@ def infer(self,
 
     if self._problem_hparams:
       target_modality = self._problem_hparams.modality["targets"]
-      if (target_modality == modalities.ModalityType.CLASS_LABEL or
-          self._problem_hparams.get("regression_targets")):
-        # No use to run beam-search for classification or regression.
-        beam_size = 1
+      if target_modality == modalities.ModalityType.CLASS_LABEL:
+        beam_size = 1  # No use to run beam-search for a single class.
     if beam_size == 1:
       log_info("Greedy Decoding")
       results = self._greedy_infer(features, decode_length, use_tpu)
@@ -1066,8 +1064,7 @@ def infer_step(i, recent_output, recent_logits, unused_loss):
     initial_output = tf.slice(initial_output, [0, 0, 0, 0],
                               common_layers.shape_list(initial_output))
     target_modality = self._problem_hparams.modality["targets"]
-    if (target_modality == modalities.ModalityType.CLASS_LABEL or
-        self._problem_hparams.get("regression_targets")):
+    if target_modality == modalities.ModalityType.CLASS_LABEL:
       decode_length = 1
     else:
       if "partial_targets" in features:
@@ -1246,8 +1243,7 @@ def infer_step(recent_output, recent_logits, unused_loss):
     initial_output = tf.slice(initial_output, [0, 0, 0, 0],
                               common_layers.shape_list(initial_output))
     target_modality = self._problem_hparams.modality["targets"]
-    if (target_modality == modalities.ModalityType.CLASS_LABEL or
-        self._problem_hparams.get("regression_targets")):
+    if target_modality == modalities.ModalityType.CLASS_LABEL:
       decode_length = 1
     else:
       if "partial_targets" in features:
