diff --git a/embeddings/Skip-Grams-Solution.ipynb b/embeddings/Skip-Grams-Solution.ipynb index 4eb884b9e9..5b9f96c927 100644 --- a/embeddings/Skip-Grams-Solution.ipynb +++ b/embeddings/Skip-Grams-Solution.ipynb @@ -453,1216 +453,11 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": null, "metadata": { - "scrolled": true + "scrolled": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10 Iteration: 100 Avg. Training loss: 5.6559 0.1018 sec/batch\n", - "Epoch 1/10 Iteration: 200 Avg. Training loss: 5.6093 0.1028 sec/batch\n", - "Epoch 1/10 Iteration: 300 Avg. Training loss: 5.5315 0.1023 sec/batch\n", - "Epoch 1/10 Iteration: 400 Avg. Training loss: 5.5730 0.1030 sec/batch\n", - "Epoch 1/10 Iteration: 500 Avg. Training loss: 5.5062 0.1014 sec/batch\n", - "Epoch 1/10 Iteration: 600 Avg. Training loss: 5.5396 0.1025 sec/batch\n", - "Epoch 1/10 Iteration: 700 Avg. Training loss: 5.5646 0.1033 sec/batch\n", - "Epoch 1/10 Iteration: 800 Avg. Training loss: 5.5273 0.1035 sec/batch\n", - "Epoch 1/10 Iteration: 900 Avg. Training loss: 5.5067 0.1030 sec/batch\n", - "Epoch 1/10 Iteration: 1000 Avg. Training loss: 5.4201 0.0999 sec/batch\n", - "Nearest to for: hoffman, rogue, jehoiakim, montinari, aldington, silos, explains, ilayaraja,\n", - "Nearest to would: louisiane, lampoon, albertina, bottle, olin, allahabad, disobey, tcl,\n", - "Nearest to known: homicide, intervening, tori, satrapies, mated, rtgs, lodbrok, assistants,\n", - "Nearest to used: contributing, brazil, institutionalization, ceilings, breed, gilchrist, superstitious, hawat,\n", - "Nearest to at: squaresoft, taya, buffalo, ferraris, poststructuralism, osiris, bathory, fina,\n", - "Nearest to such: expellees, wanderer, monopolistic, seldom, nanda, imperii, portnoy, heseltine,\n", - "Nearest to called: ramp, philology, lacklustre, stoner, purification, nuisances, implementing, vegetative,\n", - "Nearest to when: benguela, edinburgh, sul, tze, konkani, fo, gigue, iranic,\n", - "Nearest to taking: leopards, arlene, disembodied, maharishi, offal, krulak, sidgwick, rational,\n", - "Nearest to consists: lippe, karaca, anthropic, gramophone, squids, cbd, buildup, detox,\n", - "Nearest to scale: exposed, shrek, allude, chappell, foretells, childe, sheltered, escola,\n", - "Nearest to units: experimenter, lawn, fortieth, jagdish, mileposts, summit, danse, decorations,\n", - "Nearest to ice: pediment, witnessing, staining, plasmodium, habibie, riggs, detection, reconstruction,\n", - "Nearest to instance: caesarean, healthy, wong, resize, corals, movers, attitudes, buena,\n", - "Nearest to channel: creditors, tritium, bouchard, mastercard, gli, dray, stringy, frees,\n", - "Nearest to report: conscious, hellas, candlestick, midwinter, presidents, girls, bathyscaphe, haryana,\n", - "Epoch 1/10 Iteration: 1100 Avg. Training loss: 5.4772 0.1044 sec/batch\n", - "Epoch 1/10 Iteration: 1200 Avg. Training loss: 5.4192 0.1002 sec/batch\n", - "Epoch 1/10 Iteration: 1300 Avg. Training loss: 5.3636 0.1020 sec/batch\n", - "Epoch 1/10 Iteration: 1400 Avg. Training loss: 5.2318 0.1000 sec/batch\n", - "Epoch 1/10 Iteration: 1500 Avg. Training loss: 5.1699 0.0994 sec/batch\n", - "Epoch 1/10 Iteration: 1600 Avg. Training loss: 5.1744 0.0986 sec/batch\n", - "Epoch 1/10 Iteration: 1700 Avg. Training loss: 5.1248 0.1007 sec/batch\n", - "Epoch 1/10 Iteration: 1800 Avg. Training loss: 5.0379 0.1045 sec/batch\n", - "Epoch 1/10 Iteration: 1900 Avg. 
Training loss: 4.9862 0.0994 sec/batch\n", - "Epoch 1/10 Iteration: 2000 Avg. Training loss: 4.9961 0.0995 sec/batch\n", - "Nearest to for: hoffman, rogue, explains, cited, dod, listed, census, oxford,\n", - "Nearest to would: louisiane, still, bottle, nyquist, lampoon, introduced, disobey, feet,\n", - "Nearest to known: homicide, intervening, tori, assistants, lodbrok, mated, millions, justified,\n", - "Nearest to used: contributing, ceilings, institutionalization, brazil, pre, question, superstitious, incorporates,\n", - "Nearest to at: squaresoft, help, taya, good, degree, their, melody, ferraris,\n", - "Nearest to such: school, seldom, noise, distances, desired, wanderer, heseltine, next,\n", - "Nearest to called: purification, implementing, industry, ramp, stoner, philology, cost, vegetative,\n", - "Nearest to when: edinburgh, tze, preservation, sul, five, order, benguela, fo,\n", - "Nearest to taking: rational, death, disembodied, countless, krulak, quaternions, carpal, audited,\n", - "Nearest to consists: gramophone, karaca, whigs, squids, brighton, anthropic, heterosexuals, increase,\n", - "Nearest to scale: exposed, formation, shrek, full, childe, sheltered, aggregated, speciation,\n", - "Nearest to units: summit, begins, independent, dod, asserted, appoint, lawn, experimenter,\n", - "Nearest to ice: pediment, witnessing, reconstruction, habibie, aiding, riggs, inflammable, detection,\n", - "Nearest to instance: healthy, wong, census, attitudes, believed, buena, corals, husband,\n", - "Nearest to channel: creditors, tritium, mastercard, bouchard, frees, stringy, bypassing, nietzsche,\n", - "Nearest to report: conscious, presidents, hellas, but, girls, cooper, lineage, publishing,\n", - "Epoch 1/10 Iteration: 2100 Avg. Training loss: 4.9267 0.0995 sec/batch\n", - "Epoch 1/10 Iteration: 2200 Avg. Training loss: 4.9097 0.1014 sec/batch\n", - "Epoch 1/10 Iteration: 2300 Avg. Training loss: 4.8684 0.1004 sec/batch\n", - "Epoch 1/10 Iteration: 2400 Avg. Training loss: 4.8427 0.1060 sec/batch\n", - "Epoch 1/10 Iteration: 2500 Avg. Training loss: 4.8111 0.1087 sec/batch\n", - "Epoch 1/10 Iteration: 2600 Avg. Training loss: 4.8307 0.1029 sec/batch\n", - "Epoch 1/10 Iteration: 2700 Avg. Training loss: 4.7947 0.1068 sec/batch\n", - "Epoch 1/10 Iteration: 2800 Avg. Training loss: 4.8068 0.1025 sec/batch\n", - "Epoch 1/10 Iteration: 2900 Avg. Training loss: 4.7837 0.1026 sec/batch\n", - "Epoch 1/10 Iteration: 3000 Avg. 
Training loss: 4.7842 0.1076 sec/batch\n", - "Nearest to for: hoffman, rogue, searchable, housed, cited, explains, dod, silos,\n", - "Nearest to would: louisiane, still, concentrate, lampoon, disobey, nyquist, bottle, kaiju,\n", - "Nearest to known: homicide, intervening, tori, millions, justified, mated, lodbrok, satrapies,\n", - "Nearest to used: contributing, ceilings, brazil, institutionalization, breed, superstitious, incorporates, tends,\n", - "Nearest to at: squaresoft, melody, ferraris, buffalo, competed, emi, taya, kids,\n", - "Nearest to such: seldom, desired, school, noise, distances, wanderer, rays, unions,\n", - "Nearest to called: ramp, philology, implementing, purification, industry, lacklustre, stoner, strategic,\n", - "Nearest to when: edinburgh, attractive, preservation, fo, sul, itv, tze, scotland,\n", - "Nearest to taking: rational, disembodied, india, death, arlene, exercised, quaternions, countless,\n", - "Nearest to consists: gramophone, karaca, anthropic, brighton, buildup, whigs, squids, fascist,\n", - "Nearest to scale: exposed, formation, coral, curved, childe, chappell, unusable, shrek,\n", - "Nearest to units: lawn, summit, appoint, begins, dod, laid, independent, experimenter,\n", - "Nearest to ice: witnessing, reconstruction, detection, pediment, aiding, inflammable, drugs, habibie,\n", - "Nearest to instance: healthy, wong, buena, census, attitudes, implementations, caesarean, corals,\n", - "Nearest to channel: creditors, tritium, mastercard, bouchard, frees, bypassing, nietzsche, dray,\n", - "Nearest to report: conscious, presidents, hellas, cooper, ts, girls, isomorphism, credibility,\n", - "Epoch 1/10 Iteration: 3100 Avg. Training loss: 4.7704 0.1056 sec/batch\n", - "Epoch 1/10 Iteration: 3200 Avg. Training loss: 4.7655 0.1045 sec/batch\n", - "Epoch 1/10 Iteration: 3300 Avg. Training loss: 4.7184 0.1032 sec/batch\n", - "Epoch 1/10 Iteration: 3400 Avg. Training loss: 4.7202 0.1049 sec/batch\n", - "Epoch 1/10 Iteration: 3500 Avg. Training loss: 4.7368 0.1028 sec/batch\n", - "Epoch 1/10 Iteration: 3600 Avg. Training loss: 4.7046 0.1022 sec/batch\n", - "Epoch 1/10 Iteration: 3700 Avg. Training loss: 4.6942 0.1021 sec/batch\n", - "Epoch 1/10 Iteration: 3800 Avg. Training loss: 4.7397 0.1023 sec/batch\n", - "Epoch 1/10 Iteration: 3900 Avg. Training loss: 4.7120 0.1021 sec/batch\n", - "Epoch 1/10 Iteration: 4000 Avg. 
Training loss: 4.6501 0.1022 sec/batch\n", - "Nearest to for: hoffman, rogue, searchable, housed, silos, cited, dod, jehoiakim,\n", - "Nearest to would: louisiane, lampoon, concentrate, disobey, nyquist, still, albertina, bottle,\n", - "Nearest to known: homicide, mated, tori, intervening, justified, satrapies, millions, lodbrok,\n", - "Nearest to used: ceilings, contributing, institutionalization, brazil, breed, gilchrist, hawat, superstitious,\n", - "Nearest to at: squaresoft, emi, buffalo, melody, worded, polls, competed, lander,\n", - "Nearest to such: desired, seldom, distances, wanderer, noise, license, expellees, heseltine,\n", - "Nearest to called: ramp, philology, implementing, purification, lacklustre, vegetative, industry, intimidated,\n", - "Nearest to when: edinburgh, sul, preservation, fo, attractive, tze, launchers, benguela,\n", - "Nearest to taking: leopards, maharishi, india, rational, forge, concordat, arlene, disembodied,\n", - "Nearest to consists: gramophone, buildup, karaca, coronets, brighton, terminals, efficiencies, anthropic,\n", - "Nearest to scale: exposed, chappell, childe, formation, allude, sheltered, embroiled, unusable,\n", - "Nearest to units: lawn, experimenter, summit, typewriter, fortieth, torsion, independent, jagdish,\n", - "Nearest to ice: witnessing, reconstruction, detection, pediment, habibie, aiding, pyotr, inflammable,\n", - "Nearest to instance: healthy, wong, attitudes, resize, buena, implementations, synapses, census,\n", - "Nearest to channel: creditors, tritium, mastercard, odor, frees, bouchard, dray, speculators,\n", - "Nearest to report: conscious, candlestick, hellas, presidents, haight, credibility, cooper, isomorphism,\n", - "Epoch 1/10 Iteration: 4100 Avg. Training loss: 4.6614 0.1032 sec/batch\n", - "Epoch 1/10 Iteration: 4200 Avg. Training loss: 4.6734 0.1022 sec/batch\n", - "Epoch 1/10 Iteration: 4300 Avg. Training loss: 4.6329 0.1024 sec/batch\n", - "Epoch 1/10 Iteration: 4400 Avg. Training loss: 4.6284 0.1037 sec/batch\n", - "Epoch 1/10 Iteration: 4500 Avg. Training loss: 4.6296 0.1047 sec/batch\n", - "Epoch 1/10 Iteration: 4600 Avg. Training loss: 4.6149 0.1042 sec/batch\n", - "Epoch 2/10 Iteration: 4700 Avg. Training loss: 4.5956 0.0812 sec/batch\n", - "Epoch 2/10 Iteration: 4800 Avg. Training loss: 4.5381 0.1114 sec/batch\n", - "Epoch 2/10 Iteration: 4900 Avg. Training loss: 4.5008 0.1046 sec/batch\n", - "Epoch 2/10 Iteration: 5000 Avg. 
Training loss: 4.5004 0.1017 sec/batch\n", - "Nearest to for: hoffman, rogue, searchable, housed, cited, explains, appropriately, silos,\n", - "Nearest to would: lampoon, concentrate, disobey, nyquist, louisiane, albertina, still, bottle,\n", - "Nearest to known: homicide, mated, assistants, satrapies, justified, tori, uppercase, rtgs,\n", - "Nearest to used: ceilings, contributing, institutionalization, gilchrist, mollusks, breed, hawat, tends,\n", - "Nearest to at: squaresoft, taya, emi, melody, buffalo, lander, awarding, polls,\n", - "Nearest to such: desired, noise, distances, seldom, license, heseltine, expellees, plosives,\n", - "Nearest to called: ramp, philology, lacklustre, purification, implementing, vegetative, bakunin, intimidated,\n", - "Nearest to when: edinburgh, attractive, preservation, fo, sul, tze, launchers, ragga,\n", - "Nearest to taking: leopards, arlene, rational, sidgwick, concordat, india, maharishi, representational,\n", - "Nearest to consists: gramophone, efficiencies, karaca, buildup, coronets, coasts, terminals, anthropic,\n", - "Nearest to scale: exposed, chappell, allude, formation, childe, fuse, aggregated, curved,\n", - "Nearest to units: torsion, lawn, fortieth, experimenter, typewriter, overlordship, jagdish, latest,\n", - "Nearest to ice: reconstruction, witnessing, detection, plasmodium, pinstripes, habibie, pediment, pyotr,\n", - "Nearest to instance: healthy, resize, synapses, attitudes, lenses, wong, implementations, corals,\n", - "Nearest to channel: tritium, creditors, mastercard, speculators, gli, dray, bouchard, frees,\n", - "Nearest to report: candlestick, conscious, haight, hellas, presidents, leaped, credibility, cooper,\n", - "Epoch 2/10 Iteration: 5100 Avg. Training loss: 4.5328 0.1027 sec/batch\n", - "Epoch 2/10 Iteration: 5200 Avg. Training loss: 4.4976 0.1024 sec/batch\n", - "Epoch 2/10 Iteration: 5300 Avg. Training loss: 4.4784 0.1023 sec/batch\n", - "Epoch 2/10 Iteration: 5400 Avg. Training loss: 4.5429 0.1024 sec/batch\n", - "Epoch 2/10 Iteration: 5500 Avg. Training loss: 4.5072 0.1021 sec/batch\n", - "Epoch 2/10 Iteration: 5600 Avg. Training loss: 4.4743 0.1062 sec/batch\n", - "Epoch 2/10 Iteration: 5700 Avg. Training loss: 4.4699 0.1040 sec/batch\n", - "Epoch 2/10 Iteration: 5800 Avg. Training loss: 4.3911 0.1088 sec/batch\n", - "Epoch 2/10 Iteration: 5900 Avg. Training loss: 4.4513 0.1101 sec/batch\n", - "Epoch 2/10 Iteration: 6000 Avg. 
Training loss: 4.4301 0.1096 sec/batch\n", - "Nearest to for: rogue, hoffman, searchable, appropriately, cited, meats, silos, housed,\n", - "Nearest to would: disobey, nyquist, concentrate, lampoon, louisiane, whyte, still, albertina,\n", - "Nearest to known: homicide, mated, satrapies, rtgs, justified, tori, ctor, millions,\n", - "Nearest to used: ceilings, contributing, mollusks, institutionalization, hawat, user, breed, weight,\n", - "Nearest to at: squaresoft, taya, emi, awarding, buffalo, melody, lander, polls,\n", - "Nearest to such: desired, license, seldom, distances, noise, heseltine, plosives, consumers,\n", - "Nearest to called: ramp, vegetative, lacklustre, philology, implementing, bakunin, supersessionism, purification,\n", - "Nearest to when: edinburgh, fo, attractive, ragga, preservation, tze, be, benguela,\n", - "Nearest to taking: leopards, arlene, rational, sidgwick, concordat, bhagavan, vicar, applause,\n", - "Nearest to consists: efficiencies, gramophone, karaca, buildup, coasts, coronets, cbd, terminals,\n", - "Nearest to scale: exposed, chappell, formation, allude, childe, curved, fuse, coral,\n", - "Nearest to units: torsion, typewriter, fortieth, lawn, latest, experimenter, torrens, arched,\n", - "Nearest to ice: reconstruction, detection, plasmodium, witnessing, staining, soils, pediment, habibie,\n", - "Nearest to instance: healthy, resize, synapses, implementations, lenses, attitudes, spreads, what,\n", - "Nearest to channel: tritium, speculators, creditors, dray, restructured, mastercard, gli, frees,\n", - "Nearest to report: candlestick, haight, conscious, leaped, credibility, presidents, hellas, standish,\n", - "Epoch 2/10 Iteration: 6100 Avg. Training loss: 4.4451 0.1131 sec/batch\n", - "Epoch 2/10 Iteration: 6200 Avg. Training loss: 4.4053 0.1095 sec/batch\n", - "Epoch 2/10 Iteration: 6300 Avg. Training loss: 4.4466 0.1095 sec/batch\n", - "Epoch 2/10 Iteration: 6400 Avg. Training loss: 4.4000 0.1088 sec/batch\n", - "Epoch 2/10 Iteration: 6500 Avg. Training loss: 4.4273 0.1082 sec/batch\n", - "Epoch 2/10 Iteration: 6600 Avg. Training loss: 4.4487 0.1098 sec/batch\n", - "Epoch 2/10 Iteration: 6700 Avg. Training loss: 4.3700 0.1094 sec/batch\n", - "Epoch 2/10 Iteration: 6800 Avg. Training loss: 4.3856 0.1091 sec/batch\n", - "Epoch 2/10 Iteration: 6900 Avg. Training loss: 4.4200 0.1091 sec/batch\n", - "Epoch 2/10 Iteration: 7000 Avg. 
Training loss: 4.3654 0.1083 sec/batch\n", - "Nearest to for: hoffman, rogue, searchable, cited, appropriately, silos, caller, jehoiakim,\n", - "Nearest to would: disobey, nyquist, lampoon, concentrate, louisiane, whyte, still, olin,\n", - "Nearest to known: mated, homicide, satrapies, tori, rtgs, assistants, grady, oak,\n", - "Nearest to used: ceilings, mollusks, institutionalization, contributing, user, breed, gilchrist, negating,\n", - "Nearest to at: squaresoft, taya, emi, awarding, room, bathory, berke, melody,\n", - "Nearest to such: desired, license, noise, seldom, plosives, distances, itself, techniques,\n", - "Nearest to called: ramp, vegetative, bakunin, lacklustre, philology, supersessionism, intimidated, sealand,\n", - "Nearest to when: edinburgh, ragga, attractive, benguela, be, fo, preservation, launchers,\n", - "Nearest to taking: leopards, rational, arlene, concordat, sidgwick, bhagavan, vicar, tents,\n", - "Nearest to consists: karaca, gramophone, coasts, efficiencies, cbd, buildup, anthropic, eee,\n", - "Nearest to scale: exposed, chappell, formation, childe, speciation, allude, curved, coral,\n", - "Nearest to units: torsion, typewriter, fortieth, force, experimenter, arched, latest, teletype,\n", - "Nearest to ice: reconstruction, detection, plasmodium, staining, soils, witnessing, pediment, robotics,\n", - "Nearest to instance: synapses, resize, healthy, implementations, lenses, attitudes, spreads, krugerrand,\n", - "Nearest to channel: tritium, speculators, creditors, curler, mastercard, restructured, dray, almohades,\n", - "Nearest to report: candlestick, presidents, haight, leaped, conscious, standish, credibility, tillman,\n", - "Epoch 2/10 Iteration: 7100 Avg. Training loss: 4.3969 0.1102 sec/batch\n", - "Epoch 2/10 Iteration: 7200 Avg. Training loss: 4.3768 0.1086 sec/batch\n", - "Epoch 2/10 Iteration: 7300 Avg. Training loss: 4.3602 0.1087 sec/batch\n", - "Epoch 2/10 Iteration: 7400 Avg. Training loss: 4.3689 0.1125 sec/batch\n", - "Epoch 2/10 Iteration: 7500 Avg. Training loss: 4.4073 0.1099 sec/batch\n", - "Epoch 2/10 Iteration: 7600 Avg. Training loss: 4.3354 0.1114 sec/batch\n", - "Epoch 2/10 Iteration: 7700 Avg. Training loss: 4.3640 0.1068 sec/batch\n", - "Epoch 2/10 Iteration: 7800 Avg. Training loss: 4.3759 0.1094 sec/batch\n", - "Epoch 2/10 Iteration: 7900 Avg. Training loss: 4.3205 0.1064 sec/batch\n", - "Epoch 2/10 Iteration: 8000 Avg. 
Training loss: 4.3363 0.1084 sec/batch\n", - "Nearest to for: hoffman, rogue, silos, searchable, housed, entities, appropriately, jehoiakim,\n", - "Nearest to would: disobey, nyquist, lampoon, louisiane, zubaydah, habilis, concentrate, despaired,\n", - "Nearest to known: satrapies, mated, oak, homicide, demographically, justified, conglomerates, uppercase,\n", - "Nearest to used: ceilings, mollusks, institutionalization, gilchrist, bp, negating, nazca, contributing,\n", - "Nearest to at: emi, awarding, taya, bathory, squaresoft, sharps, motivates, room,\n", - "Nearest to such: desired, license, seldom, plosives, noise, assumes, techniques, furtherance,\n", - "Nearest to called: ramp, vegetative, bakunin, lacklustre, reintroduce, philology, purification, supersessionism,\n", - "Nearest to when: edinburgh, ragga, refuse, attractive, be, benguela, tze, fo,\n", - "Nearest to taking: leopards, rational, concordat, sidgwick, arlene, anoxic, bhagavan, vicar,\n", - "Nearest to consists: karaca, cbd, coasts, gramophone, brighton, eee, circumcising, efficiencies,\n", - "Nearest to scale: exposed, chappell, formation, speciation, curved, allude, childe, coral,\n", - "Nearest to units: torsion, fortieth, typewriter, force, arched, experimenter, latest, torrens,\n", - "Nearest to ice: soils, plasmodium, reconstruction, staining, detection, golem, hartsfield, witnessing,\n", - "Nearest to instance: synapses, resize, healthy, lenses, implementations, illogical, krugerrand, attitudes,\n", - "Nearest to channel: speculators, tritium, curler, creditors, mastercard, restructured, almohades, odor,\n", - "Nearest to report: haight, candlestick, presidents, leaped, corte, conscious, tillman, standish,\n", - "Epoch 2/10 Iteration: 8100 Avg. Training loss: 4.3422 0.1105 sec/batch\n", - "Epoch 2/10 Iteration: 8200 Avg. Training loss: 4.2877 0.1093 sec/batch\n", - "Epoch 2/10 Iteration: 8300 Avg. Training loss: 4.3619 0.1113 sec/batch\n", - "Epoch 2/10 Iteration: 8400 Avg. Training loss: 4.3875 0.1123 sec/batch\n", - "Epoch 2/10 Iteration: 8500 Avg. Training loss: 4.3750 0.1136 sec/batch\n", - "Epoch 2/10 Iteration: 8600 Avg. Training loss: 4.2679 0.1082 sec/batch\n", - "Epoch 2/10 Iteration: 8700 Avg. Training loss: 4.3009 0.1120 sec/batch\n", - "Epoch 2/10 Iteration: 8800 Avg. Training loss: 4.3798 0.1139 sec/batch\n", - "Epoch 2/10 Iteration: 8900 Avg. Training loss: 4.2172 0.1133 sec/batch\n", - "Epoch 2/10 Iteration: 9000 Avg. 
Training loss: 4.2966 0.1099 sec/batch\n", - "Nearest to for: hoffman, rogue, searchable, silos, serrated, appropriately, emeryville, jehoiakim,\n", - "Nearest to would: disobey, nyquist, habilis, whyte, zubaydah, despaired, replied, concentrate,\n", - "Nearest to known: mated, satrapies, rtgs, uppercase, oak, homicide, demographically, very,\n", - "Nearest to used: ceilings, mollusks, bp, comprehensible, institutionalization, gilchrist, nazca, negating,\n", - "Nearest to at: emi, taya, bathory, squaresoft, awarding, motivates, room, summer,\n", - "Nearest to such: desired, license, heseltine, furtherance, seldom, techniques, monopolistic, plosives,\n", - "Nearest to called: ramp, vegetative, lacklustre, bakunin, philology, purification, supersessionism, reintroduce,\n", - "Nearest to when: edinburgh, ragga, be, refuse, benguela, attractive, tze, bursa,\n", - "Nearest to taking: leopards, rational, concordat, sidgwick, bhagavan, go, arlene, garis,\n", - "Nearest to consists: eee, karaca, cbd, efficiencies, coasts, brighton, coronets, circumcising,\n", - "Nearest to scale: exposed, chappell, formation, allude, curved, speciation, fuse, coral,\n", - "Nearest to units: torsion, fortieth, typewriter, force, torrens, arched, teletype, experimenter,\n", - "Nearest to ice: soils, plasmodium, reconstruction, staining, golem, detection, hartsfield, pyotr,\n", - "Nearest to instance: synapses, resize, healthy, lenses, krugerrand, illogical, implementations, spreads,\n", - "Nearest to channel: tritium, speculators, curler, mastercard, restructured, creditors, almohades, dray,\n", - "Nearest to report: haight, leaped, candlestick, presidents, standish, corte, conscious, credibility,\n", - "Epoch 2/10 Iteration: 9100 Avg. Training loss: 4.3073 0.1099 sec/batch\n", - "Epoch 2/10 Iteration: 9200 Avg. Training loss: 4.3067 0.1088 sec/batch\n", - "Epoch 3/10 Iteration: 9300 Avg. Training loss: 4.3305 0.0503 sec/batch\n", - "Epoch 3/10 Iteration: 9400 Avg. Training loss: 4.2538 0.1096 sec/batch\n", - "Epoch 3/10 Iteration: 9500 Avg. Training loss: 4.2195 0.1093 sec/batch\n", - "Epoch 3/10 Iteration: 9600 Avg. Training loss: 4.2297 0.1091 sec/batch\n", - "Epoch 3/10 Iteration: 9700 Avg. Training loss: 4.2225 0.1116 sec/batch\n", - "Epoch 3/10 Iteration: 9800 Avg. Training loss: 4.2412 0.1091 sec/batch\n", - "Epoch 3/10 Iteration: 9900 Avg. Training loss: 4.2439 0.1091 sec/batch\n", - "Epoch 3/10 Iteration: 10000 Avg. 
Training loss: 4.1912 0.1096 sec/batch\n", - "Nearest to for: rogue, hoffman, searchable, silos, caller, converged, appropriately, pokey,\n", - "Nearest to would: disobey, nyquist, whyte, habilis, zubaydah, concentrate, lampoon, weaponry,\n", - "Nearest to known: mated, rtgs, conglomerates, demographically, oak, uppercase, satrapies, assistants,\n", - "Nearest to used: ceilings, mollusks, bp, negating, comprehensible, institutionalization, cages, bleaches,\n", - "Nearest to at: emi, taya, bathory, awarding, room, summer, squaresoft, sharps,\n", - "Nearest to such: license, desired, heseltine, plosives, afips, furtherance, expellees, techniques,\n", - "Nearest to called: ramp, bakunin, philology, vegetative, lacklustre, supersessionism, purification, reintroduce,\n", - "Nearest to when: edinburgh, ragga, refuse, benguela, attractive, remove, be, falklands,\n", - "Nearest to taking: leopards, rational, concordat, go, sidgwick, garis, bhagavan, applause,\n", - "Nearest to consists: eee, cbd, coasts, efficiencies, karaca, brighton, coronets, located,\n", - "Nearest to scale: exposed, chappell, coral, allude, curved, formation, fuse, speciation,\n", - "Nearest to units: torsion, fortieth, force, typewriter, teletype, torrens, pucker, arched,\n", - "Nearest to ice: soils, plasmodium, staining, reconstruction, detection, golem, pyotr, pinstripes,\n", - "Nearest to instance: resize, synapses, healthy, lenses, krugerrand, illogical, attitudes, caesarean,\n", - "Nearest to channel: speculators, tritium, curler, mastercard, restructured, creditors, bypassing, almohades,\n", - "Nearest to report: candlestick, standish, credibility, haight, leaped, presidents, conscious, corte,\n", - "Epoch 3/10 Iteration: 10100 Avg. Training loss: 4.2465 0.1103 sec/batch\n", - "Epoch 3/10 Iteration: 10200 Avg. Training loss: 4.2411 0.1091 sec/batch\n", - "Epoch 3/10 Iteration: 10300 Avg. Training loss: 4.2232 0.1098 sec/batch\n", - "Epoch 3/10 Iteration: 10400 Avg. Training loss: 4.1565 0.1094 sec/batch\n", - "Epoch 3/10 Iteration: 10500 Avg. Training loss: 4.1659 0.1097 sec/batch\n", - "Epoch 3/10 Iteration: 10600 Avg. Training loss: 4.1560 0.1100 sec/batch\n", - "Epoch 3/10 Iteration: 10700 Avg. Training loss: 4.1616 0.1101 sec/batch\n", - "Epoch 3/10 Iteration: 10800 Avg. Training loss: 4.1829 0.1101 sec/batch\n", - "Epoch 3/10 Iteration: 10900 Avg. Training loss: 4.1989 0.1096 sec/batch\n", - "Epoch 3/10 Iteration: 11000 Avg. 
Training loss: 4.1676 0.1097 sec/batch\n", - "Nearest to for: hoffman, rogue, searchable, caller, silos, appropriately, typeface, converged,\n", - "Nearest to would: disobey, nyquist, whyte, weaponry, habilis, zubaydah, concentrate, despaired,\n", - "Nearest to known: rtgs, demographically, mated, satrapies, very, conical, usability, uppercase,\n", - "Nearest to used: ceilings, mollusks, negating, bp, institutionalization, grams, cages, painstaking,\n", - "Nearest to at: emi, taya, awarding, room, squaresoft, sharps, bathory, italia,\n", - "Nearest to such: license, desired, plosives, techniques, heseltine, undercurrent, imperii, procedure,\n", - "Nearest to called: vegetative, ramp, supersessionism, bakunin, sealand, philology, purification, reintroduce,\n", - "Nearest to when: ragga, edinburgh, attractive, refuse, be, benguela, remove, falklands,\n", - "Nearest to taking: leopards, rational, go, concordat, garis, sidgwick, carpal, anoxic,\n", - "Nearest to consists: eee, cbd, coasts, located, condorcet, circumcising, gramophone, brighton,\n", - "Nearest to scale: exposed, chappell, fuse, childe, curved, allude, formation, speciation,\n", - "Nearest to units: torsion, force, fortieth, typewriter, teletype, latest, unit, prefixes,\n", - "Nearest to ice: soils, plasmodium, staining, detection, reconstruction, pinstripes, fracture, golem,\n", - "Nearest to instance: resize, synapses, lenses, implementations, healthy, illogical, oscillators, krugerrand,\n", - "Nearest to channel: curler, speculators, tritium, restructured, creditors, bypassing, mastercard, dray,\n", - "Nearest to report: credibility, presidents, candlestick, standish, leaped, haight, corte, conscious,\n", - "Epoch 3/10 Iteration: 11100 Avg. Training loss: 4.1830 0.1103 sec/batch\n", - "Epoch 3/10 Iteration: 11200 Avg. Training loss: 4.2133 0.1089 sec/batch\n", - "Epoch 3/10 Iteration: 11300 Avg. Training loss: 4.1865 0.1096 sec/batch\n", - "Epoch 3/10 Iteration: 11400 Avg. Training loss: 4.1479 0.1090 sec/batch\n", - "Epoch 3/10 Iteration: 11500 Avg. Training loss: 4.2011 0.1093 sec/batch\n", - "Epoch 3/10 Iteration: 11600 Avg. Training loss: 4.1720 0.1095 sec/batch\n", - "Epoch 3/10 Iteration: 11700 Avg. Training loss: 4.2111 0.1095 sec/batch\n", - "Epoch 3/10 Iteration: 11800 Avg. Training loss: 4.1659 0.1095 sec/batch\n", - "Epoch 3/10 Iteration: 11900 Avg. Training loss: 4.1315 0.1091 sec/batch\n", - "Epoch 3/10 Iteration: 12000 Avg. 
Training loss: 4.1508 0.1092 sec/batch\n", - "Nearest to for: hoffman, rogue, given, searchable, silos, census, converged, caller,\n", - "Nearest to would: disobey, habilis, nyquist, zubaydah, whyte, despaired, weaponry, preeminence,\n", - "Nearest to known: rtgs, mated, satrapies, uppercase, usability, conical, very, oak,\n", - "Nearest to used: ceilings, mollusks, bp, negating, institutionalization, decorator, supplementation, cirth,\n", - "Nearest to at: emi, taya, awarding, habr, squaresoft, sharps, coronets, dini,\n", - "Nearest to such: desired, techniques, plosives, license, pollutant, procedure, unfair, lysenkoism,\n", - "Nearest to called: ramp, vegetative, supersessionism, bakunin, philology, sealand, reintroduce, denunciations,\n", - "Nearest to when: ragga, edinburgh, attractive, be, refuse, benguela, bush, remove,\n", - "Nearest to taking: leopards, rational, concordat, sidgwick, arlene, garis, carpal, anoxic,\n", - "Nearest to consists: eee, cbd, coasts, gramophone, located, morisot, condorcet, brighton,\n", - "Nearest to scale: exposed, chappell, curved, allude, formation, fuse, speciation, childe,\n", - "Nearest to units: force, torsion, fortieth, typewriter, teletype, unit, prefixes, pucker,\n", - "Nearest to ice: soils, staining, plasmodium, fracture, pinstripes, reconstruction, pyotr, louth,\n", - "Nearest to instance: resize, lenses, synapses, implementations, illogical, healthy, krugerrand, oscillators,\n", - "Nearest to channel: curler, tritium, speculators, restructured, mastercard, creditors, bypassing, almohades,\n", - "Nearest to report: credibility, presidents, standish, candlestick, leaped, annotated, haight, serviced,\n", - "Epoch 3/10 Iteration: 12100 Avg. Training loss: 4.1912 0.1103 sec/batch\n", - "Epoch 3/10 Iteration: 12200 Avg. Training loss: 4.1658 0.1091 sec/batch\n", - "Epoch 3/10 Iteration: 12300 Avg. Training loss: 4.1775 0.1089 sec/batch\n", - "Epoch 3/10 Iteration: 12400 Avg. Training loss: 4.1726 0.1093 sec/batch\n", - "Epoch 3/10 Iteration: 12500 Avg. Training loss: 4.1599 0.1099 sec/batch\n", - "Epoch 3/10 Iteration: 12600 Avg. Training loss: 4.1498 0.1099 sec/batch\n", - "Epoch 3/10 Iteration: 12700 Avg. Training loss: 4.1615 0.1097 sec/batch\n", - "Epoch 3/10 Iteration: 12800 Avg. Training loss: 4.1188 0.1095 sec/batch\n", - "Epoch 3/10 Iteration: 12900 Avg. Training loss: 4.1679 0.1098 sec/batch\n", - "Epoch 3/10 Iteration: 13000 Avg. 
Training loss: 4.2005 0.1100 sec/batch\n", - "Nearest to for: hoffman, rogue, emeryville, census, given, scriptwriter, searchable, converged,\n", - "Nearest to would: disobey, habilis, despaired, zubaydah, amontillado, preeminence, whyte, replied,\n", - "Nearest to known: satrapies, mated, rtgs, oak, grady, tori, demographically, usability,\n", - "Nearest to used: ceilings, bp, negating, cirth, decorator, supplementation, comprehensible, hyphen,\n", - "Nearest to at: emi, taya, italia, habr, bathory, dini, nde, awarding,\n", - "Nearest to such: desired, unfair, expellees, eudicots, actus, nanda, plosives, license,\n", - "Nearest to called: supersessionism, bakunin, reintroduce, excommunicating, faithless, denunciations, ramp, vegetative,\n", - "Nearest to when: edinburgh, ragga, refuse, attractive, bush, be, benguela, convinced,\n", - "Nearest to taking: leopards, rational, sidgwick, concordat, go, garis, anoxic, arlene,\n", - "Nearest to consists: eee, cbd, condorcet, located, coasts, brighton, morisot, circumcising,\n", - "Nearest to scale: exposed, chappell, allude, curved, fuse, speciation, hashes, sheltered,\n", - "Nearest to units: force, torsion, fortieth, typewriter, teletype, unit, pucker, prefixes,\n", - "Nearest to ice: staining, plasmodium, soils, pinstripes, pyotr, fracture, louth, golem,\n", - "Nearest to instance: resize, synapses, lenses, illogical, implementations, unappreciated, healthy, krugerrand,\n", - "Nearest to channel: curler, tritium, restructured, speculators, creditors, mastercard, bypassing, dray,\n", - "Nearest to report: presidents, credibility, leaped, standish, candlestick, focusing, haight, corte,\n", - "Epoch 3/10 Iteration: 13100 Avg. Training loss: 4.2402 0.1103 sec/batch\n", - "Epoch 3/10 Iteration: 13200 Avg. Training loss: 4.1416 0.1096 sec/batch\n", - "Epoch 3/10 Iteration: 13300 Avg. Training loss: 4.1287 0.1098 sec/batch\n", - "Epoch 3/10 Iteration: 13400 Avg. Training loss: 4.1439 0.1095 sec/batch\n", - "Epoch 3/10 Iteration: 13500 Avg. Training loss: 4.0455 0.1098 sec/batch\n", - "Epoch 3/10 Iteration: 13600 Avg. Training loss: 4.1497 0.1102 sec/batch\n", - "Epoch 3/10 Iteration: 13700 Avg. Training loss: 4.1528 0.1098 sec/batch\n", - "Epoch 3/10 Iteration: 13800 Avg. Training loss: 4.1375 0.1094 sec/batch\n", - "Epoch 4/10 Iteration: 13900 Avg. Training loss: 4.1982 0.0209 sec/batch\n", - "Epoch 4/10 Iteration: 14000 Avg. 
Training loss: 4.1256 0.1089 sec/batch\n", - "Nearest to for: hoffman, rogue, given, converged, searchable, scriptwriter, typeface, emeryville,\n", - "Nearest to would: disobey, habilis, nyquist, whyte, zubaydah, busting, amontillado, gimme,\n", - "Nearest to known: rtgs, very, perihelion, uppercase, satrapies, usability, fervour, conglomerates,\n", - "Nearest to used: ceilings, bp, bleaches, cirth, negating, supplementation, institutionalization, stds,\n", - "Nearest to at: emi, taya, travelling, seated, bathory, coronets, breach, awarding,\n", - "Nearest to such: license, pollutant, techniques, desired, conceals, actus, procedure, unfair,\n", - "Nearest to called: ramp, vegetative, supersessionism, reintroduce, faithless, ripples, sealand, joliot,\n", - "Nearest to when: edinburgh, ragga, attractive, bush, refuse, be, benguela, bursa,\n", - "Nearest to taking: leopards, rational, sidgwick, garis, anoxic, go, concordat, carpal,\n", - "Nearest to consists: eee, cbd, located, brighton, condorcet, chamber, appoints, coasts,\n", - "Nearest to scale: exposed, allude, curved, fuse, chappell, mellin, capricornus, gears,\n", - "Nearest to units: force, torsion, fortieth, unit, prefixes, typewriter, teletype, pucker,\n", - "Nearest to ice: staining, plasmodium, soils, pinstripes, pyotr, louth, hawk, golem,\n", - "Nearest to instance: resize, synapses, illogical, lenses, krugerrand, healthy, unappreciated, oscillators,\n", - "Nearest to channel: curler, creditors, tritium, dray, restructured, bypassing, mastercard, speculators,\n", - "Nearest to report: credibility, presidents, leaped, standish, candlestick, annotated, haight, targeted,\n", - "Epoch 4/10 Iteration: 14100 Avg. Training loss: 4.0816 0.1103 sec/batch\n", - "Epoch 4/10 Iteration: 14200 Avg. Training loss: 4.1231 0.1090 sec/batch\n", - "Epoch 4/10 Iteration: 14300 Avg. Training loss: 4.0923 0.1093 sec/batch\n", - "Epoch 4/10 Iteration: 14400 Avg. Training loss: 4.0457 0.1082 sec/batch\n", - "Epoch 4/10 Iteration: 14500 Avg. Training loss: 4.0987 0.1090 sec/batch\n", - "Epoch 4/10 Iteration: 14600 Avg. Training loss: 4.0307 0.1086 sec/batch\n", - "Epoch 4/10 Iteration: 14700 Avg. Training loss: 4.0652 0.1095 sec/batch\n", - "Epoch 4/10 Iteration: 14800 Avg. Training loss: 4.0900 0.1090 sec/batch\n", - "Epoch 4/10 Iteration: 14900 Avg. Training loss: 4.1109 0.1091 sec/batch\n", - "Epoch 4/10 Iteration: 15000 Avg. 
Training loss: 4.0441 0.1098 sec/batch\n", - "Nearest to for: rogue, given, converged, census, autrefois, hoffman, silos, searchable,\n", - "Nearest to would: disobey, nyquist, habilis, whyte, gimme, busting, preeminence, amontillado,\n", - "Nearest to known: rtgs, oak, usability, very, perihelion, mated, satrapies, fervour,\n", - "Nearest to used: ceilings, bp, grams, alliances, pacemakers, stds, epoxy, mollusks,\n", - "Nearest to at: emi, seated, travelling, aviators, coronets, taya, italia, awarding,\n", - "Nearest to such: desired, license, undercurrent, hinges, pollutant, unfair, techniques, heseltine,\n", - "Nearest to called: ramp, vegetative, supersessionism, reintroduce, sealand, denunciations, faithless, purification,\n", - "Nearest to when: ragga, edinburgh, attractive, bush, be, refuse, benguela, remove,\n", - "Nearest to taking: leopards, rational, garis, sidgwick, concordat, go, nba, anoxic,\n", - "Nearest to consists: eee, cbd, located, chamber, coasts, twos, consist, morisot,\n", - "Nearest to scale: exposed, allude, curved, capricornus, mellin, fuse, chappell, sheltered,\n", - "Nearest to units: force, unit, torsion, fortieth, prefixes, teletype, typewriter, pucker,\n", - "Nearest to ice: plasmodium, soils, staining, pinstripes, pyotr, louth, golem, gskola,\n", - "Nearest to instance: resize, lenses, illogical, synapses, krugerrand, healthy, unappreciated, caesarean,\n", - "Nearest to channel: curler, restructured, bypassing, creditors, dray, tritium, speculators, mastercard,\n", - "Nearest to report: credibility, presidents, spirituality, leaped, focusing, standish, annotated, targeted,\n", - "Epoch 4/10 Iteration: 15100 Avg. Training loss: 4.0226 0.1103 sec/batch\n", - "Epoch 4/10 Iteration: 15200 Avg. Training loss: 4.0229 0.1098 sec/batch\n", - "Epoch 4/10 Iteration: 15300 Avg. Training loss: 4.0029 0.1098 sec/batch\n", - "Epoch 4/10 Iteration: 15400 Avg. Training loss: 4.0458 0.1080 sec/batch\n", - "Epoch 4/10 Iteration: 15500 Avg. Training loss: 4.0678 0.0983 sec/batch\n", - "Epoch 4/10 Iteration: 15600 Avg. Training loss: 4.0606 0.1029 sec/batch\n", - "Epoch 4/10 Iteration: 15700 Avg. Training loss: 4.0898 0.1005 sec/batch\n", - "Epoch 4/10 Iteration: 15800 Avg. Training loss: 4.1047 0.0983 sec/batch\n", - "Epoch 4/10 Iteration: 15900 Avg. Training loss: 4.0668 0.1013 sec/batch\n", - "Epoch 4/10 Iteration: 16000 Avg. 
Training loss: 4.0396 0.1101 sec/batch\n", - "Nearest to for: given, census, hoffman, rogue, converged, parliamentary, autrefois, tomo,\n", - "Nearest to would: disobey, whyte, nyquist, habilis, gimme, despaired, busting, relegated,\n", - "Nearest to known: rtgs, banach, pisin, perihelion, oak, satrapies, mated, usability,\n", - "Nearest to used: bp, ceilings, grams, cirth, stds, bleaches, pacemakers, primary,\n", - "Nearest to at: emi, travelling, degree, taya, dominants, aviators, habr, awarding,\n", - "Nearest to such: desired, actus, plosives, lysenkoism, hinges, license, pollutant, conceals,\n", - "Nearest to called: supersessionism, reintroduce, denunciations, vegetative, faithless, ramp, core, sealand,\n", - "Nearest to when: ragga, edinburgh, attractive, be, refuse, bush, remove, painda,\n", - "Nearest to taking: rational, leopards, garis, sidgwick, concordat, go, anoxic, carpal,\n", - "Nearest to consists: eee, chamber, cbd, located, consist, morisot, condorcet, coasts,\n", - "Nearest to scale: exposed, mellin, allude, capricornus, fuse, childe, visualizing, curved,\n", - "Nearest to units: force, unit, fortieth, torsion, prefixes, teletype, typewriter, pucker,\n", - "Nearest to ice: plasmodium, staining, soils, pinstripes, louth, fracture, pyotr, detection,\n", - "Nearest to instance: resize, synapses, lenses, implementations, unappreciated, illogical, caesarean, oscillators,\n", - "Nearest to channel: curler, creditors, bypassing, restructured, mbit, tritium, dray, speculators,\n", - "Nearest to report: credibility, presidents, leaped, standish, spirituality, focusing, annotated, candlestick,\n", - "Epoch 4/10 Iteration: 16100 Avg. Training loss: 4.0831 0.1100 sec/batch\n", - "Epoch 4/10 Iteration: 16200 Avg. Training loss: 4.0817 0.1094 sec/batch\n", - "Epoch 4/10 Iteration: 16300 Avg. Training loss: 4.0709 0.1093 sec/batch\n", - "Epoch 4/10 Iteration: 16400 Avg. Training loss: 4.0693 0.1013 sec/batch\n", - "Epoch 4/10 Iteration: 16500 Avg. Training loss: 4.0710 0.1000 sec/batch\n", - "Epoch 4/10 Iteration: 16600 Avg. Training loss: 4.0771 0.1090 sec/batch\n", - "Epoch 4/10 Iteration: 16700 Avg. Training loss: 4.0465 0.1083 sec/batch\n", - "Epoch 4/10 Iteration: 16800 Avg. Training loss: 4.0753 0.1018 sec/batch\n", - "Epoch 4/10 Iteration: 16900 Avg. Training loss: 4.1115 0.1103 sec/batch\n", - "Epoch 4/10 Iteration: 17000 Avg. 
Training loss: 4.0615 0.1194 sec/batch\n", - "Nearest to for: given, scriptwriter, census, rogue, emeryville, hoffman, autrefois, converged,\n", - "Nearest to would: disobey, nyquist, habilis, whyte, busting, gimme, despaired, maecenas,\n", - "Nearest to known: satrapies, fervour, pisin, sixteenth, banach, with, perihelion, oak,\n", - "Nearest to used: ceilings, cirth, bp, alliances, stds, grams, machining, hyphen,\n", - "Nearest to at: emi, travelling, breach, dominants, taya, dini, bathory, degree,\n", - "Nearest to such: plosives, pollutant, desired, hinges, lysenkoism, undercurrent, actus, characterised,\n", - "Nearest to called: supersessionism, reintroduce, vegetative, denunciations, faithless, ramp, sealand, purification,\n", - "Nearest to when: ragga, edinburgh, attractive, refuse, be, painda, bush, manor,\n", - "Nearest to taking: leopards, rational, sidgwick, garis, concordat, templar, anoxic, carpal,\n", - "Nearest to consists: eee, chamber, cbd, morisot, consist, located, brighton, trending,\n", - "Nearest to scale: exposed, mellin, capricornus, allude, curved, regolith, fuse, speciation,\n", - "Nearest to units: force, unit, fortieth, torsion, prefixes, typewriter, teletype, pucker,\n", - "Nearest to ice: plasmodium, pinstripes, soils, pyotr, staining, louth, gory, fracture,\n", - "Nearest to instance: synapses, lenses, resize, unappreciated, implementations, illogical, placed, oscillators,\n", - "Nearest to channel: curler, restructured, creditors, mbit, bypassing, dray, dts, tritium,\n", - "Nearest to report: presidents, credibility, annotated, standish, spirituality, leaped, focusing, targeted,\n", - "Epoch 4/10 Iteration: 17100 Avg. Training loss: 4.0576 0.1166 sec/batch\n", - "Epoch 4/10 Iteration: 17200 Avg. Training loss: 4.0014 0.1178 sec/batch\n", - "Epoch 4/10 Iteration: 17300 Avg. Training loss: 4.0085 0.1100 sec/batch\n", - "Epoch 4/10 Iteration: 17400 Avg. Training loss: 4.0609 0.1082 sec/batch\n", - "Epoch 4/10 Iteration: 17500 Avg. Training loss: 4.0888 0.1111 sec/batch\n", - "Epoch 4/10 Iteration: 17600 Avg. Training loss: 4.1041 0.1124 sec/batch\n", - "Epoch 4/10 Iteration: 17700 Avg. Training loss: 4.1330 0.1147 sec/batch\n", - "Epoch 4/10 Iteration: 17800 Avg. Training loss: 4.0638 0.1094 sec/batch\n", - "Epoch 4/10 Iteration: 17900 Avg. Training loss: 4.0446 0.1126 sec/batch\n", - "Epoch 4/10 Iteration: 18000 Avg. 
Training loss: 4.0699 0.1122 sec/batch\n", - "Nearest to for: given, scriptwriter, rogue, census, autrefois, emeryville, converged, first,\n", - "Nearest to would: disobey, whyte, habilis, nyquist, busting, gimme, relegated, maecenas,\n", - "Nearest to known: satrapies, banach, rtgs, perihelion, pisin, quetzal, fervour, with,\n", - "Nearest to used: ceilings, cirth, machining, bp, stds, alliances, ido, okinawan,\n", - "Nearest to at: emi, travelling, breach, bathory, italia, dominants, dini, taya,\n", - "Nearest to such: hinges, cc, actus, plosives, desired, conceals, license, eudicots,\n", - "Nearest to called: supersessionism, reintroduce, ramp, faithless, denunciations, sealand, excommunicating, vegetative,\n", - "Nearest to when: edinburgh, ragga, attractive, refuse, be, bush, remove, painda,\n", - "Nearest to taking: rational, leopards, sidgwick, garis, anoxic, concordat, go, nba,\n", - "Nearest to consists: eee, chamber, cbd, appoints, consist, morisot, located, condorcet,\n", - "Nearest to scale: exposed, mellin, capricornus, allude, curved, fuse, regolith, speciation,\n", - "Nearest to units: unit, force, fortieth, prefixes, torsion, si, typewriter, teletype,\n", - "Nearest to ice: pinstripes, soils, louth, pyotr, plasmodium, staining, gory, rink,\n", - "Nearest to instance: illogical, resize, lenses, unappreciated, synapses, oscillators, implementations, krugerrand,\n", - "Nearest to channel: curler, restructured, dray, creditors, mbit, bypassing, mastercard, tritium,\n", - "Nearest to report: presidents, credibility, spirituality, leaped, annotated, standish, focusing, reports,\n", - "Epoch 4/10 Iteration: 18100 Avg. Training loss: 3.9760 0.1089 sec/batch\n", - "Epoch 4/10 Iteration: 18200 Avg. Training loss: 4.0450 0.1039 sec/batch\n", - "Epoch 4/10 Iteration: 18300 Avg. Training loss: 4.0234 0.1026 sec/batch\n", - "Epoch 4/10 Iteration: 18400 Avg. Training loss: 4.0367 0.1004 sec/batch\n", - "Epoch 4/10 Iteration: 18500 Avg. Training loss: 4.0817 0.1018 sec/batch\n", - "Epoch 5/10 Iteration: 18600 Avg. Training loss: 4.0321 0.0936 sec/batch\n", - "Epoch 5/10 Iteration: 18700 Avg. Training loss: 4.0089 0.1002 sec/batch\n", - "Epoch 5/10 Iteration: 18800 Avg. Training loss: 3.9820 0.1098 sec/batch\n", - "Epoch 5/10 Iteration: 18900 Avg. Training loss: 4.0002 0.1016 sec/batch\n", - "Epoch 5/10 Iteration: 19000 Avg. 
Training loss: 3.9676 0.1011 sec/batch\n", - "Nearest to for: given, scriptwriter, rogue, census, autrefois, converged, to, emeryville,\n", - "Nearest to would: disobey, whyte, habilis, nyquist, maecenas, busting, gimme, relegated,\n", - "Nearest to known: perihelion, rtgs, banach, satrapies, pisin, fervour, oak, quetzal,\n", - "Nearest to used: ceilings, stds, cirth, machining, bp, alliances, grams, common,\n", - "Nearest to at: emi, travelling, dominants, breach, italia, taya, bathory, seated,\n", - "Nearest to such: hinges, actus, undercurrent, pollutant, lysenkoism, desired, cc, license,\n", - "Nearest to called: supersessionism, reintroduce, keno, faithless, bother, sealand, vegetative, denunciations,\n", - "Nearest to when: edinburgh, refuse, attractive, ragga, bush, be, remove, painda,\n", - "Nearest to taking: leopards, garis, rational, sidgwick, go, anoxic, nba, boosts,\n", - "Nearest to consists: eee, chamber, cbd, consist, located, morisot, twos, appoints,\n", - "Nearest to scale: exposed, capricornus, curved, allude, mellin, regolith, fuse, gears,\n", - "Nearest to units: unit, fortieth, prefixes, force, torsion, typewriter, si, irl,\n", - "Nearest to ice: soils, pinstripes, plasmodium, louth, rink, pyotr, staining, joaquin,\n", - "Nearest to instance: illogical, synapses, lenses, resize, krugerrand, healthy, placed, caesarean,\n", - "Nearest to channel: curler, restructured, dray, creditors, bypassing, mastercard, wb, mbit,\n", - "Nearest to report: credibility, spirituality, presidents, reports, annotated, standish, focusing, leaped,\n", - "Epoch 5/10 Iteration: 19100 Avg. Training loss: 3.9968 0.1027 sec/batch\n", - "Epoch 5/10 Iteration: 19200 Avg. Training loss: 3.9635 0.1035 sec/batch\n", - "Epoch 5/10 Iteration: 19300 Avg. Training loss: 4.0181 0.1107 sec/batch\n", - "Epoch 5/10 Iteration: 19400 Avg. Training loss: 4.0267 0.1175 sec/batch\n", - "Epoch 5/10 Iteration: 19500 Avg. Training loss: 4.0411 0.1127 sec/batch\n", - "Epoch 5/10 Iteration: 19600 Avg. Training loss: 3.9779 0.1149 sec/batch\n", - "Epoch 5/10 Iteration: 19700 Avg. Training loss: 3.9253 0.1095 sec/batch\n", - "Epoch 5/10 Iteration: 19800 Avg. Training loss: 3.9642 0.1090 sec/batch\n", - "Epoch 5/10 Iteration: 19900 Avg. Training loss: 3.9214 0.1154 sec/batch\n", - "Epoch 5/10 Iteration: 20000 Avg. 
Training loss: 3.9692 0.1104 sec/batch\n", - "Nearest to for: given, census, to, scriptwriter, first, converged, emeryville, autrefois,\n", - "Nearest to would: disobey, relegated, whyte, habilis, nyquist, capitalistic, busting, maecenas,\n", - "Nearest to known: rtgs, banach, oak, perihelion, satrapies, with, nbi, hoosiers,\n", - "Nearest to used: ceilings, grams, cirth, machining, bp, stds, nazca, epoxy,\n", - "Nearest to at: emi, dominants, travelling, the, italia, degree, breach, surrounding,\n", - "Nearest to such: undercurrent, actus, cc, hinges, license, lysenkoism, group, techniques,\n", - "Nearest to called: supersessionism, vegetative, the, reintroduce, core, bother, denunciations, sealand,\n", - "Nearest to when: edinburgh, ragga, attractive, be, refuse, remove, down, itv,\n", - "Nearest to taking: leopards, rational, garis, go, anoxic, sidgwick, nba, carpal,\n", - "Nearest to consists: eee, chamber, consist, located, cbd, morisot, leblanc, appoints,\n", - "Nearest to scale: exposed, mellin, capricornus, allude, fuse, curved, townes, gears,\n", - "Nearest to units: unit, force, prefixes, fortieth, torsion, typewriter, si, teletype,\n", - "Nearest to ice: plasmodium, pinstripes, louth, soils, pyotr, staining, cools, rink,\n", - "Nearest to instance: lenses, resize, placed, synapses, bookstore, illogical, oscillators, unappreciated,\n", - "Nearest to channel: curler, restructured, dray, creditors, wb, channels, hearsay, dts,\n", - "Nearest to report: credibility, presidents, spirituality, reports, annotated, standish, leaped, timeline,\n", - "Epoch 5/10 Iteration: 20100 Avg. Training loss: 3.9983 0.1107 sec/batch\n", - "Epoch 5/10 Iteration: 20200 Avg. Training loss: 3.9932 0.1185 sec/batch\n", - "Epoch 5/10 Iteration: 20300 Avg. Training loss: 3.9784 0.1098 sec/batch\n", - "Epoch 5/10 Iteration: 20400 Avg. Training loss: 3.9886 0.1104 sec/batch\n", - "Epoch 5/10 Iteration: 20500 Avg. Training loss: 4.0409 0.1045 sec/batch\n", - "Epoch 5/10 Iteration: 20600 Avg. Training loss: 3.9733 0.1048 sec/batch\n", - "Epoch 5/10 Iteration: 20700 Avg. Training loss: 3.9866 0.1072 sec/batch\n", - "Epoch 5/10 Iteration: 20800 Avg. Training loss: 4.0136 0.1085 sec/batch\n", - "Epoch 5/10 Iteration: 20900 Avg. Training loss: 3.9813 0.1100 sec/batch\n", - "Epoch 5/10 Iteration: 21000 Avg. 
Training loss: 4.0106 0.1119 sec/batch\n", - "Nearest to for: given, census, scriptwriter, first, to, cited, autrefois, awards,\n", - "Nearest to would: disobey, whyte, relegated, nyquist, maecenas, habilis, lege, forbid,\n", - "Nearest to known: banach, rtgs, pisin, satrapies, nbi, hoosiers, sixteenth, perihelion,\n", - "Nearest to used: cirth, bjarne, ceilings, alliances, grams, bp, machining, stds,\n", - "Nearest to at: emi, travelling, dominants, degree, breach, their, the, awarding,\n", - "Nearest to such: lysenkoism, actus, hinges, desired, cc, unfair, plosives, license,\n", - "Nearest to called: supersessionism, bother, reintroduce, the, screenname, denunciations, ripples, core,\n", - "Nearest to when: edinburgh, be, ragga, attractive, refuse, itv, retrospect, remove,\n", - "Nearest to taking: rational, garis, leopards, go, sidgwick, anoxic, salim, nba,\n", - "Nearest to consists: chamber, eee, consist, morisot, leblanc, cbd, located, hydrohalic,\n", - "Nearest to scale: mellin, exposed, capricornus, townes, speciation, allude, fuse, curved,\n", - "Nearest to units: unit, force, prefixes, fortieth, torsion, typewriter, si, kilogram,\n", - "Nearest to ice: louth, pinstripes, rink, pyotr, plasmodium, staining, joaquin, sweden,\n", - "Nearest to instance: lenses, bookstore, unappreciated, resize, illogical, synapses, placed, caesarean,\n", - "Nearest to channel: curler, restructured, wb, dray, creditors, bandwidth, bypassing, mbit,\n", - "Nearest to report: reports, credibility, presidents, spirituality, annotated, standish, leaped, timeline,\n", - "Epoch 5/10 Iteration: 21100 Avg. Training loss: 3.9997 0.1121 sec/batch\n", - "Epoch 5/10 Iteration: 21200 Avg. Training loss: 3.9752 0.1114 sec/batch\n", - "Epoch 5/10 Iteration: 21300 Avg. Training loss: 4.0002 0.1109 sec/batch\n", - "Epoch 5/10 Iteration: 21400 Avg. Training loss: 3.9800 0.1107 sec/batch\n", - "Epoch 5/10 Iteration: 21500 Avg. Training loss: 4.0198 0.1114 sec/batch\n", - "Epoch 5/10 Iteration: 21600 Avg. Training loss: 4.0034 0.1111 sec/batch\n", - "Epoch 5/10 Iteration: 21700 Avg. Training loss: 3.9504 0.1112 sec/batch\n", - "Epoch 5/10 Iteration: 21800 Avg. Training loss: 3.9446 0.1112 sec/batch\n", - "Epoch 5/10 Iteration: 21900 Avg. Training loss: 3.9754 0.1101 sec/batch\n", - "Epoch 5/10 Iteration: 22000 Avg. 
Training loss: 4.0392 0.1137 sec/batch\n", - "Nearest to for: given, census, scriptwriter, first, to, emeryville, unusually, from,\n", - "Nearest to would: disobey, relegated, whyte, nyquist, maecenas, habilis, in, lege,\n", - "Nearest to known: satrapies, banach, rtgs, pisin, with, oak, yemenite, aalborg,\n", - "Nearest to used: cirth, grams, machining, common, bp, ceilings, other, alliances,\n", - "Nearest to at: emi, travelling, degree, dominants, the, breach, italia, their,\n", - "Nearest to such: lysenkoism, cc, actus, hinges, license, desired, baa, undercurrent,\n", - "Nearest to called: supersessionism, bother, reintroduce, denunciations, sealand, vegetative, ripples, faithless,\n", - "Nearest to when: attractive, edinburgh, refuse, ragga, be, remove, painda, itv,\n", - "Nearest to taking: rational, leopards, garis, go, sidgwick, anoxic, salim, kessinger,\n", - "Nearest to consists: chamber, eee, consist, cbd, located, morisot, leblanc, sint,\n", - "Nearest to scale: exposed, mellin, capricornus, speciation, accede, allude, gears, fuse,\n", - "Nearest to units: unit, prefixes, force, fortieth, typewriter, si, torsion, irl,\n", - "Nearest to ice: louth, rink, pinstripes, plasmodium, cools, pyotr, soils, staining,\n", - "Nearest to instance: lenses, placed, illogical, synapses, unappreciated, bookstore, krugerrand, oscillators,\n", - "Nearest to channel: curler, bandwidth, restructured, dray, wb, channels, mbit, dts,\n", - "Nearest to report: reports, credibility, presidents, annotated, spirituality, standish, focusing, lebanon,\n", - "Epoch 5/10 Iteration: 22100 Avg. Training loss: 3.9926 0.1178 sec/batch\n", - "Epoch 5/10 Iteration: 22200 Avg. Training loss: 4.1086 0.1140 sec/batch\n", - "Epoch 5/10 Iteration: 22300 Avg. Training loss: 4.0173 0.1238 sec/batch\n", - "Epoch 5/10 Iteration: 22400 Avg. Training loss: 4.0545 0.1200 sec/batch\n", - "Epoch 5/10 Iteration: 22500 Avg. Training loss: 3.9600 0.1167 sec/batch\n", - "Epoch 5/10 Iteration: 22600 Avg. Training loss: 3.9318 0.1150 sec/batch\n", - "Epoch 5/10 Iteration: 22700 Avg. Training loss: 3.9985 0.1157 sec/batch\n", - "Epoch 5/10 Iteration: 22800 Avg. Training loss: 3.9130 0.1197 sec/batch\n", - "Epoch 5/10 Iteration: 22900 Avg. Training loss: 3.9757 0.1174 sec/batch\n", - "Epoch 5/10 Iteration: 23000 Avg. 
Training loss: 3.9773 0.1208 sec/batch\n", - "Nearest to for: given, to, first, scriptwriter, census, the, from, have,\n", - "Nearest to would: disobey, whyte, relegated, nyquist, busting, gimme, habilis, in,\n", - "Nearest to known: banach, rtgs, satrapies, pisin, with, perihelion, usability, oak,\n", - "Nearest to used: cirth, common, grams, machining, use, bp, ceilings, phenol,\n", - "Nearest to at: travelling, degree, emi, the, dominants, breach, italia, awarding,\n", - "Nearest to such: cc, multinationals, lysenkoism, unfair, senegal, group, undercurrent, actus,\n", - "Nearest to called: the, supersessionism, bother, core, ripples, sealand, reintroduce, macedonian,\n", - "Nearest to when: attractive, ragga, edinburgh, remove, be, refuse, itv, retrospect,\n", - "Nearest to taking: go, garis, rational, sidgwick, leopards, salim, anoxic, nba,\n", - "Nearest to consists: chamber, eee, consist, leblanc, morisot, cbd, located, appoints,\n", - "Nearest to scale: mellin, exposed, townes, fuse, gears, curved, capricornus, allude,\n", - "Nearest to units: unit, prefixes, fortieth, force, si, typewriter, torsion, irl,\n", - "Nearest to ice: louth, rink, pyotr, pinstripes, plasmodium, joaquin, soils, gory,\n", - "Nearest to instance: lenses, illogical, placed, synapses, bookstore, unappreciated, healthy, resize,\n", - "Nearest to channel: dray, curler, wb, channels, dts, bandwidth, hearsay, restructured,\n", - "Nearest to report: reports, credibility, presidents, annotated, spirituality, binge, standish, leaped,\n", - "Epoch 5/10 Iteration: 23100 Avg. Training loss: 3.9697 0.1115 sec/batch\n", - "Epoch 6/10 Iteration: 23200 Avg. Training loss: 3.9797 0.0768 sec/batch\n", - "Epoch 6/10 Iteration: 23300 Avg. Training loss: 3.9693 0.1202 sec/batch\n", - "Epoch 6/10 Iteration: 23400 Avg. Training loss: 3.9590 0.1265 sec/batch\n", - "Epoch 6/10 Iteration: 23500 Avg. Training loss: 3.9599 0.1224 sec/batch\n", - "Epoch 6/10 Iteration: 23600 Avg. Training loss: 3.8895 0.1215 sec/batch\n", - "Epoch 6/10 Iteration: 23700 Avg. Training loss: 3.9265 0.1228 sec/batch\n", - "Epoch 6/10 Iteration: 23800 Avg. Training loss: 3.9374 0.1243 sec/batch\n", - "Epoch 6/10 Iteration: 23900 Avg. Training loss: 3.9506 0.1151 sec/batch\n", - "Epoch 6/10 Iteration: 24000 Avg. 
Training loss: 3.9664 0.1254 sec/batch\n",
    [... removed notebook output truncated for brevity: the deleted stdout stream continues in the same pattern through epochs 6-10 (iterations 24100-46000), with the 100-iteration average training loss drifting down from roughly 3.97 to 3.82 and a "Nearest to <word>" validation listing for the same sixteen words printed every 1000 iterations ...]
    - "Epoch 10/10 Iteration: 46000 Avg. 
Training loss: 3.8804 0.1215 sec/batch\n", - "Nearest to for: the, and, to, a, given, of, in, from,\n", - "Nearest to would: that, to, than, coastlands, asians, relegated, with, because,\n", - "Nearest to known: most, with, which, the, first, by, in, this,\n", - "Nearest to used: commonly, is, use, common, or, often, other, as,\n", - "Nearest to at: the, in, of, as, degree, s, to, two,\n", - "Nearest to such: as, other, and, types, many, can, any, exotic,\n", - "Nearest to called: the, is, of, a, bother, identical, rearranged, hardin,\n", - "Nearest to when: be, the, was, initial, remove, laga, then, painda,\n", - "Nearest to taking: pia, go, fugees, ukrainians, reestablishing, xo, malm, boosts,\n", - "Nearest to consists: chamber, consist, calderon, leblanc, conscience, judicial, composed, couturat,\n", - "Nearest to scale: diatonic, suggests, capricornus, accidentals, mellin, motherhood, trillions, accede,\n", - "Nearest to units: unit, prefixes, measurement, fortieth, si, remembrance, force, dera,\n", - "Nearest to ice: rink, pyotr, ussr, joaquin, hockey, sweden, louth, plasmodium,\n", - "Nearest to instance: placed, pasts, geometrically, kruskal, philos, lenses, barcodes, oscillators,\n", - "Nearest to channel: creditors, channels, curler, hearsay, mbit, wb, carnivores, bandwidth,\n", - "Nearest to report: reports, credibility, annotated, commission, zangger, santer, focusing, lists,\n", - "Epoch 10/10 Iteration: 46100 Avg. Training loss: 3.8255 0.1184 sec/batch\n", - "Epoch 10/10 Iteration: 46200 Avg. Training loss: 3.8518 0.1119 sec/batch\n" - ] - } - ], + "outputs": [], "source": [ "epochs = 10\n", "batch_size = 1000\n", @@ -1819,7 +614,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.6.0" } }, "nbformat": 4, diff --git a/intro-to-rnns/Anna_KaRNNa.ipynb b/intro-to-rnns/Anna_KaRNNa.ipynb index 65d7bd7461..1c84851383 100644 --- a/intro-to-rnns/Anna_KaRNNa.ipynb +++ b/intro-to-rnns/Anna_KaRNNa.ipynb @@ -667,4218 +667,11 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 1/20... Training Step: 1... Training loss: 4.4178... 0.1997 sec/batch\n", - "Epoch: 1/20... Training Step: 2... Training loss: 4.3329... 0.1566 sec/batch\n", - "Epoch: 1/20... Training Step: 3... Training loss: 3.8995... 0.1491 sec/batch\n", - "Epoch: 1/20... Training Step: 4... Training loss: 5.5604... 0.1464 sec/batch\n", - "Epoch: 1/20... Training Step: 5... Training loss: 3.9864... 0.1480 sec/batch\n", - "Epoch: 1/20... Training Step: 6... Training loss: 3.9316... 0.1463 sec/batch\n", - "Epoch: 1/20... Training Step: 7... Training loss: 3.7652... 0.1483 sec/batch\n", - "Epoch: 1/20... Training Step: 8... Training loss: 3.6055... 0.1458 sec/batch\n", - "Epoch: 1/20... Training Step: 9... Training loss: 3.4638... 0.1465 sec/batch\n", - "Epoch: 1/20... Training Step: 10... Training loss: 3.4104... 0.1450 sec/batch\n", - "Epoch: 1/20... Training Step: 11... Training loss: 3.3789... 0.1467 sec/batch\n", - "Epoch: 1/20... Training Step: 12... Training loss: 3.4101... 0.1467 sec/batch\n", - "Epoch: 1/20... Training Step: 13... Training loss: 3.3676... 0.1455 sec/batch\n", - "Epoch: 1/20... Training Step: 14... Training loss: 3.3520... 0.1456 sec/batch\n", - "Epoch: 1/20... Training Step: 15... Training loss: 3.3251... 0.1456 sec/batch\n", - "Epoch: 1/20... Training Step: 16... Training loss: 3.3071... 
0.1469 sec/batch\n",
    [... removed notebook output truncated for brevity: the deleted output (split across two consecutive stdout stream blocks) continues with one log line per training step for steps 17-180 of epoch 1/20, the reported training loss falling from about 3.29 to 2.59 at roughly 0.15 sec/batch ...]
    - "Epoch: 1/20... Training Step: 181... Training loss: 2.5993... 
0.1461 sec/batch\n", - "Epoch: 1/20... Training Step: 182... Training loss: 2.5892... 0.1457 sec/batch\n", - "Epoch: 1/20... Training Step: 183... Training loss: 2.5797... 0.1466 sec/batch\n", - "Epoch: 1/20... Training Step: 184... Training loss: 2.6051... 0.1472 sec/batch\n", - "Epoch: 1/20... Training Step: 185... Training loss: 2.6215... 0.1469 sec/batch\n", - "Epoch: 1/20... Training Step: 186... Training loss: 2.5779... 0.1464 sec/batch\n", - "Epoch: 1/20... Training Step: 187... Training loss: 2.5535... 0.1462 sec/batch\n", - "Epoch: 1/20... Training Step: 188... Training loss: 2.5344... 0.1480 sec/batch\n", - "Epoch: 1/20... Training Step: 189... Training loss: 2.5359... 0.1482 sec/batch\n", - "Epoch: 1/20... Training Step: 190... Training loss: 2.5342... 0.1529 sec/batch\n", - "Epoch: 1/20... Training Step: 191... Training loss: 2.5477... 0.1468 sec/batch\n", - "Epoch: 1/20... Training Step: 192... Training loss: 2.5057... 0.1469 sec/batch\n", - "Epoch: 1/20... Training Step: 193... Training loss: 2.5321... 0.1464 sec/batch\n", - "Epoch: 1/20... Training Step: 194... Training loss: 2.5142... 0.1463 sec/batch\n", - "Epoch: 1/20... Training Step: 195... Training loss: 2.5118... 0.1476 sec/batch\n", - "Epoch: 1/20... Training Step: 196... Training loss: 2.5068... 0.1483 sec/batch\n", - "Epoch: 1/20... Training Step: 197... Training loss: 2.5044... 0.1468 sec/batch\n", - "Epoch: 1/20... Training Step: 198... Training loss: 2.5003... 0.1471 sec/batch\n", - "Epoch: 2/20... Training Step: 199... Training loss: 2.5762... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 200... Training loss: 2.4816... 0.1471 sec/batch\n", - "Epoch: 2/20... Training Step: 201... Training loss: 2.4804... 0.1532 sec/batch\n", - "Epoch: 2/20... Training Step: 202... Training loss: 2.4922... 0.1515 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 2/20... Training Step: 203... Training loss: 2.4915... 0.1497 sec/batch\n", - "Epoch: 2/20... Training Step: 204... Training loss: 2.4868... 0.1496 sec/batch\n", - "Epoch: 2/20... Training Step: 205... Training loss: 2.4942... 0.1475 sec/batch\n", - "Epoch: 2/20... Training Step: 206... Training loss: 2.4897... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 207... Training loss: 2.4990... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 208... Training loss: 2.4754... 0.1474 sec/batch\n", - "Epoch: 2/20... Training Step: 209... Training loss: 2.4678... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 210... Training loss: 2.4808... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 211... Training loss: 2.4629... 0.1459 sec/batch\n", - "Epoch: 2/20... Training Step: 212... Training loss: 2.5062... 0.1466 sec/batch\n", - "Epoch: 2/20... Training Step: 213... Training loss: 2.4719... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 214... Training loss: 2.4695... 0.1464 sec/batch\n", - "Epoch: 2/20... Training Step: 215... Training loss: 2.4652... 0.1478 sec/batch\n", - "Epoch: 2/20... Training Step: 216... Training loss: 2.4945... 0.1481 sec/batch\n", - "Epoch: 2/20... Training Step: 217... Training loss: 2.4647... 0.1476 sec/batch\n", - "Epoch: 2/20... Training Step: 218... Training loss: 2.4447... 0.1475 sec/batch\n", - "Epoch: 2/20... Training Step: 219... Training loss: 2.4435... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 220... Training loss: 2.4838... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 221... Training loss: 2.4544... 0.1480 sec/batch\n", - "Epoch: 2/20... 
Training Step: 222... Training loss: 2.4413... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 223... Training loss: 2.4307... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 224... Training loss: 2.4408... 0.1464 sec/batch\n", - "Epoch: 2/20... Training Step: 225... Training loss: 2.4302... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 226... Training loss: 2.4330... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 227... Training loss: 2.4417... 0.1476 sec/batch\n", - "Epoch: 2/20... Training Step: 228... Training loss: 2.4344... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 229... Training loss: 2.4430... 0.1474 sec/batch\n", - "Epoch: 2/20... Training Step: 230... Training loss: 2.4160... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 231... Training loss: 2.3995... 0.1463 sec/batch\n", - "Epoch: 2/20... Training Step: 232... Training loss: 2.4220... 0.1460 sec/batch\n", - "Epoch: 2/20... Training Step: 233... Training loss: 2.3963... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 234... Training loss: 2.4158... 0.1471 sec/batch\n", - "Epoch: 2/20... Training Step: 235... Training loss: 2.3991... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 236... Training loss: 2.3759... 0.1483 sec/batch\n", - "Epoch: 2/20... Training Step: 237... Training loss: 2.3921... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 238... Training loss: 2.3972... 0.1476 sec/batch\n", - "Epoch: 2/20... Training Step: 239... Training loss: 2.3939... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 240... Training loss: 2.3806... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 241... Training loss: 2.3811... 0.1492 sec/batch\n", - "Epoch: 2/20... Training Step: 242... Training loss: 2.3770... 0.1464 sec/batch\n", - "Epoch: 2/20... Training Step: 243... Training loss: 2.3765... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 244... Training loss: 2.3408... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 245... Training loss: 2.4097... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 246... Training loss: 2.3760... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 247... Training loss: 2.3794... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 248... Training loss: 2.3928... 0.1473 sec/batch\n", - "Epoch: 2/20... Training Step: 249... Training loss: 2.3578... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 250... Training loss: 2.3850... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 251... Training loss: 2.3627... 0.1463 sec/batch\n", - "Epoch: 2/20... Training Step: 252... Training loss: 2.3608... 0.1471 sec/batch\n", - "Epoch: 2/20... Training Step: 253... Training loss: 2.3523... 0.1463 sec/batch\n", - "Epoch: 2/20... Training Step: 254... Training loss: 2.3748... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 255... Training loss: 2.3624... 0.1491 sec/batch\n", - "Epoch: 2/20... Training Step: 256... Training loss: 2.3500... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 257... Training loss: 2.3531... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 258... Training loss: 2.3727... 0.1471 sec/batch\n", - "Epoch: 2/20... Training Step: 259... Training loss: 2.3493... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 260... Training loss: 2.3616... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 261... Training loss: 2.3717... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 262... Training loss: 2.3409... 0.1475 sec/batch\n", - "Epoch: 2/20... Training Step: 263... 
Training loss: 2.3355... 0.1466 sec/batch\n", - "Epoch: 2/20... Training Step: 264... Training loss: 2.3594... 0.1481 sec/batch\n", - "Epoch: 2/20... Training Step: 265... Training loss: 2.3432... 0.1485 sec/batch\n", - "Epoch: 2/20... Training Step: 266... Training loss: 2.3059... 0.1489 sec/batch\n", - "Epoch: 2/20... Training Step: 267... Training loss: 2.3199... 0.1481 sec/batch\n", - "Epoch: 2/20... Training Step: 268... Training loss: 2.3349... 0.1474 sec/batch\n", - "Epoch: 2/20... Training Step: 269... Training loss: 2.3475... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 270... Training loss: 2.3349... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 271... Training loss: 2.3348... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 272... Training loss: 2.3071... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 273... Training loss: 2.3228... 0.1460 sec/batch\n", - "Epoch: 2/20... Training Step: 274... Training loss: 2.3627... 0.1461 sec/batch\n", - "Epoch: 2/20... Training Step: 275... Training loss: 2.3187... 0.1460 sec/batch\n", - "Epoch: 2/20... Training Step: 276... Training loss: 2.3276... 0.1481 sec/batch\n", - "Epoch: 2/20... Training Step: 277... Training loss: 2.3047... 0.1485 sec/batch\n", - "Epoch: 2/20... Training Step: 278... Training loss: 2.3062... 0.1478 sec/batch\n", - "Epoch: 2/20... Training Step: 279... Training loss: 2.2910... 0.1471 sec/batch\n", - "Epoch: 2/20... Training Step: 280... Training loss: 2.3192... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 281... Training loss: 2.2870... 0.1460 sec/batch\n", - "Epoch: 2/20... Training Step: 282... Training loss: 2.2755... 0.1464 sec/batch\n", - "Epoch: 2/20... Training Step: 283... Training loss: 2.2539... 0.1483 sec/batch\n", - "Epoch: 2/20... Training Step: 284... Training loss: 2.2847... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 285... Training loss: 2.2853... 0.1461 sec/batch\n", - "Epoch: 2/20... Training Step: 286... Training loss: 2.2858... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 287... Training loss: 2.2773... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 288... Training loss: 2.3016... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 289... Training loss: 2.2675... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 290... Training loss: 2.2928... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 291... Training loss: 2.2626... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 292... Training loss: 2.2606... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 293... Training loss: 2.2570... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 294... Training loss: 2.2586... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 295... Training loss: 2.2850... 0.1484 sec/batch\n", - "Epoch: 2/20... Training Step: 296... Training loss: 2.2647... 0.1482 sec/batch\n", - "Epoch: 2/20... Training Step: 297... Training loss: 2.2473... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 298... Training loss: 2.2467... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 299... Training loss: 2.2771... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 300... Training loss: 2.2623... 0.1458 sec/batch\n", - "Epoch: 2/20... Training Step: 301... Training loss: 2.2385... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 302... Training loss: 2.2456... 0.1506 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 2/20... Training Step: 303... Training loss: 2.2445... 
0.1464 sec/batch\n", - "Epoch: 2/20... Training Step: 304... Training loss: 2.2462... 0.1463 sec/batch\n", - "Epoch: 2/20... Training Step: 305... Training loss: 2.2492... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 306... Training loss: 2.2775... 0.1488 sec/batch\n", - "Epoch: 2/20... Training Step: 307... Training loss: 2.2664... 0.1466 sec/batch\n", - "Epoch: 2/20... Training Step: 308... Training loss: 2.2324... 0.1478 sec/batch\n", - "Epoch: 2/20... Training Step: 309... Training loss: 2.2441... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 310... Training loss: 2.2561... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 311... Training loss: 2.2341... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 312... Training loss: 2.2189... 0.1474 sec/batch\n", - "Epoch: 2/20... Training Step: 313... Training loss: 2.2179... 0.1482 sec/batch\n", - "Epoch: 2/20... Training Step: 314... Training loss: 2.1948... 0.1474 sec/batch\n", - "Epoch: 2/20... Training Step: 315... Training loss: 2.2308... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 316... Training loss: 2.2302... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 317... Training loss: 2.2449... 0.1484 sec/batch\n", - "Epoch: 2/20... Training Step: 318... Training loss: 2.2428... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 319... Training loss: 2.2463... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 320... Training loss: 2.2158... 0.1466 sec/batch\n", - "Epoch: 2/20... Training Step: 321... Training loss: 2.2160... 0.1478 sec/batch\n", - "Epoch: 2/20... Training Step: 322... Training loss: 2.2486... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 323... Training loss: 2.2213... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 324... Training loss: 2.1927... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 325... Training loss: 2.2302... 0.1473 sec/batch\n", - "Epoch: 2/20... Training Step: 326... Training loss: 2.2362... 0.1481 sec/batch\n", - "Epoch: 2/20... Training Step: 327... Training loss: 2.2198... 0.1462 sec/batch\n", - "Epoch: 2/20... Training Step: 328... Training loss: 2.2106... 0.1493 sec/batch\n", - "Epoch: 2/20... Training Step: 329... Training loss: 2.1959... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 330... Training loss: 2.1918... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 331... Training loss: 2.2301... 0.1474 sec/batch\n", - "Epoch: 2/20... Training Step: 332... Training loss: 2.2224... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 333... Training loss: 2.2042... 0.1462 sec/batch\n", - "Epoch: 2/20... Training Step: 334... Training loss: 2.2123... 0.1466 sec/batch\n", - "Epoch: 2/20... Training Step: 335... Training loss: 2.2004... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 336... Training loss: 2.2052... 0.1491 sec/batch\n", - "Epoch: 2/20... Training Step: 337... Training loss: 2.2392... 0.1482 sec/batch\n", - "Epoch: 2/20... Training Step: 338... Training loss: 2.1902... 0.1478 sec/batch\n", - "Epoch: 2/20... Training Step: 339... Training loss: 2.2131... 0.1494 sec/batch\n", - "Epoch: 2/20... Training Step: 340... Training loss: 2.1936... 0.1482 sec/batch\n", - "Epoch: 2/20... Training Step: 341... Training loss: 2.1977... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 342... Training loss: 2.1831... 0.1481 sec/batch\n", - "Epoch: 2/20... Training Step: 343... Training loss: 2.1864... 0.1486 sec/batch\n", - "Epoch: 2/20... Training Step: 344... Training loss: 2.2168... 
0.1482 sec/batch\n", - "Epoch: 2/20... Training Step: 345... Training loss: 2.1973... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 346... Training loss: 2.2094... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 347... Training loss: 2.1811... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 348... Training loss: 2.1705... 0.1499 sec/batch\n", - "Epoch: 2/20... Training Step: 349... Training loss: 2.1971... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 350... Training loss: 2.2239... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 351... Training loss: 2.1995... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 352... Training loss: 2.1997... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 353... Training loss: 2.1656... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 354... Training loss: 2.1727... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 355... Training loss: 2.1611... 0.1475 sec/batch\n", - "Epoch: 2/20... Training Step: 356... Training loss: 2.1613... 0.1476 sec/batch\n", - "Epoch: 2/20... Training Step: 357... Training loss: 2.1416... 0.1466 sec/batch\n", - "Epoch: 2/20... Training Step: 358... Training loss: 2.2156... 0.1488 sec/batch\n", - "Epoch: 2/20... Training Step: 359... Training loss: 2.1704... 0.1464 sec/batch\n", - "Epoch: 2/20... Training Step: 360... Training loss: 2.1549... 0.1476 sec/batch\n", - "Epoch: 2/20... Training Step: 361... Training loss: 2.1655... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 362... Training loss: 2.1640... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 363... Training loss: 2.1714... 0.1470 sec/batch\n", - "Epoch: 2/20... Training Step: 364... Training loss: 2.1625... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 365... Training loss: 2.1639... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 366... Training loss: 2.1769... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 367... Training loss: 2.1562... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 368... Training loss: 2.1474... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 369... Training loss: 2.1445... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 370... Training loss: 2.1702... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 371... Training loss: 2.1666... 0.1498 sec/batch\n", - "Epoch: 2/20... Training Step: 372... Training loss: 2.1589... 0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 373... Training loss: 2.1722... 0.1488 sec/batch\n", - "Epoch: 2/20... Training Step: 374... Training loss: 2.1763... 0.1471 sec/batch\n", - "Epoch: 2/20... Training Step: 375... Training loss: 2.1377... 0.1478 sec/batch\n", - "Epoch: 2/20... Training Step: 376... Training loss: 2.1593... 0.1477 sec/batch\n", - "Epoch: 2/20... Training Step: 377... Training loss: 2.1229... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 378... Training loss: 2.1105... 0.1466 sec/batch\n", - "Epoch: 2/20... Training Step: 379... Training loss: 2.1331... 0.1468 sec/batch\n", - "Epoch: 2/20... Training Step: 380... Training loss: 2.1511... 0.1471 sec/batch\n", - "Epoch: 2/20... Training Step: 381... Training loss: 2.1437... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 382... Training loss: 2.1655... 0.1476 sec/batch\n", - "Epoch: 2/20... Training Step: 383... Training loss: 2.1462... 0.1479 sec/batch\n", - "Epoch: 2/20... Training Step: 384... Training loss: 2.1352... 0.1483 sec/batch\n", - "Epoch: 2/20... Training Step: 385... Training loss: 2.1356... 
0.1465 sec/batch\n", - "Epoch: 2/20... Training Step: 386... Training loss: 2.1135... 0.1478 sec/batch\n", - "Epoch: 2/20... Training Step: 387... Training loss: 2.1199... 0.1480 sec/batch\n", - "Epoch: 2/20... Training Step: 388... Training loss: 2.1290... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 389... Training loss: 2.1417... 0.1501 sec/batch\n", - "Epoch: 2/20... Training Step: 390... Training loss: 2.1003... 0.1472 sec/batch\n", - "Epoch: 2/20... Training Step: 391... Training loss: 2.1321... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 392... Training loss: 2.1266... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 393... Training loss: 2.0990... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 394... Training loss: 2.1232... 0.1469 sec/batch\n", - "Epoch: 2/20... Training Step: 395... Training loss: 2.1153... 0.1467 sec/batch\n", - "Epoch: 2/20... Training Step: 396... Training loss: 2.0960... 0.1478 sec/batch\n", - "Epoch: 3/20... Training Step: 397... Training loss: 2.1949... 0.1472 sec/batch\n", - "Epoch: 3/20... Training Step: 398... Training loss: 2.0923... 0.1478 sec/batch\n", - "Epoch: 3/20... Training Step: 399... Training loss: 2.0769... 0.1462 sec/batch\n", - "Epoch: 3/20... Training Step: 400... Training loss: 2.0929... 0.1472 sec/batch\n", - "Epoch: 3/20... Training Step: 401... Training loss: 2.1005... 0.1518 sec/batch\n", - "Epoch: 3/20... Training Step: 402... Training loss: 2.0726... 0.1499 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 3/20... Training Step: 403... Training loss: 2.0991... 0.1505 sec/batch\n", - "Epoch: 3/20... Training Step: 404... Training loss: 2.0952... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 405... Training loss: 2.1287... 0.1484 sec/batch\n", - "Epoch: 3/20... Training Step: 406... Training loss: 2.0938... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 407... Training loss: 2.0799... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 408... Training loss: 2.0800... 0.1479 sec/batch\n", - "Epoch: 3/20... Training Step: 409... Training loss: 2.1055... 0.1482 sec/batch\n", - "Epoch: 3/20... Training Step: 410... Training loss: 2.1335... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 411... Training loss: 2.0908... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 412... Training loss: 2.0808... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 413... Training loss: 2.0789... 0.1489 sec/batch\n", - "Epoch: 3/20... Training Step: 414... Training loss: 2.1235... 0.1477 sec/batch\n", - "Epoch: 3/20... Training Step: 415... Training loss: 2.0959... 0.1475 sec/batch\n", - "Epoch: 3/20... Training Step: 416... Training loss: 2.0841... 0.1466 sec/batch\n", - "Epoch: 3/20... Training Step: 417... Training loss: 2.0713... 0.1468 sec/batch\n", - "Epoch: 3/20... Training Step: 418... Training loss: 2.1413... 0.1493 sec/batch\n", - "Epoch: 3/20... Training Step: 419... Training loss: 2.0856... 0.1496 sec/batch\n", - "Epoch: 3/20... Training Step: 420... Training loss: 2.0784... 0.1510 sec/batch\n", - "Epoch: 3/20... Training Step: 421... Training loss: 2.0839... 0.1477 sec/batch\n", - "Epoch: 3/20... Training Step: 422... Training loss: 2.0541... 0.1465 sec/batch\n", - "Epoch: 3/20... Training Step: 423... Training loss: 2.0567... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 424... Training loss: 2.0877... 0.1477 sec/batch\n", - "Epoch: 3/20... Training Step: 425... Training loss: 2.1118... 0.1479 sec/batch\n", - "Epoch: 3/20... 
Training Step: 426... Training loss: 2.0829... 0.1484 sec/batch\n", - "Epoch: 3/20... Training Step: 427... Training loss: 2.0685... 0.1472 sec/batch\n", - "Epoch: 3/20... Training Step: 428... Training loss: 2.0460... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 429... Training loss: 2.0692... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 430... Training loss: 2.1007... 0.1477 sec/batch\n", - "Epoch: 3/20... Training Step: 431... Training loss: 2.0443... 0.1466 sec/batch\n", - "Epoch: 3/20... Training Step: 432... Training loss: 2.0657... 0.1482 sec/batch\n", - "Epoch: 3/20... Training Step: 433... Training loss: 2.0586... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 434... Training loss: 2.0171... 0.1462 sec/batch\n", - "Epoch: 3/20... Training Step: 435... Training loss: 2.0285... 0.1487 sec/batch\n", - "Epoch: 3/20... Training Step: 436... Training loss: 2.0305... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 437... Training loss: 2.0350... 0.1496 sec/batch\n", - "Epoch: 3/20... Training Step: 438... Training loss: 2.0514... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 439... Training loss: 2.0339... 0.1487 sec/batch\n", - "Epoch: 3/20... Training Step: 440... Training loss: 2.0312... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 441... Training loss: 2.0538... 0.1462 sec/batch\n", - "Epoch: 3/20... Training Step: 442... Training loss: 1.9874... 0.1488 sec/batch\n", - "Epoch: 3/20... Training Step: 443... Training loss: 2.0569... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 444... Training loss: 2.0218... 0.1475 sec/batch\n", - "Epoch: 3/20... Training Step: 445... Training loss: 2.0379... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 446... Training loss: 2.0805... 0.1479 sec/batch\n", - "Epoch: 3/20... Training Step: 447... Training loss: 2.0173... 0.1479 sec/batch\n", - "Epoch: 3/20... Training Step: 448... Training loss: 2.0869... 0.1486 sec/batch\n", - "Epoch: 3/20... Training Step: 449... Training loss: 2.0334... 0.1479 sec/batch\n", - "Epoch: 3/20... Training Step: 450... Training loss: 2.0326... 0.1473 sec/batch\n", - "Epoch: 3/20... Training Step: 451... Training loss: 2.0221... 0.1467 sec/batch\n", - "Epoch: 3/20... Training Step: 452... Training loss: 2.0444... 0.1482 sec/batch\n", - "Epoch: 3/20... Training Step: 453... Training loss: 2.0408... 0.1469 sec/batch\n", - "Epoch: 3/20... Training Step: 454... Training loss: 2.0283... 0.1469 sec/batch\n", - "Epoch: 3/20... Training Step: 455... Training loss: 2.0211... 0.1476 sec/batch\n", - "Epoch: 3/20... Training Step: 456... Training loss: 2.0666... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 457... Training loss: 2.0324... 0.1497 sec/batch\n", - "Epoch: 3/20... Training Step: 458... Training loss: 2.0736... 0.1461 sec/batch\n", - "Epoch: 3/20... Training Step: 459... Training loss: 2.0644... 0.1472 sec/batch\n", - "Epoch: 3/20... Training Step: 460... Training loss: 2.0308... 0.1466 sec/batch\n", - "Epoch: 3/20... Training Step: 461... Training loss: 2.0157... 0.1469 sec/batch\n", - "Epoch: 3/20... Training Step: 462... Training loss: 2.0642... 0.1513 sec/batch\n", - "Epoch: 3/20... Training Step: 463... Training loss: 2.0352... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 464... Training loss: 1.9999... 0.1500 sec/batch\n", - "Epoch: 3/20... Training Step: 465... Training loss: 2.0092... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 466... Training loss: 2.0161... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 467... 
Training loss: 2.0601... 0.1476 sec/batch\n", - "Epoch: 3/20... Training Step: 468... Training loss: 2.0308... 0.1467 sec/batch\n", - "Epoch: 3/20... Training Step: 469... Training loss: 2.0298... 0.1469 sec/batch\n", - "Epoch: 3/20... Training Step: 470... Training loss: 2.0016... 0.1478 sec/batch\n", - "Epoch: 3/20... Training Step: 471... Training loss: 2.0138... 0.1498 sec/batch\n", - "Epoch: 3/20... Training Step: 472... Training loss: 2.0410... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 473... Training loss: 2.0149... 0.1469 sec/batch\n", - "Epoch: 3/20... Training Step: 474... Training loss: 2.0167... 0.1477 sec/batch\n", - "Epoch: 3/20... Training Step: 475... Training loss: 1.9786... 0.1460 sec/batch\n", - "Epoch: 3/20... Training Step: 476... Training loss: 1.9936... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 477... Training loss: 1.9717... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 478... Training loss: 2.0201... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 479... Training loss: 1.9687... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 480... Training loss: 1.9953... 0.1490 sec/batch\n", - "Epoch: 3/20... Training Step: 481... Training loss: 1.9668... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 482... Training loss: 1.9923... 0.1482 sec/batch\n", - "Epoch: 3/20... Training Step: 483... Training loss: 1.9915... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 484... Training loss: 1.9846... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 485... Training loss: 1.9640... 0.1476 sec/batch\n", - "Epoch: 3/20... Training Step: 486... Training loss: 2.0094... 0.1467 sec/batch\n", - "Epoch: 3/20... Training Step: 487... Training loss: 1.9693... 0.1462 sec/batch\n", - "Epoch: 3/20... Training Step: 488... Training loss: 1.9983... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 489... Training loss: 1.9606... 0.1478 sec/batch\n", - "Epoch: 3/20... Training Step: 490... Training loss: 1.9656... 0.1473 sec/batch\n", - "Epoch: 3/20... Training Step: 491... Training loss: 1.9733... 0.1473 sec/batch\n", - "Epoch: 3/20... Training Step: 492... Training loss: 1.9816... 0.1479 sec/batch\n", - "Epoch: 3/20... Training Step: 493... Training loss: 1.9781... 0.1476 sec/batch\n", - "Epoch: 3/20... Training Step: 494... Training loss: 1.9608... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 495... Training loss: 1.9637... 0.1475 sec/batch\n", - "Epoch: 3/20... Training Step: 496... Training loss: 1.9308... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 497... Training loss: 1.9890... 0.1473 sec/batch\n", - "Epoch: 3/20... Training Step: 498... Training loss: 1.9874... 0.1470 sec/batch\n", - "Epoch: 3/20... Training Step: 499... Training loss: 1.9670... 0.1476 sec/batch\n", - "Epoch: 3/20... Training Step: 500... Training loss: 1.9761... 0.1470 sec/batch\n", - "Epoch: 3/20... Training Step: 501... Training loss: 1.9570... 0.1493 sec/batch\n", - "Epoch: 3/20... Training Step: 502... Training loss: 1.9738... 0.1473 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 3/20... Training Step: 503... Training loss: 1.9737... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 504... Training loss: 1.9906... 0.1482 sec/batch\n", - "Epoch: 3/20... Training Step: 505... Training loss: 1.9865... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 506... Training loss: 1.9788... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 507... Training loss: 1.9649... 
0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 508... Training loss: 1.9651... 0.1478 sec/batch\n", - "Epoch: 3/20... Training Step: 509... Training loss: 1.9668... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 510... Training loss: 1.9612... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 511... Training loss: 1.9527... 0.1486 sec/batch\n", - "Epoch: 3/20... Training Step: 512... Training loss: 1.9262... 0.1477 sec/batch\n", - "Epoch: 3/20... Training Step: 513... Training loss: 1.9580... 0.1470 sec/batch\n", - "Epoch: 3/20... Training Step: 514... Training loss: 1.9567... 0.1488 sec/batch\n", - "Epoch: 3/20... Training Step: 515... Training loss: 1.9659... 0.1479 sec/batch\n", - "Epoch: 3/20... Training Step: 516... Training loss: 1.9651... 0.1503 sec/batch\n", - "Epoch: 3/20... Training Step: 517... Training loss: 1.9696... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 518... Training loss: 1.9395... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 519... Training loss: 1.9435... 0.1469 sec/batch\n", - "Epoch: 3/20... Training Step: 520... Training loss: 1.9838... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 521... Training loss: 1.9536... 0.1486 sec/batch\n", - "Epoch: 3/20... Training Step: 522... Training loss: 1.9126... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 523... Training loss: 1.9740... 0.1468 sec/batch\n", - "Epoch: 3/20... Training Step: 524... Training loss: 1.9603... 0.1472 sec/batch\n", - "Epoch: 3/20... Training Step: 525... Training loss: 1.9483... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 526... Training loss: 1.9630... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 527... Training loss: 1.9268... 0.1468 sec/batch\n", - "Epoch: 3/20... Training Step: 528... Training loss: 1.9328... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 529... Training loss: 1.9538... 0.1473 sec/batch\n", - "Epoch: 3/20... Training Step: 530... Training loss: 1.9494... 0.1464 sec/batch\n", - "Epoch: 3/20... Training Step: 531... Training loss: 1.9440... 0.1494 sec/batch\n", - "Epoch: 3/20... Training Step: 532... Training loss: 1.9563... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 533... Training loss: 1.9633... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 534... Training loss: 1.9568... 0.1482 sec/batch\n", - "Epoch: 3/20... Training Step: 535... Training loss: 1.9788... 0.1467 sec/batch\n", - "Epoch: 3/20... Training Step: 536... Training loss: 1.9490... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 537... Training loss: 1.9820... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 538... Training loss: 1.9310... 0.1490 sec/batch\n", - "Epoch: 3/20... Training Step: 539... Training loss: 1.9552... 0.1472 sec/batch\n", - "Epoch: 3/20... Training Step: 540... Training loss: 1.9327... 0.1467 sec/batch\n", - "Epoch: 3/20... Training Step: 541... Training loss: 1.9240... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 542... Training loss: 1.9468... 0.1468 sec/batch\n", - "Epoch: 3/20... Training Step: 543... Training loss: 1.9593... 0.1510 sec/batch\n", - "Epoch: 3/20... Training Step: 544... Training loss: 1.9666... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 545... Training loss: 1.9459... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 546... Training loss: 1.9273... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 547... Training loss: 1.9314... 0.1479 sec/batch\n", - "Epoch: 3/20... Training Step: 548... Training loss: 1.9724... 
0.1466 sec/batch\n", - "Epoch: 3/20... Training Step: 549... Training loss: 1.9410... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 550... Training loss: 1.9536... 0.1486 sec/batch\n", - "Epoch: 3/20... Training Step: 551... Training loss: 1.9252... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 552... Training loss: 1.9243... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 553... Training loss: 1.9293... 0.1468 sec/batch\n", - "Epoch: 3/20... Training Step: 554... Training loss: 1.9332... 0.1483 sec/batch\n", - "Epoch: 3/20... Training Step: 555... Training loss: 1.9035... 0.1492 sec/batch\n", - "Epoch: 3/20... Training Step: 556... Training loss: 1.9698... 0.1477 sec/batch\n", - "Epoch: 3/20... Training Step: 557... Training loss: 1.9563... 0.1503 sec/batch\n", - "Epoch: 3/20... Training Step: 558... Training loss: 1.9299... 0.1470 sec/batch\n", - "Epoch: 3/20... Training Step: 559... Training loss: 1.9488... 0.1471 sec/batch\n", - "Epoch: 3/20... Training Step: 560... Training loss: 1.9325... 0.1490 sec/batch\n", - "Epoch: 3/20... Training Step: 561... Training loss: 1.9195... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 562... Training loss: 1.9179... 0.1473 sec/batch\n", - "Epoch: 3/20... Training Step: 563... Training loss: 1.9326... 0.1463 sec/batch\n", - "Epoch: 3/20... Training Step: 564... Training loss: 1.9763... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 565... Training loss: 1.9205... 0.1476 sec/batch\n", - "Epoch: 3/20... Training Step: 566... Training loss: 1.9155... 0.1478 sec/batch\n", - "Epoch: 3/20... Training Step: 567... Training loss: 1.9100... 0.1499 sec/batch\n", - "Epoch: 3/20... Training Step: 568... Training loss: 1.9058... 0.1465 sec/batch\n", - "Epoch: 3/20... Training Step: 569... Training loss: 1.9424... 0.1481 sec/batch\n", - "Epoch: 3/20... Training Step: 570... Training loss: 1.9264... 0.1492 sec/batch\n", - "Epoch: 3/20... Training Step: 571... Training loss: 1.9198... 0.1482 sec/batch\n", - "Epoch: 3/20... Training Step: 572... Training loss: 1.9186... 0.1491 sec/batch\n", - "Epoch: 3/20... Training Step: 573... Training loss: 1.8994... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 574... Training loss: 1.9325... 0.1479 sec/batch\n", - "Epoch: 3/20... Training Step: 575... Training loss: 1.8895... 0.1477 sec/batch\n", - "Epoch: 3/20... Training Step: 576... Training loss: 1.8801... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 577... Training loss: 1.8912... 0.1487 sec/batch\n", - "Epoch: 3/20... Training Step: 578... Training loss: 1.9110... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 579... Training loss: 1.8981... 0.1499 sec/batch\n", - "Epoch: 3/20... Training Step: 580... Training loss: 1.9337... 0.1475 sec/batch\n", - "Epoch: 3/20... Training Step: 581... Training loss: 1.9136... 0.1476 sec/batch\n", - "Epoch: 3/20... Training Step: 582... Training loss: 1.9001... 0.1475 sec/batch\n", - "Epoch: 3/20... Training Step: 583... Training loss: 1.9054... 0.1474 sec/batch\n", - "Epoch: 3/20... Training Step: 584... Training loss: 1.8894... 0.1487 sec/batch\n", - "Epoch: 3/20... Training Step: 585... Training loss: 1.8941... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 586... Training loss: 1.8931... 0.1484 sec/batch\n", - "Epoch: 3/20... Training Step: 587... Training loss: 1.9144... 0.1485 sec/batch\n", - "Epoch: 3/20... Training Step: 588... Training loss: 1.8764... 0.1499 sec/batch\n", - "Epoch: 3/20... Training Step: 589... Training loss: 1.9055... 
0.1487 sec/batch\n", - "Epoch: 3/20... Training Step: 590... Training loss: 1.8790... 0.1480 sec/batch\n", - "Epoch: 3/20... Training Step: 591... Training loss: 1.8684... 0.1488 sec/batch\n", - "Epoch: 3/20... Training Step: 592... Training loss: 1.9002... 0.1489 sec/batch\n", - "Epoch: 3/20... Training Step: 593... Training loss: 1.8967... 0.1478 sec/batch\n", - "Epoch: 3/20... Training Step: 594... Training loss: 1.8792... 0.1479 sec/batch\n", - "Epoch: 4/20... Training Step: 595... Training loss: 1.9743... 0.1493 sec/batch\n", - "Epoch: 4/20... Training Step: 596... Training loss: 1.8896... 0.1473 sec/batch\n", - "Epoch: 4/20... Training Step: 597... Training loss: 1.8837... 0.1476 sec/batch\n", - "Epoch: 4/20... Training Step: 598... Training loss: 1.8908... 0.1483 sec/batch\n", - "Epoch: 4/20... Training Step: 599... Training loss: 1.8812... 0.1477 sec/batch\n", - "Epoch: 4/20... Training Step: 600... Training loss: 1.8537... 0.1482 sec/batch\n", - "Epoch: 4/20... Training Step: 601... Training loss: 1.8817... 0.1533 sec/batch\n", - "Epoch: 4/20... Training Step: 602... Training loss: 1.8822... 0.1516 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 4/20... Training Step: 603... Training loss: 1.9265... 0.1501 sec/batch\n", - "Epoch: 4/20... Training Step: 604... Training loss: 1.8877... 0.1480 sec/batch\n", - "Epoch: 4/20... Training Step: 605... Training loss: 1.8672... 0.1470 sec/batch\n", - "Epoch: 4/20... Training Step: 606... Training loss: 1.8664... 0.1465 sec/batch\n", - "Epoch: 4/20... Training Step: 607... Training loss: 1.8811... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 608... Training loss: 1.9223... 0.1469 sec/batch\n", - "Epoch: 4/20... Training Step: 609... Training loss: 1.8733... 0.1477 sec/batch\n", - "Epoch: 4/20... Training Step: 610... Training loss: 1.8713... 0.1484 sec/batch\n", - "Epoch: 4/20... Training Step: 611... Training loss: 1.8748... 0.1491 sec/batch\n", - "Epoch: 4/20... Training Step: 612... Training loss: 1.9166... 0.1469 sec/batch\n", - "Epoch: 4/20... Training Step: 613... Training loss: 1.8814... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 614... Training loss: 1.8868... 0.1486 sec/batch\n", - "Epoch: 4/20... Training Step: 615... Training loss: 1.8606... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 616... Training loss: 1.9209... 0.1479 sec/batch\n", - "Epoch: 4/20... Training Step: 617... Training loss: 1.8669... 0.1480 sec/batch\n", - "Epoch: 4/20... Training Step: 618... Training loss: 1.8706... 0.1477 sec/batch\n", - "Epoch: 4/20... Training Step: 619... Training loss: 1.8705... 0.1495 sec/batch\n", - "Epoch: 4/20... Training Step: 620... Training loss: 1.8383... 0.1474 sec/batch\n", - "Epoch: 4/20... Training Step: 621... Training loss: 1.8368... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 622... Training loss: 1.8756... 0.1483 sec/batch\n", - "Epoch: 4/20... Training Step: 623... Training loss: 1.9096... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 624... Training loss: 1.8773... 0.1479 sec/batch\n", - "Epoch: 4/20... Training Step: 625... Training loss: 1.8686... 0.1473 sec/batch\n", - "Epoch: 4/20... Training Step: 626... Training loss: 1.8463... 0.1478 sec/batch\n", - "Epoch: 4/20... Training Step: 627... Training loss: 1.8775... 0.1487 sec/batch\n", - "Epoch: 4/20... Training Step: 628... Training loss: 1.8859... 0.1482 sec/batch\n", - "Epoch: 4/20... Training Step: 629... Training loss: 1.8460... 0.1487 sec/batch\n", - "Epoch: 4/20... 
Training Step: 630... Training loss: 1.8527... 0.1484 sec/batch\n", - "Epoch: 4/20... Training Step: 631... Training loss: 1.8519... 0.1510 sec/batch\n", - "Epoch: 4/20... Training Step: 632... Training loss: 1.8239... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 633... Training loss: 1.8222... 0.1471 sec/batch\n", - "Epoch: 4/20... Training Step: 634... Training loss: 1.8243... 0.1473 sec/batch\n", - "Epoch: 4/20... Training Step: 635... Training loss: 1.8318... 0.1493 sec/batch\n", - "Epoch: 4/20... Training Step: 636... Training loss: 1.8696... 0.1474 sec/batch\n", - "Epoch: 4/20... Training Step: 637... Training loss: 1.8295... 0.1474 sec/batch\n", - "Epoch: 4/20... Training Step: 638... Training loss: 1.8222... 0.1474 sec/batch\n", - "Epoch: 4/20... Training Step: 639... Training loss: 1.8629... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 640... Training loss: 1.8067... 0.1478 sec/batch\n", - "Epoch: 4/20... Training Step: 641... Training loss: 1.8517... 0.1482 sec/batch\n", - "Epoch: 4/20... Training Step: 642... Training loss: 1.8397... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 643... Training loss: 1.8408... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 644... Training loss: 1.8875... 0.1487 sec/batch\n", - "Epoch: 4/20... Training Step: 645... Training loss: 1.8140... 0.1487 sec/batch\n", - "Epoch: 4/20... Training Step: 646... Training loss: 1.8956... 0.1489 sec/batch\n", - "Epoch: 4/20... Training Step: 647... Training loss: 1.8388... 0.1483 sec/batch\n", - "Epoch: 4/20... Training Step: 648... Training loss: 1.8534... 0.1473 sec/batch\n", - "Epoch: 4/20... Training Step: 649... Training loss: 1.8379... 0.1485 sec/batch\n", - "Epoch: 4/20... Training Step: 650... Training loss: 1.8500... 0.1483 sec/batch\n", - "Epoch: 4/20... Training Step: 651... Training loss: 1.8610... 0.1477 sec/batch\n", - "Epoch: 4/20... Training Step: 652... Training loss: 1.8231... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 653... Training loss: 1.8197... 0.1488 sec/batch\n", - "Epoch: 4/20... Training Step: 654... Training loss: 1.8762... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 655... Training loss: 1.8418... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 656... Training loss: 1.8716... 0.1484 sec/batch\n", - "Epoch: 4/20... Training Step: 657... Training loss: 1.8766... 0.1490 sec/batch\n", - "Epoch: 4/20... Training Step: 658... Training loss: 1.8610... 0.1482 sec/batch\n", - "Epoch: 4/20... Training Step: 659... Training loss: 1.8290... 0.1474 sec/batch\n", - "Epoch: 4/20... Training Step: 660... Training loss: 1.8621... 0.1471 sec/batch\n", - "Epoch: 4/20... Training Step: 661... Training loss: 1.8565... 0.1471 sec/batch\n", - "Epoch: 4/20... Training Step: 662... Training loss: 1.8162... 0.1473 sec/batch\n", - "Epoch: 4/20... Training Step: 663... Training loss: 1.8185... 0.1486 sec/batch\n", - "Epoch: 4/20... Training Step: 664... Training loss: 1.8276... 0.1500 sec/batch\n", - "Epoch: 4/20... Training Step: 665... Training loss: 1.8647... 0.1487 sec/batch\n", - "Epoch: 4/20... Training Step: 666... Training loss: 1.8498... 0.1476 sec/batch\n", - "Epoch: 4/20... Training Step: 667... Training loss: 1.8521... 0.1487 sec/batch\n", - "Epoch: 4/20... Training Step: 668... Training loss: 1.8185... 0.1499 sec/batch\n", - "Epoch: 4/20... Training Step: 669... Training loss: 1.8250... 0.1492 sec/batch\n", - "Epoch: 4/20... Training Step: 670... Training loss: 1.8539... 0.1471 sec/batch\n", - "Epoch: 4/20... Training Step: 671... 
Training loss: 1.8382... 0.1473 sec/batch\n", - "Epoch: 4/20... Training Step: 672... Training loss: 1.8356... 0.1474 sec/batch\n", - "Epoch: 4/20... Training Step: 673... Training loss: 1.7962... 0.1484 sec/batch\n", - "Epoch: 4/20... Training Step: 674... Training loss: 1.8159... 0.1485 sec/batch\n", - "Epoch: 4/20... Training Step: 675... Training loss: 1.7938... 0.1487 sec/batch\n", - "Epoch: 4/20... Training Step: 676... Training loss: 1.8446... 0.1493 sec/batch\n", - "Epoch: 4/20... Training Step: 677... Training loss: 1.7890... 0.1484 sec/batch\n", - "Epoch: 4/20... Training Step: 678... Training loss: 1.8186... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 679... Training loss: 1.7909... 0.1478 sec/batch\n", - "Epoch: 4/20... Training Step: 680... Training loss: 1.8021... 0.1478 sec/batch\n", - "Epoch: 4/20... Training Step: 681... Training loss: 1.8090... 0.1472 sec/batch\n", - "Epoch: 4/20... Training Step: 682... Training loss: 1.7930... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 683... Training loss: 1.7771... 0.1508 sec/batch\n", - "Epoch: 4/20... Training Step: 684... Training loss: 1.8285... 0.1479 sec/batch\n", - "Epoch: 4/20... Training Step: 685... Training loss: 1.7977... 0.1474 sec/batch\n", - "Epoch: 4/20... Training Step: 686... Training loss: 1.7990... 0.1474 sec/batch\n", - "Epoch: 4/20... Training Step: 687... Training loss: 1.7907... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 688... Training loss: 1.7878... 0.1463 sec/batch\n", - "Epoch: 4/20... Training Step: 689... Training loss: 1.7900... 0.1477 sec/batch\n", - "Epoch: 4/20... Training Step: 690... Training loss: 1.8187... 0.1510 sec/batch\n", - "Epoch: 4/20... Training Step: 691... Training loss: 1.7929... 0.1482 sec/batch\n", - "Epoch: 4/20... Training Step: 692... Training loss: 1.7618... 0.1514 sec/batch\n", - "Epoch: 4/20... Training Step: 693... Training loss: 1.7861... 0.1484 sec/batch\n", - "Epoch: 4/20... Training Step: 694... Training loss: 1.7423... 0.1478 sec/batch\n", - "Epoch: 4/20... Training Step: 695... Training loss: 1.8155... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 696... Training loss: 1.7905... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 697... Training loss: 1.7789... 0.1472 sec/batch\n", - "Epoch: 4/20... Training Step: 698... Training loss: 1.8029... 0.1484 sec/batch\n", - "Epoch: 4/20... Training Step: 699... Training loss: 1.7922... 0.1482 sec/batch\n", - "Epoch: 4/20... Training Step: 700... Training loss: 1.7954... 0.1485 sec/batch\n", - "Epoch: 4/20... Training Step: 701... Training loss: 1.7881... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 702... Training loss: 1.8110... 0.1499 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 4/20... Training Step: 703... Training loss: 1.8109... 0.1485 sec/batch\n", - "Epoch: 4/20... Training Step: 704... Training loss: 1.8253... 0.1480 sec/batch\n", - "Epoch: 4/20... Training Step: 705... Training loss: 1.8110... 0.1471 sec/batch\n", - "Epoch: 4/20... Training Step: 706... Training loss: 1.7860... 0.1475 sec/batch\n", - "Epoch: 4/20... Training Step: 707... Training loss: 1.8007... 0.1479 sec/batch\n", - "Epoch: 4/20... Training Step: 708... Training loss: 1.7990... 0.1477 sec/batch\n", - "Epoch: 4/20... Training Step: 709... Training loss: 1.7808... 0.1484 sec/batch\n", - "Epoch: 4/20... Training Step: 710... Training loss: 1.7623... 0.1481 sec/batch\n", - "Epoch: 4/20... Training Step: 711... Training loss: 1.8037... 
- [elided: the remainder of this removed cell output — per-step training logs spanning Epoch 4/20 through Epoch 8/20 (Training Steps 712-1562), with the reported training loss falling from roughly 1.80 to roughly 1.42 at about 0.147-0.152 sec/batch, split across several consecutive "stdout" stream output blocks]
Training Step: 1563... Training loss: 1.4286... 0.1483 sec/batch\n", - "Epoch: 8/20... Training Step: 1564... Training loss: 1.4678... 0.1484 sec/batch\n", - "Epoch: 8/20... Training Step: 1565... Training loss: 1.4265... 0.1478 sec/batch\n", - "Epoch: 8/20... Training Step: 1566... Training loss: 1.4217... 0.1494 sec/batch\n", - "Epoch: 8/20... Training Step: 1567... Training loss: 1.4142... 0.1488 sec/batch\n", - "Epoch: 8/20... Training Step: 1568... Training loss: 1.4376... 0.1482 sec/batch\n", - "Epoch: 8/20... Training Step: 1569... Training loss: 1.4488... 0.1487 sec/batch\n", - "Epoch: 8/20... Training Step: 1570... Training loss: 1.4267... 0.1491 sec/batch\n", - "Epoch: 8/20... Training Step: 1571... Training loss: 1.4385... 0.1486 sec/batch\n", - "Epoch: 8/20... Training Step: 1572... Training loss: 1.4331... 0.1487 sec/batch\n", - "Epoch: 8/20... Training Step: 1573... Training loss: 1.4575... 0.1498 sec/batch\n", - "Epoch: 8/20... Training Step: 1574... Training loss: 1.4296... 0.1489 sec/batch\n", - "Epoch: 8/20... Training Step: 1575... Training loss: 1.4339... 0.1483 sec/batch\n", - "Epoch: 8/20... Training Step: 1576... Training loss: 1.4403... 0.1482 sec/batch\n", - "Epoch: 8/20... Training Step: 1577... Training loss: 1.4232... 0.1480 sec/batch\n", - "Epoch: 8/20... Training Step: 1578... Training loss: 1.4246... 0.1478 sec/batch\n", - "Epoch: 8/20... Training Step: 1579... Training loss: 1.4438... 0.1498 sec/batch\n", - "Epoch: 8/20... Training Step: 1580... Training loss: 1.4271... 0.1483 sec/batch\n", - "Epoch: 8/20... Training Step: 1581... Training loss: 1.4145... 0.1484 sec/batch\n", - "Epoch: 8/20... Training Step: 1582... Training loss: 1.4551... 0.1487 sec/batch\n", - "Epoch: 8/20... Training Step: 1583... Training loss: 1.4372... 0.1484 sec/batch\n", - "Epoch: 8/20... Training Step: 1584... Training loss: 1.4298... 0.1483 sec/batch\n", - "Epoch: 9/20... Training Step: 1585... Training loss: 1.5735... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1586... Training loss: 1.4644... 0.1476 sec/batch\n", - "Epoch: 9/20... Training Step: 1587... Training loss: 1.4424... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1588... Training loss: 1.4651... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1589... Training loss: 1.4306... 0.1488 sec/batch\n", - "Epoch: 9/20... Training Step: 1590... Training loss: 1.4114... 0.1486 sec/batch\n", - "Epoch: 9/20... Training Step: 1591... Training loss: 1.4409... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1592... Training loss: 1.4278... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1593... Training loss: 1.4521... 0.1504 sec/batch\n", - "Epoch: 9/20... Training Step: 1594... Training loss: 1.4406... 0.1482 sec/batch\n", - "Epoch: 9/20... Training Step: 1595... Training loss: 1.4265... 0.1492 sec/batch\n", - "Epoch: 9/20... Training Step: 1596... Training loss: 1.4219... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1597... Training loss: 1.4434... 0.1476 sec/batch\n", - "Epoch: 9/20... Training Step: 1598... Training loss: 1.4670... 0.1486 sec/batch\n", - "Epoch: 9/20... Training Step: 1599... Training loss: 1.4177... 0.1483 sec/batch\n", - "Epoch: 9/20... Training Step: 1600... Training loss: 1.4238... 0.1482 sec/batch\n", - "Epoch: 9/20... Training Step: 1601... Training loss: 1.4458... 0.1511 sec/batch\n", - "Epoch: 9/20... Training Step: 1602... Training loss: 1.4603... 0.1530 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 9/20... 
Training Step: 1603... Training loss: 1.4514... 0.1505 sec/batch\n", - "Epoch: 9/20... Training Step: 1604... Training loss: 1.4623... 0.1495 sec/batch\n", - "Epoch: 9/20... Training Step: 1605... Training loss: 1.4246... 0.1469 sec/batch\n", - "Epoch: 9/20... Training Step: 1606... Training loss: 1.4517... 0.1468 sec/batch\n", - "Epoch: 9/20... Training Step: 1607... Training loss: 1.4298... 0.1483 sec/batch\n", - "Epoch: 9/20... Training Step: 1608... Training loss: 1.4485... 0.1476 sec/batch\n", - "Epoch: 9/20... Training Step: 1609... Training loss: 1.4318... 0.1482 sec/batch\n", - "Epoch: 9/20... Training Step: 1610... Training loss: 1.3958... 0.1473 sec/batch\n", - "Epoch: 9/20... Training Step: 1611... Training loss: 1.4041... 0.1482 sec/batch\n", - "Epoch: 9/20... Training Step: 1612... Training loss: 1.4450... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1613... Training loss: 1.4563... 0.1471 sec/batch\n", - "Epoch: 9/20... Training Step: 1614... Training loss: 1.4479... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1615... Training loss: 1.4315... 0.1472 sec/batch\n", - "Epoch: 9/20... Training Step: 1616... Training loss: 1.4058... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1617... Training loss: 1.4532... 0.1476 sec/batch\n", - "Epoch: 9/20... Training Step: 1618... Training loss: 1.4452... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1619... Training loss: 1.4157... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1620... Training loss: 1.4337... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1621... Training loss: 1.4115... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1622... Training loss: 1.3853... 0.1491 sec/batch\n", - "Epoch: 9/20... Training Step: 1623... Training loss: 1.3856... 0.1488 sec/batch\n", - "Epoch: 9/20... Training Step: 1624... Training loss: 1.4130... 0.1490 sec/batch\n", - "Epoch: 9/20... Training Step: 1625... Training loss: 1.4072... 0.1500 sec/batch\n", - "Epoch: 9/20... Training Step: 1626... Training loss: 1.4570... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1627... Training loss: 1.4112... 0.1486 sec/batch\n", - "Epoch: 9/20... Training Step: 1628... Training loss: 1.4006... 0.1487 sec/batch\n", - "Epoch: 9/20... Training Step: 1629... Training loss: 1.4325... 0.1486 sec/batch\n", - "Epoch: 9/20... Training Step: 1630... Training loss: 1.3905... 0.1508 sec/batch\n", - "Epoch: 9/20... Training Step: 1631... Training loss: 1.4246... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1632... Training loss: 1.4236... 0.1504 sec/batch\n", - "Epoch: 9/20... Training Step: 1633... Training loss: 1.4129... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1634... Training loss: 1.4502... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1635... Training loss: 1.4010... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1636... Training loss: 1.4718... 0.1472 sec/batch\n", - "Epoch: 9/20... Training Step: 1637... Training loss: 1.4348... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1638... Training loss: 1.4317... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1639... Training loss: 1.4183... 0.1499 sec/batch\n", - "Epoch: 9/20... Training Step: 1640... Training loss: 1.4321... 0.1488 sec/batch\n", - "Epoch: 9/20... Training Step: 1641... Training loss: 1.4586... 0.1520 sec/batch\n", - "Epoch: 9/20... Training Step: 1642... Training loss: 1.4132... 0.1490 sec/batch\n", - "Epoch: 9/20... Training Step: 1643... Training loss: 1.4074... 
0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1644... Training loss: 1.4580... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1645... Training loss: 1.4323... 0.1476 sec/batch\n", - "Epoch: 9/20... Training Step: 1646... Training loss: 1.4789... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1647... Training loss: 1.4481... 0.1482 sec/batch\n", - "Epoch: 9/20... Training Step: 1648... Training loss: 1.4374... 0.1476 sec/batch\n", - "Epoch: 9/20... Training Step: 1649... Training loss: 1.4193... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1650... Training loss: 1.4365... 0.1497 sec/batch\n", - "Epoch: 9/20... Training Step: 1651... Training loss: 1.4383... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1652... Training loss: 1.4119... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1653... Training loss: 1.4244... 0.1490 sec/batch\n", - "Epoch: 9/20... Training Step: 1654... Training loss: 1.4079... 0.1471 sec/batch\n", - "Epoch: 9/20... Training Step: 1655... Training loss: 1.4746... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1656... Training loss: 1.4377... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1657... Training loss: 1.4577... 0.1496 sec/batch\n", - "Epoch: 9/20... Training Step: 1658... Training loss: 1.4071... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1659... Training loss: 1.4307... 0.1504 sec/batch\n", - "Epoch: 9/20... Training Step: 1660... Training loss: 1.4403... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1661... Training loss: 1.4142... 0.1472 sec/batch\n", - "Epoch: 9/20... Training Step: 1662... Training loss: 1.4158... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1663... Training loss: 1.3876... 0.1471 sec/batch\n", - "Epoch: 9/20... Training Step: 1664... Training loss: 1.4339... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1665... Training loss: 1.3844... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1666... Training loss: 1.4191... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1667... Training loss: 1.3903... 0.1477 sec/batch\n", - "Epoch: 9/20... Training Step: 1668... Training loss: 1.4182... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1669... Training loss: 1.3964... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1670... Training loss: 1.4273... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1671... Training loss: 1.3971... 0.1472 sec/batch\n", - "Epoch: 9/20... Training Step: 1672... Training loss: 1.4114... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1673... Training loss: 1.3952... 0.1476 sec/batch\n", - "Epoch: 9/20... Training Step: 1674... Training loss: 1.4249... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1675... Training loss: 1.4023... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1676... Training loss: 1.4086... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1677... Training loss: 1.4031... 0.1488 sec/batch\n", - "Epoch: 9/20... Training Step: 1678... Training loss: 1.3943... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1679... Training loss: 1.3934... 0.1491 sec/batch\n", - "Epoch: 9/20... Training Step: 1680... Training loss: 1.4335... 0.1491 sec/batch\n", - "Epoch: 9/20... Training Step: 1681... Training loss: 1.4261... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1682... Training loss: 1.3898... 0.1477 sec/batch\n", - "Epoch: 9/20... Training Step: 1683... Training loss: 1.3953... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1684... 
Training loss: 1.3759... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1685... Training loss: 1.4200... 0.1486 sec/batch\n", - "Epoch: 9/20... Training Step: 1686... Training loss: 1.4110... 0.1487 sec/batch\n", - "Epoch: 9/20... Training Step: 1687... Training loss: 1.4077... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1688... Training loss: 1.4008... 0.1483 sec/batch\n", - "Epoch: 9/20... Training Step: 1689... Training loss: 1.4110... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1690... Training loss: 1.4139... 0.1473 sec/batch\n", - "Epoch: 9/20... Training Step: 1691... Training loss: 1.4091... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1692... Training loss: 1.4187... 0.1531 sec/batch\n", - "Epoch: 9/20... Training Step: 1693... Training loss: 1.4113... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1694... Training loss: 1.4218... 0.1497 sec/batch\n", - "Epoch: 9/20... Training Step: 1695... Training loss: 1.4013... 0.1501 sec/batch\n", - "Epoch: 9/20... Training Step: 1696... Training loss: 1.4103... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1697... Training loss: 1.4252... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1698... Training loss: 1.4028... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1699... Training loss: 1.3991... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1700... Training loss: 1.3699... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1701... Training loss: 1.4250... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1702... Training loss: 1.4252... 0.1487 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 9/20... Training Step: 1703... Training loss: 1.4031... 0.1511 sec/batch\n", - "Epoch: 9/20... Training Step: 1704... Training loss: 1.4012... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1705... Training loss: 1.4125... 0.1482 sec/batch\n", - "Epoch: 9/20... Training Step: 1706... Training loss: 1.3777... 0.1488 sec/batch\n", - "Epoch: 9/20... Training Step: 1707... Training loss: 1.3695... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1708... Training loss: 1.4179... 0.1472 sec/batch\n", - "Epoch: 9/20... Training Step: 1709... Training loss: 1.3994... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1710... Training loss: 1.3607... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1711... Training loss: 1.4256... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1712... Training loss: 1.4134... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1713... Training loss: 1.3955... 0.1493 sec/batch\n", - "Epoch: 9/20... Training Step: 1714... Training loss: 1.3746... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1715... Training loss: 1.3539... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1716... Training loss: 1.3772... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1717... Training loss: 1.4263... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1718... Training loss: 1.4322... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1719... Training loss: 1.4213... 0.1473 sec/batch\n", - "Epoch: 9/20... Training Step: 1720... Training loss: 1.4056... 0.1477 sec/batch\n", - "Epoch: 9/20... Training Step: 1721... Training loss: 1.4333... 0.1480 sec/batch\n", - "Epoch: 9/20... Training Step: 1722... Training loss: 1.4313... 0.1495 sec/batch\n", - "Epoch: 9/20... Training Step: 1723... Training loss: 1.4164... 0.1476 sec/batch\n", - "Epoch: 9/20... Training Step: 1724... 
Training loss: 1.4166... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1725... Training loss: 1.4717... 0.1502 sec/batch\n", - "Epoch: 9/20... Training Step: 1726... Training loss: 1.4179... 0.1490 sec/batch\n", - "Epoch: 9/20... Training Step: 1727... Training loss: 1.4053... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1728... Training loss: 1.4378... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1729... Training loss: 1.3907... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1730... Training loss: 1.4250... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1731... Training loss: 1.4232... 0.1487 sec/batch\n", - "Epoch: 9/20... Training Step: 1732... Training loss: 1.4489... 0.1501 sec/batch\n", - "Epoch: 9/20... Training Step: 1733... Training loss: 1.4345... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1734... Training loss: 1.4014... 0.1483 sec/batch\n", - "Epoch: 9/20... Training Step: 1735... Training loss: 1.3797... 0.1483 sec/batch\n", - "Epoch: 9/20... Training Step: 1736... Training loss: 1.3961... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1737... Training loss: 1.4186... 0.1487 sec/batch\n", - "Epoch: 9/20... Training Step: 1738... Training loss: 1.4082... 0.1488 sec/batch\n", - "Epoch: 9/20... Training Step: 1739... Training loss: 1.4015... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1740... Training loss: 1.4057... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1741... Training loss: 1.4143... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1742... Training loss: 1.3965... 0.1483 sec/batch\n", - "Epoch: 9/20... Training Step: 1743... Training loss: 1.3792... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1744... Training loss: 1.4241... 0.1478 sec/batch\n", - "Epoch: 9/20... Training Step: 1745... Training loss: 1.4296... 0.1505 sec/batch\n", - "Epoch: 9/20... Training Step: 1746... Training loss: 1.4091... 0.1477 sec/batch\n", - "Epoch: 9/20... Training Step: 1747... Training loss: 1.4142... 0.1477 sec/batch\n", - "Epoch: 9/20... Training Step: 1748... Training loss: 1.4026... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1749... Training loss: 1.4086... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1750... Training loss: 1.4138... 0.1487 sec/batch\n", - "Epoch: 9/20... Training Step: 1751... Training loss: 1.4321... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1752... Training loss: 1.4752... 0.1503 sec/batch\n", - "Epoch: 9/20... Training Step: 1753... Training loss: 1.4140... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1754... Training loss: 1.4051... 0.1479 sec/batch\n", - "Epoch: 9/20... Training Step: 1755... Training loss: 1.4020... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1756... Training loss: 1.3883... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1757... Training loss: 1.4377... 0.1490 sec/batch\n", - "Epoch: 9/20... Training Step: 1758... Training loss: 1.4136... 0.1488 sec/batch\n", - "Epoch: 9/20... Training Step: 1759... Training loss: 1.4138... 0.1496 sec/batch\n", - "Epoch: 9/20... Training Step: 1760... Training loss: 1.3785... 0.1482 sec/batch\n", - "Epoch: 9/20... Training Step: 1761... Training loss: 1.3918... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1762... Training loss: 1.4294... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1763... Training loss: 1.3848... 0.1474 sec/batch\n", - "Epoch: 9/20... Training Step: 1764... Training loss: 1.3679... 0.1464 sec/batch\n", - "Epoch: 9/20... 
Training Step: 1765... Training loss: 1.3882... 0.1482 sec/batch\n", - "Epoch: 9/20... Training Step: 1766... Training loss: 1.3868... 0.1499 sec/batch\n", - "Epoch: 9/20... Training Step: 1767... Training loss: 1.4079... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1768... Training loss: 1.3904... 0.1498 sec/batch\n", - "Epoch: 9/20... Training Step: 1769... Training loss: 1.3917... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1770... Training loss: 1.3828... 0.1485 sec/batch\n", - "Epoch: 9/20... Training Step: 1771... Training loss: 1.4304... 0.1487 sec/batch\n", - "Epoch: 9/20... Training Step: 1772... Training loss: 1.3919... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1773... Training loss: 1.3972... 0.1466 sec/batch\n", - "Epoch: 9/20... Training Step: 1774... Training loss: 1.3870... 0.1484 sec/batch\n", - "Epoch: 9/20... Training Step: 1775... Training loss: 1.3804... 0.1497 sec/batch\n", - "Epoch: 9/20... Training Step: 1776... Training loss: 1.3859... 0.1475 sec/batch\n", - "Epoch: 9/20... Training Step: 1777... Training loss: 1.3977... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1778... Training loss: 1.3695... 0.1488 sec/batch\n", - "Epoch: 9/20... Training Step: 1779... Training loss: 1.3609... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1780... Training loss: 1.4038... 0.1489 sec/batch\n", - "Epoch: 9/20... Training Step: 1781... Training loss: 1.3991... 0.1481 sec/batch\n", - "Epoch: 9/20... Training Step: 1782... Training loss: 1.3841... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1783... Training loss: 1.5497... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1784... Training loss: 1.4488... 0.1499 sec/batch\n", - "Epoch: 10/20... Training Step: 1785... Training loss: 1.4285... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1786... Training loss: 1.4219... 0.1500 sec/batch\n", - "Epoch: 10/20... Training Step: 1787... Training loss: 1.3915... 0.1483 sec/batch\n", - "Epoch: 10/20... Training Step: 1788... Training loss: 1.3795... 0.1483 sec/batch\n", - "Epoch: 10/20... Training Step: 1789... Training loss: 1.4252... 0.1481 sec/batch\n", - "Epoch: 10/20... Training Step: 1790... Training loss: 1.3952... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1791... Training loss: 1.4120... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1792... Training loss: 1.4069... 0.1485 sec/batch\n", - "Epoch: 10/20... Training Step: 1793... Training loss: 1.3868... 0.1502 sec/batch\n", - "Epoch: 10/20... Training Step: 1794... Training loss: 1.3997... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1795... Training loss: 1.4009... 0.1503 sec/batch\n", - "Epoch: 10/20... Training Step: 1796... Training loss: 1.4268... 0.1506 sec/batch\n", - "Epoch: 10/20... Training Step: 1797... Training loss: 1.3976... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1798... Training loss: 1.3845... 0.1488 sec/batch\n", - "Epoch: 10/20... Training Step: 1799... Training loss: 1.4198... 0.1488 sec/batch\n", - "Epoch: 10/20... Training Step: 1800... Training loss: 1.4345... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1801... Training loss: 1.4109... 0.1524 sec/batch\n", - "Epoch: 10/20... Training Step: 1802... Training loss: 1.4296... 0.1527 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 10/20... Training Step: 1803... Training loss: 1.3944... 0.1502 sec/batch\n", - "Epoch: 10/20... Training Step: 1804... Training loss: 1.4174... 
0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1805... Training loss: 1.3929... 0.1513 sec/batch\n", - "Epoch: 10/20... Training Step: 1806... Training loss: 1.4130... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1807... Training loss: 1.4055... 0.1487 sec/batch\n", - "Epoch: 10/20... Training Step: 1808... Training loss: 1.3635... 0.1492 sec/batch\n", - "Epoch: 10/20... Training Step: 1809... Training loss: 1.3728... 0.1492 sec/batch\n", - "Epoch: 10/20... Training Step: 1810... Training loss: 1.4178... 0.1474 sec/batch\n", - "Epoch: 10/20... Training Step: 1811... Training loss: 1.4255... 0.1481 sec/batch\n", - "Epoch: 10/20... Training Step: 1812... Training loss: 1.4180... 0.1489 sec/batch\n", - "Epoch: 10/20... Training Step: 1813... Training loss: 1.3897... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1814... Training loss: 1.3694... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1815... Training loss: 1.4076... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1816... Training loss: 1.4073... 0.1486 sec/batch\n", - "Epoch: 10/20... Training Step: 1817... Training loss: 1.3842... 0.1469 sec/batch\n", - "Epoch: 10/20... Training Step: 1818... Training loss: 1.3963... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1819... Training loss: 1.3774... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1820... Training loss: 1.3601... 0.1488 sec/batch\n", - "Epoch: 10/20... Training Step: 1821... Training loss: 1.3419... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1822... Training loss: 1.3734... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1823... Training loss: 1.3786... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1824... Training loss: 1.4339... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1825... Training loss: 1.3799... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1826... Training loss: 1.3671... 0.1468 sec/batch\n", - "Epoch: 10/20... Training Step: 1827... Training loss: 1.4033... 0.1462 sec/batch\n", - "Epoch: 10/20... Training Step: 1828... Training loss: 1.3648... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1829... Training loss: 1.3868... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1830... Training loss: 1.3925... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1831... Training loss: 1.3905... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1832... Training loss: 1.4130... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1833... Training loss: 1.3642... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1834... Training loss: 1.4290... 0.1485 sec/batch\n", - "Epoch: 10/20... Training Step: 1835... Training loss: 1.3827... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1836... Training loss: 1.3989... 0.1485 sec/batch\n", - "Epoch: 10/20... Training Step: 1837... Training loss: 1.3774... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1838... Training loss: 1.3928... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1839... Training loss: 1.4162... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1840... Training loss: 1.3779... 0.1472 sec/batch\n", - "Epoch: 10/20... Training Step: 1841... Training loss: 1.3730... 0.1474 sec/batch\n", - "Epoch: 10/20... Training Step: 1842... Training loss: 1.4296... 0.1483 sec/batch\n", - "Epoch: 10/20... Training Step: 1843... Training loss: 1.4058... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1844... Training loss: 1.4374... 
0.1507 sec/batch\n", - "Epoch: 10/20... Training Step: 1845... Training loss: 1.4218... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1846... Training loss: 1.4012... 0.1473 sec/batch\n", - "Epoch: 10/20... Training Step: 1847... Training loss: 1.3836... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1848... Training loss: 1.4050... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1849... Training loss: 1.3970... 0.1473 sec/batch\n", - "Epoch: 10/20... Training Step: 1850... Training loss: 1.3729... 0.1468 sec/batch\n", - "Epoch: 10/20... Training Step: 1851... Training loss: 1.3899... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1852... Training loss: 1.3751... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1853... Training loss: 1.4388... 0.1496 sec/batch\n", - "Epoch: 10/20... Training Step: 1854... Training loss: 1.4133... 0.1496 sec/batch\n", - "Epoch: 10/20... Training Step: 1855... Training loss: 1.4292... 0.1474 sec/batch\n", - "Epoch: 10/20... Training Step: 1856... Training loss: 1.3764... 0.1490 sec/batch\n", - "Epoch: 10/20... Training Step: 1857... Training loss: 1.3909... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1858... Training loss: 1.4164... 0.1473 sec/batch\n", - "Epoch: 10/20... Training Step: 1859... Training loss: 1.3830... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1860... Training loss: 1.3702... 0.1467 sec/batch\n", - "Epoch: 10/20... Training Step: 1861... Training loss: 1.3512... 0.1468 sec/batch\n", - "Epoch: 10/20... Training Step: 1862... Training loss: 1.3993... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1863... Training loss: 1.3440... 0.1487 sec/batch\n", - "Epoch: 10/20... Training Step: 1864... Training loss: 1.3977... 0.1481 sec/batch\n", - "Epoch: 10/20... Training Step: 1865... Training loss: 1.3568... 0.1488 sec/batch\n", - "Epoch: 10/20... Training Step: 1866... Training loss: 1.3824... 0.1471 sec/batch\n", - "Epoch: 10/20... Training Step: 1867... Training loss: 1.3644... 0.1509 sec/batch\n", - "Epoch: 10/20... Training Step: 1868... Training loss: 1.3863... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1869... Training loss: 1.3635... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1870... Training loss: 1.3648... 0.1472 sec/batch\n", - "Epoch: 10/20... Training Step: 1871... Training loss: 1.3516... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1872... Training loss: 1.3994... 0.1483 sec/batch\n", - "Epoch: 10/20... Training Step: 1873... Training loss: 1.3732... 0.1474 sec/batch\n", - "Epoch: 10/20... Training Step: 1874... Training loss: 1.3711... 0.1489 sec/batch\n", - "Epoch: 10/20... Training Step: 1875... Training loss: 1.3530... 0.1471 sec/batch\n", - "Epoch: 10/20... Training Step: 1876... Training loss: 1.3641... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1877... Training loss: 1.3751... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1878... Training loss: 1.3883... 0.1469 sec/batch\n", - "Epoch: 10/20... Training Step: 1879... Training loss: 1.3862... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1880... Training loss: 1.3526... 0.1467 sec/batch\n", - "Epoch: 10/20... Training Step: 1881... Training loss: 1.3534... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1882... Training loss: 1.3526... 0.1488 sec/batch\n", - "Epoch: 10/20... Training Step: 1883... Training loss: 1.3828... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1884... Training loss: 1.3683... 
0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1885... Training loss: 1.3832... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1886... Training loss: 1.3796... 0.1466 sec/batch\n", - "Epoch: 10/20... Training Step: 1887... Training loss: 1.3787... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1888... Training loss: 1.3796... 0.1483 sec/batch\n", - "Epoch: 10/20... Training Step: 1889... Training loss: 1.3824... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1890... Training loss: 1.3802... 0.1494 sec/batch\n", - "Epoch: 10/20... Training Step: 1891... Training loss: 1.3646... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1892... Training loss: 1.3964... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1893... Training loss: 1.3667... 0.1473 sec/batch\n", - "Epoch: 10/20... Training Step: 1894... Training loss: 1.3792... 0.1491 sec/batch\n", - "Epoch: 10/20... Training Step: 1895... Training loss: 1.3937... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1896... Training loss: 1.3648... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1897... Training loss: 1.3502... 0.1488 sec/batch\n", - "Epoch: 10/20... Training Step: 1898... Training loss: 1.3453... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1899... Training loss: 1.3728... 0.1472 sec/batch\n", - "Epoch: 10/20... Training Step: 1900... Training loss: 1.3845... 0.1471 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 10/20... Training Step: 1901... Training loss: 1.3700... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1902... Training loss: 1.3781... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1903... Training loss: 1.3792... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1904... Training loss: 1.3491... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1905... Training loss: 1.3428... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1906... Training loss: 1.3857... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1907... Training loss: 1.3753... 0.1516 sec/batch\n", - "Epoch: 10/20... Training Step: 1908... Training loss: 1.3374... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1909... Training loss: 1.3931... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1910... Training loss: 1.3878... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1911... Training loss: 1.3640... 0.1487 sec/batch\n", - "Epoch: 10/20... Training Step: 1912... Training loss: 1.3451... 0.1472 sec/batch\n", - "Epoch: 10/20... Training Step: 1913... Training loss: 1.3251... 0.1468 sec/batch\n", - "Epoch: 10/20... Training Step: 1914... Training loss: 1.3578... 0.1471 sec/batch\n", - "Epoch: 10/20... Training Step: 1915... Training loss: 1.3925... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1916... Training loss: 1.3876... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1917... Training loss: 1.4031... 0.1470 sec/batch\n", - "Epoch: 10/20... Training Step: 1918... Training loss: 1.3721... 0.1478 sec/batch\n", - "Epoch: 10/20... Training Step: 1919... Training loss: 1.4144... 0.1487 sec/batch\n", - "Epoch: 10/20... Training Step: 1920... Training loss: 1.3886... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1921... Training loss: 1.3916... 0.1472 sec/batch\n", - "Epoch: 10/20... Training Step: 1922... Training loss: 1.3904... 0.1470 sec/batch\n", - "Epoch: 10/20... Training Step: 1923... Training loss: 1.4424... 0.1479 sec/batch\n", - "Epoch: 10/20... 
Training Step: 1924... Training loss: 1.3919... 0.1500 sec/batch\n", - "Epoch: 10/20... Training Step: 1925... Training loss: 1.3779... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1926... Training loss: 1.4128... 0.1489 sec/batch\n", - "Epoch: 10/20... Training Step: 1927... Training loss: 1.3632... 0.1486 sec/batch\n", - "Epoch: 10/20... Training Step: 1928... Training loss: 1.3990... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1929... Training loss: 1.3900... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1930... Training loss: 1.4121... 0.1470 sec/batch\n", - "Epoch: 10/20... Training Step: 1931... Training loss: 1.4086... 0.1483 sec/batch\n", - "Epoch: 10/20... Training Step: 1932... Training loss: 1.3637... 0.1485 sec/batch\n", - "Epoch: 10/20... Training Step: 1933... Training loss: 1.3442... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1934... Training loss: 1.3599... 0.1490 sec/batch\n", - "Epoch: 10/20... Training Step: 1935... Training loss: 1.3949... 0.1487 sec/batch\n", - "Epoch: 10/20... Training Step: 1936... Training loss: 1.3820... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1937... Training loss: 1.3690... 0.1488 sec/batch\n", - "Epoch: 10/20... Training Step: 1938... Training loss: 1.3770... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1939... Training loss: 1.3907... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1940... Training loss: 1.3756... 0.1468 sec/batch\n", - "Epoch: 10/20... Training Step: 1941... Training loss: 1.3441... 0.1482 sec/batch\n", - "Epoch: 10/20... Training Step: 1942... Training loss: 1.4001... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1943... Training loss: 1.4002... 0.1493 sec/batch\n", - "Epoch: 10/20... Training Step: 1944... Training loss: 1.3814... 0.1506 sec/batch\n", - "Epoch: 10/20... Training Step: 1945... Training loss: 1.3746... 0.1469 sec/batch\n", - "Epoch: 10/20... Training Step: 1946... Training loss: 1.3716... 0.1469 sec/batch\n", - "Epoch: 10/20... Training Step: 1947... Training loss: 1.3776... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1948... Training loss: 1.3738... 0.1477 sec/batch\n", - "Epoch: 10/20... Training Step: 1949... Training loss: 1.4020... 0.1490 sec/batch\n", - "Epoch: 10/20... Training Step: 1950... Training loss: 1.4389... 0.1481 sec/batch\n", - "Epoch: 10/20... Training Step: 1951... Training loss: 1.3780... 0.1490 sec/batch\n", - "Epoch: 10/20... Training Step: 1952... Training loss: 1.3678... 0.1501 sec/batch\n", - "Epoch: 10/20... Training Step: 1953... Training loss: 1.3730... 0.1475 sec/batch\n", - "Epoch: 10/20... Training Step: 1954... Training loss: 1.3574... 0.1471 sec/batch\n", - "Epoch: 10/20... Training Step: 1955... Training loss: 1.4040... 0.1470 sec/batch\n", - "Epoch: 10/20... Training Step: 1956... Training loss: 1.3780... 0.1489 sec/batch\n", - "Epoch: 10/20... Training Step: 1957... Training loss: 1.3994... 0.1495 sec/batch\n", - "Epoch: 10/20... Training Step: 1958... Training loss: 1.3566... 0.1501 sec/batch\n", - "Epoch: 10/20... Training Step: 1959... Training loss: 1.3611... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1960... Training loss: 1.3984... 0.1469 sec/batch\n", - "Epoch: 10/20... Training Step: 1961... Training loss: 1.3519... 0.1481 sec/batch\n", - "Epoch: 10/20... Training Step: 1962... Training loss: 1.3450... 0.1506 sec/batch\n", - "Epoch: 10/20... Training Step: 1963... Training loss: 1.3515... 0.1472 sec/batch\n", - "Epoch: 10/20... Training Step: 1964... 
Training loss: 1.3728... 0.1479 sec/batch\n", - "Epoch: 10/20... Training Step: 1965... Training loss: 1.3673... 0.1489 sec/batch\n", - "Epoch: 10/20... Training Step: 1966... Training loss: 1.3674... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1967... Training loss: 1.3667... 0.1469 sec/batch\n", - "Epoch: 10/20... Training Step: 1968... Training loss: 1.3562... 0.1493 sec/batch\n", - "Epoch: 10/20... Training Step: 1969... Training loss: 1.4006... 0.1483 sec/batch\n", - "Epoch: 10/20... Training Step: 1970... Training loss: 1.3580... 0.1484 sec/batch\n", - "Epoch: 10/20... Training Step: 1971... Training loss: 1.3610... 0.1486 sec/batch\n", - "Epoch: 10/20... Training Step: 1972... Training loss: 1.3786... 0.1491 sec/batch\n", - "Epoch: 10/20... Training Step: 1973... Training loss: 1.3470... 0.1487 sec/batch\n", - "Epoch: 10/20... Training Step: 1974... Training loss: 1.3518... 0.1486 sec/batch\n", - "Epoch: 10/20... Training Step: 1975... Training loss: 1.3731... 0.1476 sec/batch\n", - "Epoch: 10/20... Training Step: 1976... Training loss: 1.3566... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1977... Training loss: 1.3382... 0.1488 sec/batch\n", - "Epoch: 10/20... Training Step: 1978... Training loss: 1.3818... 0.1480 sec/batch\n", - "Epoch: 10/20... Training Step: 1979... Training loss: 1.3623... 0.1481 sec/batch\n", - "Epoch: 10/20... Training Step: 1980... Training loss: 1.3558... 0.1492 sec/batch\n", - "Epoch: 11/20... Training Step: 1981... Training loss: 1.5137... 0.1507 sec/batch\n", - "Epoch: 11/20... Training Step: 1982... Training loss: 1.3925... 0.1488 sec/batch\n", - "Epoch: 11/20... Training Step: 1983... Training loss: 1.3885... 0.1487 sec/batch\n", - "Epoch: 11/20... Training Step: 1984... Training loss: 1.3924... 0.1479 sec/batch\n", - "Epoch: 11/20... Training Step: 1985... Training loss: 1.3510... 0.1473 sec/batch\n", - "Epoch: 11/20... Training Step: 1986... Training loss: 1.3331... 0.1485 sec/batch\n", - "Epoch: 11/20... Training Step: 1987... Training loss: 1.3796... 0.1489 sec/batch\n", - "Epoch: 11/20... Training Step: 1988... Training loss: 1.3545... 0.1480 sec/batch\n", - "Epoch: 11/20... Training Step: 1989... Training loss: 1.3744... 0.1483 sec/batch\n", - "Epoch: 11/20... Training Step: 1990... Training loss: 1.3716... 0.1508 sec/batch\n", - "Epoch: 11/20... Training Step: 1991... Training loss: 1.3607... 0.1491 sec/batch\n", - "Epoch: 11/20... Training Step: 1992... Training loss: 1.3585... 0.1472 sec/batch\n", - "Epoch: 11/20... Training Step: 1993... Training loss: 1.3773... 0.1483 sec/batch\n", - "Epoch: 11/20... Training Step: 1994... Training loss: 1.3829... 0.1483 sec/batch\n", - "Epoch: 11/20... Training Step: 1995... Training loss: 1.3648... 0.1474 sec/batch\n", - "Epoch: 11/20... Training Step: 1996... Training loss: 1.3405... 0.1466 sec/batch\n", - "Epoch: 11/20... Training Step: 1997... Training loss: 1.3817... 0.1502 sec/batch\n", - "Epoch: 11/20... Training Step: 1998... Training loss: 1.3982... 0.1489 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 11/20... Training Step: 1999... Training loss: 1.3831... 0.1488 sec/batch\n", - "Epoch: 11/20... Training Step: 2000... Training loss: 1.3995... 0.1472 sec/batch\n", - "Epoch: 11/20... Training Step: 2001... Training loss: 1.3572... 0.1544 sec/batch\n", - "Epoch: 11/20... Training Step: 2002... Training loss: 1.3826... 0.1515 sec/batch\n", - "Epoch: 11/20... Training Step: 2003... Training loss: 1.3633... 
0.1507 sec/batch\n", - "Epoch: 11/20... Training Step: 2004... Training loss: 1.3868... 0.1532 sec/batch\n", - "Epoch: 11/20... Training Step: 2005... Training loss: 1.3715... 0.1480 sec/batch\n", - "Epoch: 11/20... Training Step: 2006... Training loss: 1.3298... 0.1472 sec/batch\n", - "Epoch: 11/20... Training Step: 2007... Training loss: 1.3356... 0.1481 sec/batch\n", - "Epoch: 11/20... Training Step: 2008... Training loss: 1.3804... 0.1478 sec/batch\n", - "Epoch: 11/20... Training Step: 2009... Training loss: 1.3832... 0.1488 sec/batch\n", - "Epoch: 11/20... Training Step: 2010... Training loss: 1.3925... 0.1474 sec/batch\n", - "Epoch: 11/20... Training Step: 2011... Training loss: 1.3624... 0.1472 sec/batch\n", - "Epoch: 11/20... Training Step: 2012... Training loss: 1.3416... 0.1461 sec/batch\n", - "Epoch: 11/20... Training Step: 2013... Training loss: 1.3714... 0.1471 sec/batch\n", - "Epoch: 11/20... Training Step: 2014... Training loss: 1.3703... 0.1477 sec/batch\n", - "Epoch: 11/20... Training Step: 2015... Training loss: 1.3516... 0.1474 sec/batch\n", - "Epoch: 11/20... Training Step: 2016... Training loss: 1.3765... 0.1470 sec/batch\n", - "Epoch: 11/20... Training Step: 2017... Training loss: 1.3416... 0.1485 sec/batch\n", - "Epoch: 11/20... Training Step: 2018... Training loss: 1.3260... 0.1478 sec/batch\n", - "Epoch: 11/20... Training Step: 2019... Training loss: 1.3168... 0.1494 sec/batch\n", - "Epoch: 11/20... Training Step: 2020... Training loss: 1.3494... 0.1470 sec/batch\n", - "Epoch: 11/20... Training Step: 2021... Training loss: 1.3448... 0.1498 sec/batch\n", - "Epoch: 11/20... Training Step: 2022... Training loss: 1.4003... 0.1470 sec/batch\n", - "Epoch: 11/20... Training Step: 2023... Training loss: 1.3493... 0.1476 sec/batch\n", - "Epoch: 11/20... Training Step: 2024... Training loss: 1.3357... 0.1473 sec/batch\n", - "Epoch: 11/20... Training Step: 2025... Training loss: 1.3768... 0.1470 sec/batch\n", - "Epoch: 11/20... Training Step: 2026... Training loss: 1.3297... 0.1506 sec/batch\n", - "Epoch: 11/20... Training Step: 2027... Training loss: 1.3511... 0.1460 sec/batch\n", - "Epoch: 11/20... Training Step: 2028... Training loss: 1.3595... 0.1469 sec/batch\n", - "Epoch: 11/20... Training Step: 2029... Training loss: 1.3550... 0.1483 sec/batch\n", - "Epoch: 11/20... Training Step: 2030... Training loss: 1.3786... 0.1477 sec/batch\n", - "Epoch: 11/20... Training Step: 2031... Training loss: 1.3319... 0.1470 sec/batch\n", - "Epoch: 11/20... Training Step: 2032... Training loss: 1.4072... 0.1486 sec/batch\n", - "Epoch: 11/20... Training Step: 2033... Training loss: 1.3618... 0.1469 sec/batch\n", - "Epoch: 11/20... Training Step: 2034... Training loss: 1.3757... 0.1467 sec/batch\n", - "Epoch: 11/20... Training Step: 2035... Training loss: 1.3526... 0.1465 sec/batch\n", - "Epoch: 11/20... Training Step: 2036... Training loss: 1.3658... 0.1477 sec/batch\n", - "Epoch: 11/20... Training Step: 2037... Training loss: 1.3769... 0.1492 sec/batch\n", - "Epoch: 11/20... Training Step: 2038... Training loss: 1.3524... 0.1479 sec/batch\n", - "Epoch: 11/20... Training Step: 2039... Training loss: 1.3324... 0.1471 sec/batch\n", - "Epoch: 11/20... Training Step: 2040... Training loss: 1.4001... 0.1485 sec/batch\n", - "Epoch: 11/20... Training Step: 2041... Training loss: 1.3755... 0.1481 sec/batch\n", - "Epoch: 11/20... Training Step: 2042... Training loss: 1.4063... 0.1485 sec/batch\n", - "Epoch: 11/20... Training Step: 2043... Training loss: 1.3886... 
0.1479 sec/batch\n", - "Epoch: 11/20... Training Step: 2044... Training loss: 1.3715... 0.1478 sec/batch\n", - "Epoch: 11/20... Training Step: 2045... Training loss: 1.3582... 0.1468 sec/batch\n", - "Epoch: 11/20... Training Step: 2046... Training loss: 1.3660... 0.1534 sec/batch\n", - "Epoch: 11/20... Training Step: 2047... Training loss: 1.3801... 0.1478 sec/batch\n", - "Epoch: 11/20... Training Step: 2048... Training loss: 1.3457... 0.1477 sec/batch\n", - "Epoch: 11/20... Training Step: 2049... Training loss: 1.3741... 0.1473 sec/batch\n", - "Epoch: 11/20... Training Step: 2050... Training loss: 1.3468... 0.1473 sec/batch\n", - "Epoch: 11/20... Training Step: 2051... Training loss: 1.4128... 0.1469 sec/batch\n", - "Epoch: 11/20... Training Step: 2052... Training loss: 1.3804... 0.1467 sec/batch\n", - "Epoch: 11/20... Training Step: 2053... Training loss: 1.3962... 0.1503 sec/batch\n", - "Epoch: 11/20... Training Step: 2054... Training loss: 1.3520... 0.1489 sec/batch\n", - "Epoch: 11/20... Training Step: 2055... Training loss: 1.3670... 0.1494 sec/batch\n", - "Epoch: 11/20... Training Step: 2056... Training loss: 1.3822... 0.1475 sec/batch\n", - "Epoch: 11/20... Training Step: 2057... Training loss: 1.3530... 0.1485 sec/batch\n", - "Epoch: 11/20... Training Step: 2058... Training loss: 1.3457... 0.1492 sec/batch\n", - "Epoch: 11/20... Training Step: 2059... Training loss: 1.3144... 0.1473 sec/batch\n", - "Epoch: 11/20... Training Step: 2060... Training loss: 1.3714... 0.1477 sec/batch\n", - "Epoch: 11/20... Training Step: 2061... Training loss: 1.3255... 0.1480 sec/batch\n", - "Epoch: 11/20... Training Step: 2062... Training loss: 1.3568... 0.1488 sec/batch\n", - "Epoch: 11/20... Training Step: 2063... Training loss: 1.3271... 0.1468 sec/batch\n", - "Epoch: 11/20... Training Step: 2064... Training loss: 1.3542... 0.1492 sec/batch\n", - "Epoch: 11/20... Training Step: 2065... Training loss: 1.3333... 0.1472 sec/batch\n", - "Epoch: 11/20... Training Step: 2066... Training loss: 1.3576... 0.1474 sec/batch\n", - "Epoch: 11/20... Training Step: 2067... Training loss: 1.3307... 0.1493 sec/batch\n", - "Epoch: 11/20... Training Step: 2068... Training loss: 1.3388... 0.1485 sec/batch\n", - "Epoch: 11/20... Training Step: 2069... Training loss: 1.3305... 0.1473 sec/batch\n", - "Epoch: 11/20... Training Step: 2070... Training loss: 1.3732... 0.1481 sec/batch\n", - "Epoch: 11/20... Training Step: 2071... Training loss: 1.3358... 0.1486 sec/batch\n", - "Epoch: 11/20... Training Step: 2072... Training loss: 1.3490... 0.1481 sec/batch\n", - "Epoch: 11/20... Training Step: 2073... Training loss: 1.3335... 0.1485 sec/batch\n", - "Epoch: 11/20... Training Step: 2074... Training loss: 1.3293... 0.1476 sec/batch\n", - "Epoch: 11/20... Training Step: 2075... Training loss: 1.3372... 0.1479 sec/batch\n", - "Epoch: 11/20... Training Step: 2076... Training loss: 1.3556... 0.1479 sec/batch\n", - "Epoch: 11/20... Training Step: 2077... Training loss: 1.3713... 0.1478 sec/batch\n", - "Epoch: 11/20... Training Step: 2078... Training loss: 1.3235... 0.1475 sec/batch\n", - "Epoch: 11/20... Training Step: 2079... Training loss: 1.3288... 0.1479 sec/batch\n", - "Epoch: 11/20... Training Step: 2080... Training loss: 1.3283... 0.1481 sec/batch\n", - "Epoch: 11/20... Training Step: 2081... Training loss: 1.3621... 0.1482 sec/batch\n", - "Epoch: 11/20... Training Step: 2082... Training loss: 1.3471... 0.1488 sec/batch\n", - "Epoch: 11/20... Training Step: 2083... Training loss: 1.3471... 
0.1491 sec/batch\n",
    [... removed stdout stream output (several cells), Epochs 11-15 of 20, Training Steps 2084-2918: per-step training loss drifting from roughly 1.36 down to 1.26 at about 0.148 sec/batch ...]
    - "Epoch: 15/20... 
Training Step: 2919... Training loss: 1.2862... 0.1478 sec/batch\n", - "Epoch: 15/20... Training Step: 2920... Training loss: 1.3089... 0.1486 sec/batch\n", - "Epoch: 15/20... Training Step: 2921... Training loss: 1.3014... 0.1485 sec/batch\n", - "Epoch: 15/20... Training Step: 2922... Training loss: 1.2766... 0.1479 sec/batch\n", - "Epoch: 15/20... Training Step: 2923... Training loss: 1.2431... 0.1473 sec/batch\n", - "Epoch: 15/20... Training Step: 2924... Training loss: 1.2539... 0.1485 sec/batch\n", - "Epoch: 15/20... Training Step: 2925... Training loss: 1.2935... 0.1474 sec/batch\n", - "Epoch: 15/20... Training Step: 2926... Training loss: 1.2691... 0.1487 sec/batch\n", - "Epoch: 15/20... Training Step: 2927... Training loss: 1.2731... 0.1511 sec/batch\n", - "Epoch: 15/20... Training Step: 2928... Training loss: 1.2690... 0.1477 sec/batch\n", - "Epoch: 15/20... Training Step: 2929... Training loss: 1.2812... 0.1476 sec/batch\n", - "Epoch: 15/20... Training Step: 2930... Training loss: 1.2704... 0.1480 sec/batch\n", - "Epoch: 15/20... Training Step: 2931... Training loss: 1.2433... 0.1475 sec/batch\n", - "Epoch: 15/20... Training Step: 2932... Training loss: 1.2904... 0.1473 sec/batch\n", - "Epoch: 15/20... Training Step: 2933... Training loss: 1.2945... 0.1474 sec/batch\n", - "Epoch: 15/20... Training Step: 2934... Training loss: 1.2874... 0.1483 sec/batch\n", - "Epoch: 15/20... Training Step: 2935... Training loss: 1.2822... 0.1480 sec/batch\n", - "Epoch: 15/20... Training Step: 2936... Training loss: 1.2765... 0.1488 sec/batch\n", - "Epoch: 15/20... Training Step: 2937... Training loss: 1.2751... 0.1479 sec/batch\n", - "Epoch: 15/20... Training Step: 2938... Training loss: 1.2681... 0.1477 sec/batch\n", - "Epoch: 15/20... Training Step: 2939... Training loss: 1.3096... 0.1497 sec/batch\n", - "Epoch: 15/20... Training Step: 2940... Training loss: 1.3313... 0.1476 sec/batch\n", - "Epoch: 15/20... Training Step: 2941... Training loss: 1.2911... 0.1491 sec/batch\n", - "Epoch: 15/20... Training Step: 2942... Training loss: 1.2785... 0.1488 sec/batch\n", - "Epoch: 15/20... Training Step: 2943... Training loss: 1.2648... 0.1471 sec/batch\n", - "Epoch: 15/20... Training Step: 2944... Training loss: 1.2701... 0.1473 sec/batch\n", - "Epoch: 15/20... Training Step: 2945... Training loss: 1.3068... 0.1479 sec/batch\n", - "Epoch: 15/20... Training Step: 2946... Training loss: 1.2724... 0.1490 sec/batch\n", - "Epoch: 15/20... Training Step: 2947... Training loss: 1.2829... 0.1485 sec/batch\n", - "Epoch: 15/20... Training Step: 2948... Training loss: 1.2465... 0.1492 sec/batch\n", - "Epoch: 15/20... Training Step: 2949... Training loss: 1.2700... 0.1476 sec/batch\n", - "Epoch: 15/20... Training Step: 2950... Training loss: 1.3118... 0.1465 sec/batch\n", - "Epoch: 15/20... Training Step: 2951... Training loss: 1.2557... 0.1473 sec/batch\n", - "Epoch: 15/20... Training Step: 2952... Training loss: 1.2486... 0.1475 sec/batch\n", - "Epoch: 15/20... Training Step: 2953... Training loss: 1.2499... 0.1474 sec/batch\n", - "Epoch: 15/20... Training Step: 2954... Training loss: 1.2712... 0.1485 sec/batch\n", - "Epoch: 15/20... Training Step: 2955... Training loss: 1.2849... 0.1475 sec/batch\n", - "Epoch: 15/20... Training Step: 2956... Training loss: 1.2701... 0.1474 sec/batch\n", - "Epoch: 15/20... Training Step: 2957... Training loss: 1.2765... 0.1472 sec/batch\n", - "Epoch: 15/20... Training Step: 2958... Training loss: 1.2512... 0.1472 sec/batch\n", - "Epoch: 15/20... Training Step: 2959... 
Training loss: 1.3069... 0.1480 sec/batch\n", - "Epoch: 15/20... Training Step: 2960... Training loss: 1.2638... 0.1467 sec/batch\n", - "Epoch: 15/20... Training Step: 2961... Training loss: 1.2715... 0.1503 sec/batch\n", - "Epoch: 15/20... Training Step: 2962... Training loss: 1.2722... 0.1485 sec/batch\n", - "Epoch: 15/20... Training Step: 2963... Training loss: 1.2489... 0.1480 sec/batch\n", - "Epoch: 15/20... Training Step: 2964... Training loss: 1.2556... 0.1472 sec/batch\n", - "Epoch: 15/20... Training Step: 2965... Training loss: 1.2769... 0.1478 sec/batch\n", - "Epoch: 15/20... Training Step: 2966... Training loss: 1.2580... 0.1480 sec/batch\n", - "Epoch: 15/20... Training Step: 2967... Training loss: 1.2382... 0.1478 sec/batch\n", - "Epoch: 15/20... Training Step: 2968... Training loss: 1.2751... 0.1477 sec/batch\n", - "Epoch: 15/20... Training Step: 2969... Training loss: 1.2738... 0.1491 sec/batch\n", - "Epoch: 15/20... Training Step: 2970... Training loss: 1.2498... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 2971... Training loss: 1.4048... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 2972... Training loss: 1.2962... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 2973... Training loss: 1.2768... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 2974... Training loss: 1.2993... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 2975... Training loss: 1.2541... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 2976... Training loss: 1.2385... 0.1472 sec/batch\n", - "Epoch: 16/20... Training Step: 2977... Training loss: 1.2833... 0.1487 sec/batch\n", - "Epoch: 16/20... Training Step: 2978... Training loss: 1.2694... 0.1489 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 16/20... Training Step: 2979... Training loss: 1.2837... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 2980... Training loss: 1.2713... 0.1527 sec/batch\n", - "Epoch: 16/20... Training Step: 2981... Training loss: 1.2646... 0.1495 sec/batch\n", - "Epoch: 16/20... Training Step: 2982... Training loss: 1.2676... 0.1493 sec/batch\n", - "Epoch: 16/20... Training Step: 2983... Training loss: 1.2773... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 2984... Training loss: 1.2810... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 2985... Training loss: 1.2569... 0.1476 sec/batch\n", - "Epoch: 16/20... Training Step: 2986... Training loss: 1.2544... 0.1464 sec/batch\n", - "Epoch: 16/20... Training Step: 2987... Training loss: 1.2871... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 2988... Training loss: 1.2888... 0.1471 sec/batch\n", - "Epoch: 16/20... Training Step: 2989... Training loss: 1.2761... 0.1491 sec/batch\n", - "Epoch: 16/20... Training Step: 2990... Training loss: 1.2928... 0.1489 sec/batch\n", - "Epoch: 16/20... Training Step: 2991... Training loss: 1.2671... 0.1502 sec/batch\n", - "Epoch: 16/20... Training Step: 2992... Training loss: 1.2865... 0.1479 sec/batch\n", - "Epoch: 16/20... Training Step: 2993... Training loss: 1.2713... 0.1486 sec/batch\n", - "Epoch: 16/20... Training Step: 2994... Training loss: 1.3016... 0.1476 sec/batch\n", - "Epoch: 16/20... Training Step: 2995... Training loss: 1.2769... 0.1482 sec/batch\n", - "Epoch: 16/20... Training Step: 2996... Training loss: 1.2374... 0.1498 sec/batch\n", - "Epoch: 16/20... Training Step: 2997... Training loss: 1.2452... 0.1473 sec/batch\n", - "Epoch: 16/20... Training Step: 2998... Training loss: 1.2891... 
0.1490 sec/batch\n", - "Epoch: 16/20... Training Step: 2999... Training loss: 1.2760... 0.1481 sec/batch\n", - "Epoch: 16/20... Training Step: 3000... Training loss: 1.2803... 0.1503 sec/batch\n", - "Epoch: 16/20... Training Step: 3001... Training loss: 1.2595... 0.1507 sec/batch\n", - "Epoch: 16/20... Training Step: 3002... Training loss: 1.2470... 0.1529 sec/batch\n", - "Epoch: 16/20... Training Step: 3003... Training loss: 1.2774... 0.1500 sec/batch\n", - "Epoch: 16/20... Training Step: 3004... Training loss: 1.2884... 0.1486 sec/batch\n", - "Epoch: 16/20... Training Step: 3005... Training loss: 1.2584... 0.1475 sec/batch\n", - "Epoch: 16/20... Training Step: 3006... Training loss: 1.2704... 0.1479 sec/batch\n", - "Epoch: 16/20... Training Step: 3007... Training loss: 1.2484... 0.1494 sec/batch\n", - "Epoch: 16/20... Training Step: 3008... Training loss: 1.2375... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3009... Training loss: 1.2300... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3010... Training loss: 1.2668... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 3011... Training loss: 1.2423... 0.1485 sec/batch\n", - "Epoch: 16/20... Training Step: 3012... Training loss: 1.3111... 0.1487 sec/batch\n", - "Epoch: 16/20... Training Step: 3013... Training loss: 1.2524... 0.1506 sec/batch\n", - "Epoch: 16/20... Training Step: 3014... Training loss: 1.2387... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3015... Training loss: 1.2701... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3016... Training loss: 1.2437... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3017... Training loss: 1.2607... 0.1473 sec/batch\n", - "Epoch: 16/20... Training Step: 3018... Training loss: 1.2620... 0.1487 sec/batch\n", - "Epoch: 16/20... Training Step: 3019... Training loss: 1.2694... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 3020... Training loss: 1.2865... 0.1504 sec/batch\n", - "Epoch: 16/20... Training Step: 3021... Training loss: 1.2484... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3022... Training loss: 1.3085... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 3023... Training loss: 1.2715... 0.1493 sec/batch\n", - "Epoch: 16/20... Training Step: 3024... Training loss: 1.2800... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3025... Training loss: 1.2661... 0.1482 sec/batch\n", - "Epoch: 16/20... Training Step: 3026... Training loss: 1.2735... 0.1472 sec/batch\n", - "Epoch: 16/20... Training Step: 3027... Training loss: 1.2836... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3028... Training loss: 1.2669... 0.1468 sec/batch\n", - "Epoch: 16/20... Training Step: 3029... Training loss: 1.2357... 0.1485 sec/batch\n", - "Epoch: 16/20... Training Step: 3030... Training loss: 1.2945... 0.1497 sec/batch\n", - "Epoch: 16/20... Training Step: 3031... Training loss: 1.2753... 0.1472 sec/batch\n", - "Epoch: 16/20... Training Step: 3032... Training loss: 1.3133... 0.1481 sec/batch\n", - "Epoch: 16/20... Training Step: 3033... Training loss: 1.2946... 0.1495 sec/batch\n", - "Epoch: 16/20... Training Step: 3034... Training loss: 1.2862... 0.1475 sec/batch\n", - "Epoch: 16/20... Training Step: 3035... Training loss: 1.2610... 0.1471 sec/batch\n", - "Epoch: 16/20... Training Step: 3036... Training loss: 1.2774... 0.1468 sec/batch\n", - "Epoch: 16/20... Training Step: 3037... Training loss: 1.2796... 0.1485 sec/batch\n", - "Epoch: 16/20... Training Step: 3038... Training loss: 1.2517... 
0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3039... Training loss: 1.2769... 0.1503 sec/batch\n", - "Epoch: 16/20... Training Step: 3040... Training loss: 1.2635... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 3041... Training loss: 1.3080... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3042... Training loss: 1.2875... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3043... Training loss: 1.3020... 0.1485 sec/batch\n", - "Epoch: 16/20... Training Step: 3044... Training loss: 1.2566... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3045... Training loss: 1.2658... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 3046... Training loss: 1.2975... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3047... Training loss: 1.2770... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3048... Training loss: 1.2573... 0.1529 sec/batch\n", - "Epoch: 16/20... Training Step: 3049... Training loss: 1.2284... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3050... Training loss: 1.2723... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3051... Training loss: 1.2341... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3052... Training loss: 1.2749... 0.1475 sec/batch\n", - "Epoch: 16/20... Training Step: 3053... Training loss: 1.2458... 0.1482 sec/batch\n", - "Epoch: 16/20... Training Step: 3054... Training loss: 1.2664... 0.1473 sec/batch\n", - "Epoch: 16/20... Training Step: 3055... Training loss: 1.2473... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3056... Training loss: 1.2595... 0.1472 sec/batch\n", - "Epoch: 16/20... Training Step: 3057... Training loss: 1.2473... 0.1501 sec/batch\n", - "Epoch: 16/20... Training Step: 3058... Training loss: 1.2565... 0.1485 sec/batch\n", - "Epoch: 16/20... Training Step: 3059... Training loss: 1.2411... 0.1486 sec/batch\n", - "Epoch: 16/20... Training Step: 3060... Training loss: 1.2827... 0.1492 sec/batch\n", - "Epoch: 16/20... Training Step: 3061... Training loss: 1.2537... 0.1489 sec/batch\n", - "Epoch: 16/20... Training Step: 3062... Training loss: 1.2664... 0.1487 sec/batch\n", - "Epoch: 16/20... Training Step: 3063... Training loss: 1.2408... 0.1489 sec/batch\n", - "Epoch: 16/20... Training Step: 3064... Training loss: 1.2387... 0.1491 sec/batch\n", - "Epoch: 16/20... Training Step: 3065... Training loss: 1.2507... 0.1485 sec/batch\n", - "Epoch: 16/20... Training Step: 3066... Training loss: 1.2799... 0.1482 sec/batch\n", - "Epoch: 16/20... Training Step: 3067... Training loss: 1.2787... 0.1497 sec/batch\n", - "Epoch: 16/20... Training Step: 3068... Training loss: 1.2318... 0.1484 sec/batch\n", - "Epoch: 16/20... Training Step: 3069... Training loss: 1.2468... 0.1487 sec/batch\n", - "Epoch: 16/20... Training Step: 3070... Training loss: 1.2339... 0.1492 sec/batch\n", - "Epoch: 16/20... Training Step: 3071... Training loss: 1.2659... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3072... Training loss: 1.2518... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3073... Training loss: 1.2644... 0.1490 sec/batch\n", - "Epoch: 16/20... Training Step: 3074... Training loss: 1.2658... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3075... Training loss: 1.2621... 0.1485 sec/batch\n", - "Epoch: 16/20... Training Step: 3076... Training loss: 1.2610... 0.1487 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 16/20... Training Step: 3077... Training loss: 1.2671... 0.1484 sec/batch\n", - "Epoch: 16/20... 
Training Step: 3078... Training loss: 1.2724... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 3079... Training loss: 1.2559... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3080... Training loss: 1.2791... 0.1496 sec/batch\n", - "Epoch: 16/20... Training Step: 3081... Training loss: 1.2506... 0.1479 sec/batch\n", - "Epoch: 16/20... Training Step: 3082... Training loss: 1.2799... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3083... Training loss: 1.2730... 0.1494 sec/batch\n", - "Epoch: 16/20... Training Step: 3084... Training loss: 1.2610... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3085... Training loss: 1.2424... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 3086... Training loss: 1.2255... 0.1481 sec/batch\n", - "Epoch: 16/20... Training Step: 3087... Training loss: 1.2682... 0.1509 sec/batch\n", - "Epoch: 16/20... Training Step: 3088... Training loss: 1.2820... 0.1490 sec/batch\n", - "Epoch: 16/20... Training Step: 3089... Training loss: 1.2689... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3090... Training loss: 1.2662... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3091... Training loss: 1.2649... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 3092... Training loss: 1.2338... 0.1482 sec/batch\n", - "Epoch: 16/20... Training Step: 3093... Training loss: 1.2265... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3094... Training loss: 1.2652... 0.1482 sec/batch\n", - "Epoch: 16/20... Training Step: 3095... Training loss: 1.2571... 0.1471 sec/batch\n", - "Epoch: 16/20... Training Step: 3096... Training loss: 1.2272... 0.1471 sec/batch\n", - "Epoch: 16/20... Training Step: 3097... Training loss: 1.2801... 0.1481 sec/batch\n", - "Epoch: 16/20... Training Step: 3098... Training loss: 1.2687... 0.1476 sec/batch\n", - "Epoch: 16/20... Training Step: 3099... Training loss: 1.2507... 0.1471 sec/batch\n", - "Epoch: 16/20... Training Step: 3100... Training loss: 1.2304... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3101... Training loss: 1.2214... 0.1473 sec/batch\n", - "Epoch: 16/20... Training Step: 3102... Training loss: 1.2425... 0.1468 sec/batch\n", - "Epoch: 16/20... Training Step: 3103... Training loss: 1.2878... 0.1475 sec/batch\n", - "Epoch: 16/20... Training Step: 3104... Training loss: 1.2697... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 3105... Training loss: 1.2705... 0.1475 sec/batch\n", - "Epoch: 16/20... Training Step: 3106... Training loss: 1.2575... 0.1484 sec/batch\n", - "Epoch: 16/20... Training Step: 3107... Training loss: 1.2935... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3108... Training loss: 1.2909... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3109... Training loss: 1.2696... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3110... Training loss: 1.2777... 0.1495 sec/batch\n", - "Epoch: 16/20... Training Step: 3111... Training loss: 1.3147... 0.1507 sec/batch\n", - "Epoch: 16/20... Training Step: 3112... Training loss: 1.2787... 0.1472 sec/batch\n", - "Epoch: 16/20... Training Step: 3113... Training loss: 1.2689... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3114... Training loss: 1.2995... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3115... Training loss: 1.2508... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3116... Training loss: 1.2866... 0.1479 sec/batch\n", - "Epoch: 16/20... Training Step: 3117... Training loss: 1.2850... 0.1480 sec/batch\n", - "Epoch: 16/20... Training Step: 3118... 
Training loss: 1.2979... 0.1490 sec/batch\n", - "Epoch: 16/20... Training Step: 3119... Training loss: 1.2971... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3120... Training loss: 1.2631... 0.1476 sec/batch\n", - "Epoch: 16/20... Training Step: 3121... Training loss: 1.2376... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3122... Training loss: 1.2337... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3123... Training loss: 1.2724... 0.1494 sec/batch\n", - "Epoch: 16/20... Training Step: 3124... Training loss: 1.2581... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 3125... Training loss: 1.2593... 0.1506 sec/batch\n", - "Epoch: 16/20... Training Step: 3126... Training loss: 1.2590... 0.1481 sec/batch\n", - "Epoch: 16/20... Training Step: 3127... Training loss: 1.2681... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3128... Training loss: 1.2540... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3129... Training loss: 1.2367... 0.1486 sec/batch\n", - "Epoch: 16/20... Training Step: 3130... Training loss: 1.2850... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 3131... Training loss: 1.2901... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 3132... Training loss: 1.2640... 0.1491 sec/batch\n", - "Epoch: 16/20... Training Step: 3133... Training loss: 1.2629... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3134... Training loss: 1.2683... 0.1468 sec/batch\n", - "Epoch: 16/20... Training Step: 3135... Training loss: 1.2670... 0.1473 sec/batch\n", - "Epoch: 16/20... Training Step: 3136... Training loss: 1.2532... 0.1479 sec/batch\n", - "Epoch: 16/20... Training Step: 3137... Training loss: 1.2838... 0.1501 sec/batch\n", - "Epoch: 16/20... Training Step: 3138... Training loss: 1.3269... 0.1470 sec/batch\n", - "Epoch: 16/20... Training Step: 3139... Training loss: 1.2731... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3140... Training loss: 1.2725... 0.1468 sec/batch\n", - "Epoch: 16/20... Training Step: 3141... Training loss: 1.2613... 0.1484 sec/batch\n", - "Epoch: 16/20... Training Step: 3142... Training loss: 1.2581... 0.1495 sec/batch\n", - "Epoch: 16/20... Training Step: 3143... Training loss: 1.2943... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3144... Training loss: 1.2689... 0.1481 sec/batch\n", - "Epoch: 16/20... Training Step: 3145... Training loss: 1.2777... 0.1479 sec/batch\n", - "Epoch: 16/20... Training Step: 3146... Training loss: 1.2370... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3147... Training loss: 1.2555... 0.1481 sec/batch\n", - "Epoch: 16/20... Training Step: 3148... Training loss: 1.2937... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3149... Training loss: 1.2456... 0.1498 sec/batch\n", - "Epoch: 16/20... Training Step: 3150... Training loss: 1.2408... 0.1479 sec/batch\n", - "Epoch: 16/20... Training Step: 3151... Training loss: 1.2460... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3152... Training loss: 1.2609... 0.1484 sec/batch\n", - "Epoch: 16/20... Training Step: 3153... Training loss: 1.2641... 0.1483 sec/batch\n", - "Epoch: 16/20... Training Step: 3154... Training loss: 1.2537... 0.1487 sec/batch\n", - "Epoch: 16/20... Training Step: 3155... Training loss: 1.2624... 0.1491 sec/batch\n", - "Epoch: 16/20... Training Step: 3156... Training loss: 1.2541... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 3157... Training loss: 1.2874... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 3158... Training loss: 1.2575... 
0.1482 sec/batch\n", - "Epoch: 16/20... Training Step: 3159... Training loss: 1.2596... 0.1488 sec/batch\n", - "Epoch: 16/20... Training Step: 3160... Training loss: 1.2605... 0.1489 sec/batch\n", - "Epoch: 16/20... Training Step: 3161... Training loss: 1.2383... 0.1486 sec/batch\n", - "Epoch: 16/20... Training Step: 3162... Training loss: 1.2447... 0.1482 sec/batch\n", - "Epoch: 16/20... Training Step: 3163... Training loss: 1.2610... 0.1478 sec/batch\n", - "Epoch: 16/20... Training Step: 3164... Training loss: 1.2421... 0.1477 sec/batch\n", - "Epoch: 16/20... Training Step: 3165... Training loss: 1.2242... 0.1510 sec/batch\n", - "Epoch: 16/20... Training Step: 3166... Training loss: 1.2652... 0.1474 sec/batch\n", - "Epoch: 16/20... Training Step: 3167... Training loss: 1.2565... 0.1501 sec/batch\n", - "Epoch: 16/20... Training Step: 3168... Training loss: 1.2520... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3169... Training loss: 1.3944... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3170... Training loss: 1.2867... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3171... Training loss: 1.2664... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3172... Training loss: 1.2796... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3173... Training loss: 1.2375... 0.1477 sec/batch\n", - "Epoch: 17/20... Training Step: 3174... Training loss: 1.2220... 0.1513 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 17/20... Training Step: 3175... Training loss: 1.2730... 0.1476 sec/batch\n", - "Epoch: 17/20... Training Step: 3176... Training loss: 1.2584... 0.1496 sec/batch\n", - "Epoch: 17/20... Training Step: 3177... Training loss: 1.2777... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3178... Training loss: 1.2723... 0.1504 sec/batch\n", - "Epoch: 17/20... Training Step: 3179... Training loss: 1.2478... 0.1481 sec/batch\n", - "Epoch: 17/20... Training Step: 3180... Training loss: 1.2613... 0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3181... Training loss: 1.2692... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3182... Training loss: 1.2838... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3183... Training loss: 1.2395... 0.1488 sec/batch\n", - "Epoch: 17/20... Training Step: 3184... Training loss: 1.2392... 0.1489 sec/batch\n", - "Epoch: 17/20... Training Step: 3185... Training loss: 1.2700... 0.1508 sec/batch\n", - "Epoch: 17/20... Training Step: 3186... Training loss: 1.2845... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3187... Training loss: 1.2628... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3188... Training loss: 1.2942... 0.1488 sec/batch\n", - "Epoch: 17/20... Training Step: 3189... Training loss: 1.2471... 0.1481 sec/batch\n", - "Epoch: 17/20... Training Step: 3190... Training loss: 1.2715... 0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3191... Training loss: 1.2588... 0.1483 sec/batch\n", - "Epoch: 17/20... Training Step: 3192... Training loss: 1.2839... 0.1499 sec/batch\n", - "Epoch: 17/20... Training Step: 3193... Training loss: 1.2576... 0.1475 sec/batch\n", - "Epoch: 17/20... Training Step: 3194... Training loss: 1.2239... 0.1490 sec/batch\n", - "Epoch: 17/20... Training Step: 3195... Training loss: 1.2296... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3196... Training loss: 1.2833... 0.1481 sec/batch\n", - "Epoch: 17/20... Training Step: 3197... Training loss: 1.2672... 0.1480 sec/batch\n", - "Epoch: 17/20... 
Training Step: 3198... Training loss: 1.2792... 0.1474 sec/batch\n", - "Epoch: 17/20... Training Step: 3199... Training loss: 1.2536... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3200... Training loss: 1.2266... 0.1477 sec/batch\n", - "Epoch: 17/20... Training Step: 3201... Training loss: 1.2633... 0.1512 sec/batch\n", - "Epoch: 17/20... Training Step: 3202... Training loss: 1.2697... 0.1512 sec/batch\n", - "Epoch: 17/20... Training Step: 3203... Training loss: 1.2521... 0.1519 sec/batch\n", - "Epoch: 17/20... Training Step: 3204... Training loss: 1.2624... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3205... Training loss: 1.2322... 0.1510 sec/batch\n", - "Epoch: 17/20... Training Step: 3206... Training loss: 1.2141... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3207... Training loss: 1.2013... 0.1521 sec/batch\n", - "Epoch: 17/20... Training Step: 3208... Training loss: 1.2490... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3209... Training loss: 1.2370... 0.1477 sec/batch\n", - "Epoch: 17/20... Training Step: 3210... Training loss: 1.2975... 0.1487 sec/batch\n", - "Epoch: 17/20... Training Step: 3211... Training loss: 1.2492... 0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3212... Training loss: 1.2299... 0.1470 sec/batch\n", - "Epoch: 17/20... Training Step: 3213... Training loss: 1.2702... 0.1474 sec/batch\n", - "Epoch: 17/20... Training Step: 3214... Training loss: 1.2396... 0.1504 sec/batch\n", - "Epoch: 17/20... Training Step: 3215... Training loss: 1.2537... 0.1483 sec/batch\n", - "Epoch: 17/20... Training Step: 3216... Training loss: 1.2523... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3217... Training loss: 1.2480... 0.1487 sec/batch\n", - "Epoch: 17/20... Training Step: 3218... Training loss: 1.2765... 0.1483 sec/batch\n", - "Epoch: 17/20... Training Step: 3219... Training loss: 1.2269... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3220... Training loss: 1.2987... 0.1470 sec/batch\n", - "Epoch: 17/20... Training Step: 3221... Training loss: 1.2663... 0.1477 sec/batch\n", - "Epoch: 17/20... Training Step: 3222... Training loss: 1.2730... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3223... Training loss: 1.2535... 0.1481 sec/batch\n", - "Epoch: 17/20... Training Step: 3224... Training loss: 1.2550... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3225... Training loss: 1.2602... 0.1471 sec/batch\n", - "Epoch: 17/20... Training Step: 3226... Training loss: 1.2524... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3227... Training loss: 1.2333... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3228... Training loss: 1.2938... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3229... Training loss: 1.2693... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3230... Training loss: 1.3012... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3231... Training loss: 1.2761... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3232... Training loss: 1.2677... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3233... Training loss: 1.2545... 0.1491 sec/batch\n", - "Epoch: 17/20... Training Step: 3234... Training loss: 1.2711... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3235... Training loss: 1.2862... 0.1476 sec/batch\n", - "Epoch: 17/20... Training Step: 3236... Training loss: 1.2490... 0.1503 sec/batch\n", - "Epoch: 17/20... Training Step: 3237... Training loss: 1.2658... 0.1489 sec/batch\n", - "Epoch: 17/20... Training Step: 3238... 
Training loss: 1.2483... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3239... Training loss: 1.2965... 0.1469 sec/batch\n", - "Epoch: 17/20... Training Step: 3240... Training loss: 1.2804... 0.1474 sec/batch\n", - "Epoch: 17/20... Training Step: 3241... Training loss: 1.2982... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3242... Training loss: 1.2470... 0.1477 sec/batch\n", - "Epoch: 17/20... Training Step: 3243... Training loss: 1.2674... 0.1485 sec/batch\n", - "Epoch: 17/20... Training Step: 3244... Training loss: 1.2862... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3245... Training loss: 1.2658... 0.1498 sec/batch\n", - "Epoch: 17/20... Training Step: 3246... Training loss: 1.2556... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3247... Training loss: 1.2074... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3248... Training loss: 1.2719... 0.1468 sec/batch\n", - "Epoch: 17/20... Training Step: 3249... Training loss: 1.2283... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3250... Training loss: 1.2687... 0.1514 sec/batch\n", - "Epoch: 17/20... Training Step: 3251... Training loss: 1.2389... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3252... Training loss: 1.2486... 0.1485 sec/batch\n", - "Epoch: 17/20... Training Step: 3253... Training loss: 1.2342... 0.1467 sec/batch\n", - "Epoch: 17/20... Training Step: 3254... Training loss: 1.2455... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3255... Training loss: 1.2330... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3256... Training loss: 1.2381... 0.1471 sec/batch\n", - "Epoch: 17/20... Training Step: 3257... Training loss: 1.2342... 0.1466 sec/batch\n", - "Epoch: 17/20... Training Step: 3258... Training loss: 1.2643... 0.1474 sec/batch\n", - "Epoch: 17/20... Training Step: 3259... Training loss: 1.2361... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3260... Training loss: 1.2432... 0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3261... Training loss: 1.2291... 0.1474 sec/batch\n", - "Epoch: 17/20... Training Step: 3262... Training loss: 1.2297... 0.1506 sec/batch\n", - "Epoch: 17/20... Training Step: 3263... Training loss: 1.2409... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3264... Training loss: 1.2674... 0.1512 sec/batch\n", - "Epoch: 17/20... Training Step: 3265... Training loss: 1.2687... 0.1491 sec/batch\n", - "Epoch: 17/20... Training Step: 3266... Training loss: 1.2129... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3267... Training loss: 1.2306... 0.1487 sec/batch\n", - "Epoch: 17/20... Training Step: 3268... Training loss: 1.2375... 0.1496 sec/batch\n", - "Epoch: 17/20... Training Step: 3269... Training loss: 1.2620... 0.1488 sec/batch\n", - "Epoch: 17/20... Training Step: 3270... Training loss: 1.2554... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3271... Training loss: 1.2493... 0.1483 sec/batch\n", - "Epoch: 17/20... Training Step: 3272... Training loss: 1.2541... 0.1480 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 17/20... Training Step: 3273... Training loss: 1.2404... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3274... Training loss: 1.2471... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3275... Training loss: 1.2534... 0.1475 sec/batch\n", - "Epoch: 17/20... Training Step: 3276... Training loss: 1.2622... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3277... Training loss: 1.2507... 
0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3278... Training loss: 1.2618... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3279... Training loss: 1.2445... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3280... Training loss: 1.2590... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3281... Training loss: 1.2557... 0.1493 sec/batch\n", - "Epoch: 17/20... Training Step: 3282... Training loss: 1.2503... 0.1492 sec/batch\n", - "Epoch: 17/20... Training Step: 3283... Training loss: 1.2273... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3284... Training loss: 1.2144... 0.1477 sec/batch\n", - "Epoch: 17/20... Training Step: 3285... Training loss: 1.2482... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3286... Training loss: 1.2597... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3287... Training loss: 1.2523... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3288... Training loss: 1.2556... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3289... Training loss: 1.2439... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3290... Training loss: 1.2091... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3291... Training loss: 1.2120... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3292... Training loss: 1.2524... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3293... Training loss: 1.2473... 0.1468 sec/batch\n", - "Epoch: 17/20... Training Step: 3294... Training loss: 1.2073... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3295... Training loss: 1.2578... 0.1489 sec/batch\n", - "Epoch: 17/20... Training Step: 3296... Training loss: 1.2583... 0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3297... Training loss: 1.2341... 0.1512 sec/batch\n", - "Epoch: 17/20... Training Step: 3298... Training loss: 1.2150... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3299... Training loss: 1.2046... 0.1483 sec/batch\n", - "Epoch: 17/20... Training Step: 3300... Training loss: 1.2314... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3301... Training loss: 1.2773... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3302... Training loss: 1.2528... 0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3303... Training loss: 1.2616... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3304... Training loss: 1.2523... 0.1488 sec/batch\n", - "Epoch: 17/20... Training Step: 3305... Training loss: 1.2774... 0.1465 sec/batch\n", - "Epoch: 17/20... Training Step: 3306... Training loss: 1.2721... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3307... Training loss: 1.2672... 0.1470 sec/batch\n", - "Epoch: 17/20... Training Step: 3308... Training loss: 1.2635... 0.1493 sec/batch\n", - "Epoch: 17/20... Training Step: 3309... Training loss: 1.3069... 0.1491 sec/batch\n", - "Epoch: 17/20... Training Step: 3310... Training loss: 1.2739... 0.1483 sec/batch\n", - "Epoch: 17/20... Training Step: 3311... Training loss: 1.2529... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3312... Training loss: 1.2870... 0.1475 sec/batch\n", - "Epoch: 17/20... Training Step: 3313... Training loss: 1.2346... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3314... Training loss: 1.2784... 0.1483 sec/batch\n", - "Epoch: 17/20... Training Step: 3315... Training loss: 1.2698... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3316... Training loss: 1.2892... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3317... Training loss: 1.2766... 
0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3318... Training loss: 1.2479... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3319... Training loss: 1.2287... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3320... Training loss: 1.2281... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3321... Training loss: 1.2786... 0.1481 sec/batch\n", - "Epoch: 17/20... Training Step: 3322... Training loss: 1.2418... 0.1474 sec/batch\n", - "Epoch: 17/20... Training Step: 3323... Training loss: 1.2536... 0.1487 sec/batch\n", - "Epoch: 17/20... Training Step: 3324... Training loss: 1.2449... 0.1496 sec/batch\n", - "Epoch: 17/20... Training Step: 3325... Training loss: 1.2561... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3326... Training loss: 1.2399... 0.1470 sec/batch\n", - "Epoch: 17/20... Training Step: 3327... Training loss: 1.2338... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3328... Training loss: 1.2742... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3329... Training loss: 1.2713... 0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3330... Training loss: 1.2623... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3331... Training loss: 1.2450... 0.1485 sec/batch\n", - "Epoch: 17/20... Training Step: 3332... Training loss: 1.2531... 0.1490 sec/batch\n", - "Epoch: 17/20... Training Step: 3333... Training loss: 1.2499... 0.1503 sec/batch\n", - "Epoch: 17/20... Training Step: 3334... Training loss: 1.2472... 0.1478 sec/batch\n", - "Epoch: 17/20... Training Step: 3335... Training loss: 1.2752... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3336... Training loss: 1.3160... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3337... Training loss: 1.2642... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3338... Training loss: 1.2565... 0.1471 sec/batch\n", - "Epoch: 17/20... Training Step: 3339... Training loss: 1.2519... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3340... Training loss: 1.2484... 0.1493 sec/batch\n", - "Epoch: 17/20... Training Step: 3341... Training loss: 1.2861... 0.1488 sec/batch\n", - "Epoch: 17/20... Training Step: 3342... Training loss: 1.2614... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3343... Training loss: 1.2533... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3344... Training loss: 1.2226... 0.1505 sec/batch\n", - "Epoch: 17/20... Training Step: 3345... Training loss: 1.2436... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3346... Training loss: 1.2840... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3347... Training loss: 1.2259... 0.1474 sec/batch\n", - "Epoch: 17/20... Training Step: 3348... Training loss: 1.2234... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3349... Training loss: 1.2409... 0.1479 sec/batch\n", - "Epoch: 17/20... Training Step: 3350... Training loss: 1.2546... 0.1482 sec/batch\n", - "Epoch: 17/20... Training Step: 3351... Training loss: 1.2443... 0.1508 sec/batch\n", - "Epoch: 17/20... Training Step: 3352... Training loss: 1.2354... 0.1473 sec/batch\n", - "Epoch: 17/20... Training Step: 3353... Training loss: 1.2474... 0.1475 sec/batch\n", - "Epoch: 17/20... Training Step: 3354... Training loss: 1.2273... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3355... Training loss: 1.2810... 0.1485 sec/batch\n", - "Epoch: 17/20... Training Step: 3356... Training loss: 1.2532... 0.1476 sec/batch\n", - "Epoch: 17/20... Training Step: 3357... Training loss: 1.2392... 
0.1469 sec/batch\n", - "Epoch: 17/20... Training Step: 3358... Training loss: 1.2526... 0.1494 sec/batch\n", - "Epoch: 17/20... Training Step: 3359... Training loss: 1.2220... 0.1472 sec/batch\n", - "Epoch: 17/20... Training Step: 3360... Training loss: 1.2333... 0.1485 sec/batch\n", - "Epoch: 17/20... Training Step: 3361... Training loss: 1.2462... 0.1480 sec/batch\n", - "Epoch: 17/20... Training Step: 3362... Training loss: 1.2392... 0.1476 sec/batch\n", - "Epoch: 17/20... Training Step: 3363... Training loss: 1.2151... 0.1484 sec/batch\n", - "Epoch: 17/20... Training Step: 3364... Training loss: 1.2526... 0.1487 sec/batch\n", - "Epoch: 17/20... Training Step: 3365... Training loss: 1.2507... 0.1486 sec/batch\n", - "Epoch: 17/20... Training Step: 3366... Training loss: 1.2456... 0.1475 sec/batch\n", - "Epoch: 18/20... Training Step: 3367... Training loss: 1.3820... 0.1477 sec/batch\n", - "Epoch: 18/20... Training Step: 3368... Training loss: 1.2677... 0.1484 sec/batch\n", - "Epoch: 18/20... Training Step: 3369... Training loss: 1.2394... 0.1479 sec/batch\n", - "Epoch: 18/20... Training Step: 3370... Training loss: 1.2669... 0.1491 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 18/20... Training Step: 3371... Training loss: 1.2325... 0.1478 sec/batch\n", - "Epoch: 18/20... Training Step: 3372... Training loss: 1.2207... 0.1480 sec/batch\n", - "Epoch: 18/20... Training Step: 3373... Training loss: 1.2490... 0.1482 sec/batch\n", - "Epoch: 18/20... Training Step: 3374... Training loss: 1.2487... 0.1470 sec/batch\n", - "Epoch: 18/20... Training Step: 3375... Training loss: 1.2643... 0.1476 sec/batch\n", - "Epoch: 18/20... Training Step: 3376... Training loss: 1.2587... 0.1484 sec/batch\n", - "Epoch: 18/20... Training Step: 3377... Training loss: 1.2436... 0.1510 sec/batch\n", - "Epoch: 18/20... Training Step: 3378... Training loss: 1.2519... 0.1478 sec/batch\n", - "Epoch: 18/20... Training Step: 3379... Training loss: 1.2559... 0.1484 sec/batch\n", - "Epoch: 18/20... Training Step: 3380... Training loss: 1.2648... 0.1479 sec/batch\n", - "Epoch: 18/20... Training Step: 3381... Training loss: 1.2332... 0.1474 sec/batch\n", - "Epoch: 18/20... Training Step: 3382... Training loss: 1.2333... 0.1489 sec/batch\n", - "Epoch: 18/20... Training Step: 3383... Training loss: 1.2707... 0.1475 sec/batch\n", - "Epoch: 18/20... Training Step: 3384... Training loss: 1.2733... 0.1473 sec/batch\n", - "Epoch: 18/20... Training Step: 3385... Training loss: 1.2555... 0.1472 sec/batch\n", - "Epoch: 18/20... Training Step: 3386... Training loss: 1.2789... 0.1484 sec/batch\n", - "Epoch: 18/20... Training Step: 3387... Training loss: 1.2509... 0.1491 sec/batch\n", - "Epoch: 18/20... Training Step: 3388... Training loss: 1.2706... 0.1473 sec/batch\n", - "Epoch: 18/20... Training Step: 3389... Training loss: 1.2479... 0.1486 sec/batch\n", - "Epoch: 18/20... Training Step: 3390... Training loss: 1.2774... 0.1479 sec/batch\n", - "Epoch: 18/20... Training Step: 3391... Training loss: 1.2601... 0.1484 sec/batch\n", - "Epoch: 18/20... Training Step: 3392... Training loss: 1.2190... 0.1472 sec/batch\n", - "Epoch: 18/20... Training Step: 3393... Training loss: 1.2220... 0.1468 sec/batch\n", - "Epoch: 18/20... Training Step: 3394... Training loss: 1.2718... 0.1488 sec/batch\n", - "Epoch: 18/20... Training Step: 3395... Training loss: 1.2613... 0.1482 sec/batch\n", - "Epoch: 18/20... Training Step: 3396... Training loss: 1.2712... 0.1487 sec/batch\n", - "Epoch: 18/20... 
Training Step: 3397... Training loss: 1.2408... 0.1473 sec/batch\n", - "Epoch: 18/20... Training Step: 3398... Training loss: 1.2350... 0.1487 sec/batch\n", - "Epoch: 18/20... Training Step: 3399... Training loss: 1.2499... 0.1487 sec/batch\n", - "Epoch: 18/20... Training Step: 3400... Training loss: 1.2471... 0.1477 sec/batch\n", - "Epoch: 18/20... Training Step: 3401... Training loss: 1.2411... 0.1535 sec/batch\n", - "Epoch: 18/20... Training Step: 3402... Training loss: 1.2537... 0.1531 sec/batch\n", - "Epoch: 18/20... Training Step: 3403... Training loss: 1.2349... 0.1516 sec/batch\n", - "Epoch: 18/20... Training Step: 3404... Training loss: 1.2097... 0.1486 sec/batch\n", - "Epoch: 18/20... Training Step: 3405... Training loss: 1.1983... 0.1485 sec/batch\n", - "Epoch: 18/20... Training Step: 3406... Training loss: 1.2306... 0.1469 sec/batch\n", - "Epoch: 18/20... Training Step: 3407... Training loss: 1.2282... 0.1492 sec/batch\n", - "Epoch: 18/20... Training Step: 3408... Training loss: 1.2880... 0.1482 sec/batch\n", - "Epoch: 18/20... Training Step: 3409... Training loss: 1.2372... 0.1513 sec/batch\n", - "Epoch: 18/20... Training Step: 3410... Training loss: 1.2246... 0.1474 sec/batch\n", - "Epoch: 18/20... Training Step: 3411... Training loss: 1.2528... 0.1491 sec/batch\n", - "Epoch: 18/20... Training Step: 3412... Training loss: 1.2240... 0.1477 sec/batch\n", - "Epoch: 18/20... Training Step: 3413... Training loss: 1.2354... 0.1479 sec/batch\n", - "Epoch: 18/20... Training Step: 3414... Training loss: 1.2352... 0.1479 sec/batch\n", - "Epoch: 18/20... Training Step: 3415... Training loss: 1.2493... 0.1479 sec/batch\n", - "Epoch: 18/20... Training Step: 3416... Training loss: 1.2616... 0.1478 sec/batch\n", - "Epoch: 18/20... Training Step: 3417... Training loss: 1.2255... 0.1482 sec/batch\n", - "Epoch: 18/20... Training Step: 3418... Training loss: 1.2837... 0.1481 sec/batch\n", - "Epoch: 18/20... Training Step: 3419... Training loss: 1.2491... 0.1476 sec/batch\n", - "Epoch: 18/20... Training Step: 3420... Training loss: 1.2601... 0.1473 sec/batch\n", - "Epoch: 18/20... Training Step: 3421... Training loss: 1.2361... 0.1473 sec/batch\n", - "Epoch: 18/20... Training Step: 3422... Training loss: 1.2421... 0.1473 sec/batch\n", - "Epoch: 18/20... Training Step: 3423... Training loss: 1.2700... 0.1492 sec/batch\n", - "Epoch: 18/20... Training Step: 3424... Training loss: 1.2362... 0.1475 sec/batch\n", - "Epoch: 18/20... Training Step: 3425... Training loss: 1.2307... 0.1483 sec/batch\n", - "Epoch: 18/20... Training Step: 3426... Training loss: 1.2856... 0.1483 sec/batch\n", - "Epoch: 18/20... Training Step: 3427... Training loss: 1.2638... 0.1481 sec/batch\n", - "Epoch: 18/20... Training Step: 3428... Training loss: 1.2862... 0.1504 sec/batch\n", - "Epoch: 18/20... Training Step: 3429... Training loss: 1.2636... 0.1474 sec/batch\n", - "Epoch: 18/20... Training Step: 3430... Training loss: 1.2579... 0.1502 sec/batch\n", - "Epoch: 18/20... Training Step: 3431... Training loss: 1.2519... 0.1479 sec/batch\n", - "Epoch: 18/20... Training Step: 3432... Training loss: 1.2562... 0.1477 sec/batch\n", - "Epoch: 18/20... Training Step: 3433... Training loss: 1.2669... 0.1475 sec/batch\n", - "Epoch: 18/20... Training Step: 3434... Training loss: 1.2337... 0.1484 sec/batch\n", - "Epoch: 18/20... Training Step: 3435... Training loss: 1.2526... 0.1492 sec/batch\n", - "Epoch: 18/20... Training Step: 3436... Training loss: 1.2303... 0.1486 sec/batch\n", - "Epoch: 18/20... Training Step: 3437... 
Training loss: 1.2858... 0.1514 sec/batch\n",
- [... several hundred removed notebook-output lines: "Epoch: 18/20" through "Epoch: 20/20", Training Steps 3438-3953, training loss fluctuating around 1.2 at roughly 0.148 sec/batch ...]
- "Epoch: 20/20... Training Step: 3954... Training loss: 1.1981...
0.1491 sec/batch\n", - "Epoch: 20/20... Training Step: 3955... Training loss: 1.2165... 0.1482 sec/batch\n", - "Epoch: 20/20... Training Step: 3956... Training loss: 1.2050... 0.1468 sec/batch\n", - "Epoch: 20/20... Training Step: 3957... Training loss: 1.1838... 0.1484 sec/batch\n", - "Epoch: 20/20... Training Step: 3958... Training loss: 1.2309... 0.1487 sec/batch\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 20/20... Training Step: 3959... Training loss: 1.2127... 0.1472 sec/batch\n", - "Epoch: 20/20... Training Step: 3960... Training loss: 1.2155... 0.1483 sec/batch\n" - ] - } - ], + "outputs": [], "source": [ "epochs = 20\n", "# Save every N iterations\n", @@ -5251,7 +1044,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.6.0" } }, "nbformat": 4, diff --git a/reinforcement/Q-learning-cart.ipynb b/reinforcement/Q-learning-cart.ipynb index 67c5fa2a47..4afd2c476c 100644 --- a/reinforcement/Q-learning-cart.ipynb +++ b/reinforcement/Q-learning-cart.ipynb @@ -2,10 +2,7 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# Deep Q-learning\n", "\n", @@ -20,9 +17,7 @@ "cell_type": "code", "execution_count": 1, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -41,11 +36,7 @@ { "cell_type": "code", "execution_count": 150, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -62,10 +53,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "We interact with the simulation through `env`. To show the simulation running, you can use `env.render()` to render one frame. Passing in an action as an integer to `env.step` will generate the next step in the simulation. You can see how many actions are possible from `env.action_space` and to get a random action you can use `env.action_space.sample()`. This is general to all Gym games. In the Cart-Pole game, there are two possible actions, moving the cart left or right. So there are two actions we can take, encoded as 0 and 1.\n", "\n", @@ -75,11 +63,7 @@ { "cell_type": "code", "execution_count": 152, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "env.reset()\n", @@ -95,20 +79,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "To shut the window showing the simulation, use `env.close()`." ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "If you ran the simulation above, we can look at the rewards:" ] @@ -116,11 +94,7 @@ { "cell_type": "code", "execution_count": 5, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -136,20 +110,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The game resets after the pole has fallen past a certain angle. For each frame while the simulation is running, it returns a reward of 1.0. The longer the game runs, the more reward we get. Then, our network's goal is to maximize the reward by keeping the pole vertical. 
It will do this by moving the cart to the left and the right." ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Q-Network\n", "\n", @@ -180,11 +148,7 @@ { "cell_type": "code", "execution_count": 153, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "class QNetwork:\n", @@ -221,10 +185,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Experience replay\n", "\n", @@ -239,9 +200,7 @@ "cell_type": "code", "execution_count": 3, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -306,11 +265,7 @@ { "cell_type": "code", "execution_count": 174, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "train_episodes = 1000 # max number of episodes to learn from\n", @@ -336,9 +291,7 @@ "cell_type": "code", "execution_count": 175, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -348,10 +301,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Populate the experience memory\n", "\n", @@ -361,11 +311,7 @@ { "cell_type": "code", "execution_count": 179, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "# Initialize the simulation\n", @@ -402,10 +348,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Training\n", "\n", @@ -414,1019 +357,9 @@ }, { "cell_type": "code", - "execution_count": 180, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Episode: 1 Total reward: 17.0 Training loss: 1.1206 Explore P: 0.9983\n", - "Episode: 2 Total reward: 26.0 Training loss: 1.0218 Explore P: 0.9958\n", - "Episode: 3 Total reward: 25.0 Training loss: 1.0208 Explore P: 0.9933\n", - "Episode: 4 Total reward: 20.0 Training loss: 1.0127 Explore P: 0.9913\n", - "Episode: 5 Total reward: 14.0 Training loss: 0.9955 Explore P: 0.9900\n", - "Episode: 6 Total reward: 10.0 Training loss: 0.9772 Explore P: 0.9890\n", - "Episode: 7 Total reward: 15.0 Training loss: 1.0659 Explore P: 0.9875\n", - "Episode: 8 Total reward: 17.0 Training loss: 1.0213 Explore P: 0.9858\n", - "Episode: 9 Total reward: 36.0 Training loss: 0.9342 Explore P: 0.9823\n", - "Episode: 10 Total reward: 25.0 Training loss: 1.1546 Explore P: 0.9799\n", - "Episode: 11 Total reward: 34.0 Training loss: 1.0758 Explore P: 0.9766\n", - "Episode: 12 Total reward: 14.0 Training loss: 1.2110 Explore P: 0.9753\n", - "Episode: 13 Total reward: 15.0 Training loss: 1.1440 Explore P: 0.9738\n", - "Episode: 14 Total reward: 23.0 Training loss: 1.2101 Explore P: 0.9716\n", - "Episode: 15 Total reward: 25.0 Training loss: 1.1455 Explore P: 0.9692\n", - "Episode: 16 Total reward: 22.0 Training loss: 1.3338 Explore P: 0.9671\n", - "Episode: 17 Total reward: 22.0 Training loss: 1.4460 Explore P: 0.9650\n", - "Episode: 18 Total reward: 29.0 Training loss: 1.2165 Explore P: 0.9622\n", - "Episode: 19 Total reward: 17.0 Training loss: 1.3380 Explore P: 0.9606\n", - "Episode: 20 Total 
reward: 10.0 Training loss: 1.8483 Explore P: 0.9597\n",
- [... removed notebook-output lines for Episodes 21-243: Total reward, Training loss, and Explore P decaying from roughly 0.96 to 0.62 ...]
- "Episode: 244 Total reward: 9.0 Training loss: 1.0369 Explore P:
0.6168\n", - "Episode: 245 Total reward: 18.0 Training loss: 14.9495 Explore P: 0.6157\n", - "Episode: 246 Total reward: 48.0 Training loss: 19.2856 Explore P: 0.6128\n", - "Episode: 247 Total reward: 20.0 Training loss: 1.0256 Explore P: 0.6116\n", - "Episode: 248 Total reward: 33.0 Training loss: 9.9197 Explore P: 0.6096\n", - "Episode: 249 Total reward: 73.0 Training loss: 9.3543 Explore P: 0.6053\n", - "Episode: 250 Total reward: 50.0 Training loss: 16.1099 Explore P: 0.6023\n", - "Episode: 251 Total reward: 31.0 Training loss: 43.6362 Explore P: 0.6005\n", - "Episode: 252 Total reward: 57.0 Training loss: 9.9603 Explore P: 0.5971\n", - "Episode: 253 Total reward: 21.0 Training loss: 1.4524 Explore P: 0.5959\n", - "Episode: 254 Total reward: 140.0 Training loss: 18.0943 Explore P: 0.5877\n", - "Episode: 255 Total reward: 27.0 Training loss: 10.9844 Explore P: 0.5862\n", - "Episode: 256 Total reward: 30.0 Training loss: 1.1315 Explore P: 0.5844\n", - "Episode: 257 Total reward: 27.0 Training loss: 2.8768 Explore P: 0.5829\n", - "Episode: 258 Total reward: 34.0 Training loss: 34.8262 Explore P: 0.5810\n", - "Episode: 259 Total reward: 20.0 Training loss: 16.5783 Explore P: 0.5798\n", - "Episode: 260 Total reward: 37.0 Training loss: 18.8078 Explore P: 0.5777\n", - "Episode: 261 Total reward: 23.0 Training loss: 1.1251 Explore P: 0.5764\n", - "Episode: 262 Total reward: 41.0 Training loss: 14.8541 Explore P: 0.5741\n", - "Episode: 263 Total reward: 31.0 Training loss: 21.0861 Explore P: 0.5723\n", - "Episode: 264 Total reward: 37.0 Training loss: 22.5903 Explore P: 0.5703\n", - "Episode: 265 Total reward: 16.0 Training loss: 19.8100 Explore P: 0.5694\n", - "Episode: 266 Total reward: 58.0 Training loss: 1.7579 Explore P: 0.5661\n", - "Episode: 267 Total reward: 43.0 Training loss: 26.5017 Explore P: 0.5637\n", - "Episode: 268 Total reward: 45.0 Training loss: 16.9318 Explore P: 0.5613\n", - "Episode: 269 Total reward: 61.0 Training loss: 58.1334 Explore P: 0.5579\n", - "Episode: 270 Total reward: 199.0 Training loss: 25.5074 Explore P: 0.5471\n", - "Episode: 271 Total reward: 57.0 Training loss: 9.2742 Explore P: 0.5441\n", - "Episode: 272 Total reward: 27.0 Training loss: 1.4723 Explore P: 0.5426\n", - "Episode: 273 Total reward: 41.0 Training loss: 2.2789 Explore P: 0.5404\n", - "Episode: 274 Total reward: 49.0 Training loss: 2.1655 Explore P: 0.5378\n", - "Episode: 275 Total reward: 16.0 Training loss: 28.9118 Explore P: 0.5370\n", - "Episode: 276 Total reward: 22.0 Training loss: 74.8766 Explore P: 0.5358\n", - "Episode: 277 Total reward: 36.0 Training loss: 1.6711 Explore P: 0.5340\n", - "Episode: 278 Total reward: 14.0 Training loss: 1.9132 Explore P: 0.5332\n", - "Episode: 279 Total reward: 12.0 Training loss: 15.4892 Explore P: 0.5326\n", - "Episode: 280 Total reward: 25.0 Training loss: 1.8606 Explore P: 0.5313\n", - "Episode: 281 Total reward: 32.0 Training loss: 1.1279 Explore P: 0.5296\n", - "Episode: 282 Total reward: 38.0 Training loss: 25.1395 Explore P: 0.5277\n", - "Episode: 283 Total reward: 52.0 Training loss: 1.7054 Explore P: 0.5250\n", - "Episode: 284 Total reward: 23.0 Training loss: 22.4826 Explore P: 0.5238\n", - "Episode: 285 Total reward: 61.0 Training loss: 61.2339 Explore P: 0.5207\n", - "Episode: 286 Total reward: 24.0 Training loss: 2.1378 Explore P: 0.5194\n", - "Episode: 287 Total reward: 41.0 Training loss: 0.8545 Explore P: 0.5174\n", - "Episode: 288 Total reward: 49.0 Training loss: 68.7279 Explore P: 0.5149\n", - "Episode: 289 Total reward: 30.0 
Training loss: 14.0972 Explore P: 0.5134\n", - "Episode: 290 Total reward: 40.0 Training loss: 36.3917 Explore P: 0.5114\n", - "Episode: 291 Total reward: 22.0 Training loss: 1.6624 Explore P: 0.5102\n", - "Episode: 292 Total reward: 24.0 Training loss: 55.8627 Explore P: 0.5090\n", - "Episode: 293 Total reward: 62.0 Training loss: 21.5204 Explore P: 0.5060\n", - "Episode: 294 Total reward: 49.0 Training loss: 30.3661 Explore P: 0.5035\n", - "Episode: 295 Total reward: 68.0 Training loss: 32.2571 Explore P: 0.5002\n", - "Episode: 296 Total reward: 74.0 Training loss: 33.9799 Explore P: 0.4966\n", - "Episode: 297 Total reward: 81.0 Training loss: 17.8939 Explore P: 0.4927\n", - "Episode: 298 Total reward: 95.0 Training loss: 27.2488 Explore P: 0.4881\n", - "Episode: 299 Total reward: 39.0 Training loss: 31.0800 Explore P: 0.4862\n", - "Episode: 300 Total reward: 111.0 Training loss: 31.5917 Explore P: 0.4810\n", - "Episode: 301 Total reward: 76.0 Training loss: 26.1014 Explore P: 0.4774\n", - "Episode: 302 Total reward: 21.0 Training loss: 1.2520 Explore P: 0.4764\n", - "Episode: 303 Total reward: 30.0 Training loss: 2.3258 Explore P: 0.4750\n", - "Episode: 304 Total reward: 24.0 Training loss: 25.2338 Explore P: 0.4739\n", - "Episode: 305 Total reward: 64.0 Training loss: 2.1968 Explore P: 0.4710\n", - "Episode: 306 Total reward: 35.0 Training loss: 1.3668 Explore P: 0.4693\n", - "Episode: 307 Total reward: 52.0 Training loss: 1.6064 Explore P: 0.4670\n", - "Episode: 308 Total reward: 43.0 Training loss: 1.2549 Explore P: 0.4650\n", - "Episode: 309 Total reward: 42.0 Training loss: 2.0016 Explore P: 0.4631\n", - "Episode: 310 Total reward: 29.0 Training loss: 2.5409 Explore P: 0.4618\n", - "Episode: 311 Total reward: 47.0 Training loss: 26.7988 Explore P: 0.4597\n", - "Episode: 312 Total reward: 90.0 Training loss: 2.6183 Explore P: 0.4556\n", - "Episode: 313 Total reward: 63.0 Training loss: 13.4143 Explore P: 0.4528\n", - "Episode: 314 Total reward: 26.0 Training loss: 1.2748 Explore P: 0.4517\n", - "Episode: 315 Total reward: 92.0 Training loss: 29.4627 Explore P: 0.4476\n", - "Episode: 316 Total reward: 76.0 Training loss: 55.6863 Explore P: 0.4443\n", - "Episode: 317 Total reward: 22.0 Training loss: 1.5616 Explore P: 0.4434\n", - "Episode: 318 Total reward: 80.0 Training loss: 31.1706 Explore P: 0.4399\n", - "Episode: 319 Total reward: 37.0 Training loss: 24.4887 Explore P: 0.4383\n", - "Episode: 320 Total reward: 92.0 Training loss: 30.5411 Explore P: 0.4344\n", - "Episode: 321 Total reward: 83.0 Training loss: 2.8237 Explore P: 0.4309\n", - "Episode: 322 Total reward: 66.0 Training loss: 2.0921 Explore P: 0.4281\n", - "Episode: 323 Total reward: 131.0 Training loss: 1.9476 Explore P: 0.4227\n", - "Episode: 324 Total reward: 136.0 Training loss: 13.6442 Explore P: 0.4171\n", - "Episode: 325 Total reward: 91.0 Training loss: 2.5814 Explore P: 0.4134\n", - "Episode: 326 Total reward: 21.0 Training loss: 1.2230 Explore P: 0.4126\n", - "Episode: 327 Total reward: 78.0 Training loss: 3.0740 Explore P: 0.4095\n", - "Episode: 328 Total reward: 94.0 Training loss: 27.0074 Explore P: 0.4057\n", - "Episode: 329 Total reward: 29.0 Training loss: 55.2112 Explore P: 0.4046\n", - "Episode: 330 Total reward: 16.0 Training loss: 39.2272 Explore P: 0.4039\n", - "Episode: 331 Total reward: 74.0 Training loss: 2.4631 Explore P: 0.4010\n", - "Episode: 332 Total reward: 75.0 Training loss: 1.8856 Explore P: 0.3981\n", - "Episode: 333 Total reward: 74.0 Training loss: 2.2539 Explore P: 0.3953\n", - 
"Episode: 334 Total reward: 46.0 Training loss: 81.3527 Explore P: 0.3935\n", - "Episode: 335 Total reward: 199.0 Training loss: 2.3580 Explore P: 0.3859\n", - "Episode: 336 Total reward: 36.0 Training loss: 51.8734 Explore P: 0.3846\n", - "Episode: 337 Total reward: 69.0 Training loss: 85.6328 Explore P: 0.3820\n", - "Episode: 338 Total reward: 78.0 Training loss: 2.5133 Explore P: 0.3791\n", - "Episode: 339 Total reward: 78.0 Training loss: 29.1299 Explore P: 0.3762\n", - "Episode: 340 Total reward: 29.0 Training loss: 2.9610 Explore P: 0.3752\n", - "Episode: 341 Total reward: 65.0 Training loss: 23.1379 Explore P: 0.3728\n", - "Episode: 342 Total reward: 91.0 Training loss: 2.2797 Explore P: 0.3695\n", - "Episode: 343 Total reward: 80.0 Training loss: 9.5901 Explore P: 0.3667\n", - "Episode: 344 Total reward: 68.0 Training loss: 2.5456 Explore P: 0.3643\n", - "Episode: 345 Total reward: 97.0 Training loss: 24.8821 Explore P: 0.3608\n", - "Episode: 346 Total reward: 10.0 Training loss: 1.9749 Explore P: 0.3605\n", - "Episode: 347 Total reward: 101.0 Training loss: 39.8190 Explore P: 0.3570\n", - "Episode: 348 Total reward: 112.0 Training loss: 38.0154 Explore P: 0.3531\n", - "Episode: 349 Total reward: 54.0 Training loss: 1.9245 Explore P: 0.3512\n", - "Episode: 350 Total reward: 129.0 Training loss: 47.6669 Explore P: 0.3469\n", - "Episode: 351 Total reward: 73.0 Training loss: 37.4377 Explore P: 0.3444\n", - "Episode: 352 Total reward: 199.0 Training loss: 39.4635 Explore P: 0.3378\n", - "Episode: 353 Total reward: 199.0 Training loss: 3.2080 Explore P: 0.3314\n", - "Episode: 354 Total reward: 199.0 Training loss: 43.3151 Explore P: 0.3250\n", - "Episode: 355 Total reward: 199.0 Training loss: 54.4272 Explore P: 0.3188\n", - "Episode: 356 Total reward: 199.0 Training loss: 1.0675 Explore P: 0.3127\n", - "Episode: 357 Total reward: 46.0 Training loss: 2.9369 Explore P: 0.3114\n", - "Episode: 358 Total reward: 178.0 Training loss: 2.8638 Explore P: 0.3060\n", - "Episode: 359 Total reward: 75.0 Training loss: 96.8733 Explore P: 0.3038\n", - "Episode: 360 Total reward: 199.0 Training loss: 4.0032 Explore P: 0.2980\n", - "Episode: 361 Total reward: 134.0 Training loss: 1.7205 Explore P: 0.2942\n", - "Episode: 362 Total reward: 196.0 Training loss: 27.0177 Explore P: 0.2887\n", - "Episode: 363 Total reward: 178.0 Training loss: 1.4625 Explore P: 0.2838\n", - "Episode: 364 Total reward: 114.0 Training loss: 256.4741 Explore P: 0.2807\n", - "Episode: 365 Total reward: 181.0 Training loss: 2.1684 Explore P: 0.2758\n", - "Episode: 366 Total reward: 56.0 Training loss: 2.2329 Explore P: 0.2743\n", - "Episode: 367 Total reward: 78.0 Training loss: 1.7197 Explore P: 0.2723\n", - "Episode: 368 Total reward: 72.0 Training loss: 2.8824 Explore P: 0.2704\n", - "Episode: 369 Total reward: 99.0 Training loss: 2.0544 Explore P: 0.2678\n", - "Episode: 370 Total reward: 144.0 Training loss: 0.6920 Explore P: 0.2641\n", - "Episode: 371 Total reward: 115.0 Training loss: 81.5059 Explore P: 0.2612\n", - "Episode: 372 Total reward: 74.0 Training loss: 2.1665 Explore P: 0.2594\n", - "Episode: 373 Total reward: 81.0 Training loss: 1.7567 Explore P: 0.2574\n", - "Episode: 374 Total reward: 158.0 Training loss: 48.9284 Explore P: 0.2535\n", - "Episode: 375 Total reward: 93.0 Training loss: 74.1068 Explore P: 0.2512\n", - "Episode: 376 Total reward: 189.0 Training loss: 1.6984 Explore P: 0.2467\n", - "Episode: 377 Total reward: 102.0 Training loss: 1.7045 Explore P: 0.2443\n", - "Episode: 378 Total reward: 94.0 
Training loss: 1.5877 Explore P: 0.2421\n", - "Episode: 379 Total reward: 60.0 Training loss: 1.5927 Explore P: 0.2407\n", - "Episode: 380 Total reward: 77.0 Training loss: 83.0749 Explore P: 0.2390\n", - "Episode: 381 Total reward: 71.0 Training loss: 36.4124 Explore P: 0.2374\n", - "Episode: 382 Total reward: 75.0 Training loss: 2.2654 Explore P: 0.2357\n", - "Episode: 383 Total reward: 43.0 Training loss: 3.6230 Explore P: 0.2347\n", - "Episode: 384 Total reward: 48.0 Training loss: 3.7998 Explore P: 0.2336\n", - "Episode: 385 Total reward: 42.0 Training loss: 63.1771 Explore P: 0.2327\n", - "Episode: 386 Total reward: 41.0 Training loss: 3.8370 Explore P: 0.2318\n", - "Episode: 387 Total reward: 61.0 Training loss: 3.0247 Explore P: 0.2304\n", - "Episode: 388 Total reward: 124.0 Training loss: 2.2461 Explore P: 0.2277\n", - "Episode: 389 Total reward: 56.0 Training loss: 2.7025 Explore P: 0.2265\n", - "Episode: 390 Total reward: 37.0 Training loss: 2.1066 Explore P: 0.2257\n", - "Episode: 391 Total reward: 53.0 Training loss: 2.6636 Explore P: 0.2245\n", - "Episode: 392 Total reward: 49.0 Training loss: 3.5180 Explore P: 0.2235\n", - "Episode: 393 Total reward: 16.0 Training loss: 2.0325 Explore P: 0.2232\n", - "Episode: 394 Total reward: 50.0 Training loss: 2.9087 Explore P: 0.2221\n", - "Episode: 395 Total reward: 53.0 Training loss: 3.0709 Explore P: 0.2210\n", - "Episode: 396 Total reward: 37.0 Training loss: 2.5012 Explore P: 0.2202\n", - "Episode: 397 Total reward: 43.0 Training loss: 2.0324 Explore P: 0.2193\n", - "Episode: 398 Total reward: 71.0 Training loss: 3.4181 Explore P: 0.2178\n", - "Episode: 399 Total reward: 120.0 Training loss: 2.0507 Explore P: 0.2153\n", - "Episode: 400 Total reward: 115.0 Training loss: 269.0092 Explore P: 0.2130\n", - "Episode: 401 Total reward: 82.0 Training loss: 2.9670 Explore P: 0.2113\n", - "Episode: 402 Total reward: 53.0 Training loss: 3.1322 Explore P: 0.2103\n", - "Episode: 403 Total reward: 53.0 Training loss: 2.5793 Explore P: 0.2092\n", - "Episode: 404 Total reward: 105.0 Training loss: 266.5888 Explore P: 0.2071\n", - "Episode: 405 Total reward: 58.0 Training loss: 1.9971 Explore P: 0.2060\n", - "Episode: 406 Total reward: 76.0 Training loss: 273.8277 Explore P: 0.2045\n", - "Episode: 407 Total reward: 52.0 Training loss: 1.8329 Explore P: 0.2035\n", - "Episode: 408 Total reward: 144.0 Training loss: 2.1835 Explore P: 0.2007\n", - "Episode: 409 Total reward: 196.0 Training loss: 1.6952 Explore P: 0.1970\n", - "Episode: 410 Total reward: 199.0 Training loss: 186.5703 Explore P: 0.1933\n", - "Episode: 411 Total reward: 97.0 Training loss: 2.1889 Explore P: 0.1916\n", - "Episode: 412 Total reward: 162.0 Training loss: 1.8270 Explore P: 0.1886\n", - "Episode: 413 Total reward: 199.0 Training loss: 1.5334 Explore P: 0.1851\n", - "Episode: 414 Total reward: 94.0 Training loss: 89.0644 Explore P: 0.1835\n", - "Episode: 415 Total reward: 172.0 Training loss: 177.3465 Explore P: 0.1805\n", - "Episode: 416 Total reward: 156.0 Training loss: 24.8199 Explore P: 0.1779\n", - "Episode: 417 Total reward: 120.0 Training loss: 203.1927 Explore P: 0.1759\n", - "Episode: 418 Total reward: 81.0 Training loss: 1.0007 Explore P: 0.1745\n", - "Episode: 419 Total reward: 177.0 Training loss: 1.1296 Explore P: 0.1717\n", - "Episode: 420 Total reward: 127.0 Training loss: 1.4790 Explore P: 0.1696\n", - "Episode: 421 Total reward: 123.0 Training loss: 148.7142 Explore P: 0.1677\n", - "Episode: 422 Total reward: 76.0 Training loss: 1.5432 Explore P: 0.1665\n", 
- "Episode: 423 Total reward: 109.0 Training loss: 0.9437 Explore P: 0.1648\n", - "Episode: 424 Total reward: 156.0 Training loss: 1.4428 Explore P: 0.1624\n", - "Episode: 425 Total reward: 183.0 Training loss: 1.0240 Explore P: 0.1596\n", - "Episode: 426 Total reward: 199.0 Training loss: 101.1221 Explore P: 0.1567\n", - "Episode: 427 Total reward: 189.0 Training loss: 0.6519 Explore P: 0.1539\n", - "Episode: 428 Total reward: 185.0 Training loss: 0.9902 Explore P: 0.1513\n", - "Episode: 429 Total reward: 112.0 Training loss: 346.1331 Explore P: 0.1497\n", - "Episode: 430 Total reward: 99.0 Training loss: 0.8713 Explore P: 0.1483\n", - "Episode: 431 Total reward: 199.0 Training loss: 1.9936 Explore P: 0.1456\n", - "Episode: 432 Total reward: 174.0 Training loss: 153.9807 Explore P: 0.1433\n", - "Episode: 433 Total reward: 199.0 Training loss: 150.5226 Explore P: 0.1406\n", - "Episode: 434 Total reward: 166.0 Training loss: 0.6997 Explore P: 0.1385\n", - "Episode: 435 Total reward: 150.0 Training loss: 1.1301 Explore P: 0.1366\n", - "Episode: 436 Total reward: 194.0 Training loss: 131.2808 Explore P: 0.1342\n", - "Episode: 437 Total reward: 194.0 Training loss: 0.9597 Explore P: 0.1318\n", - "Episode: 438 Total reward: 100.0 Training loss: 0.3289 Explore P: 0.1306\n", - "Episode: 439 Total reward: 199.0 Training loss: 0.5204 Explore P: 0.1282\n", - "Episode: 440 Total reward: 156.0 Training loss: 1.1504 Explore P: 0.1264\n", - "Episode: 441 Total reward: 100.0 Training loss: 0.4487 Explore P: 0.1252\n", - "Episode: 442 Total reward: 140.0 Training loss: 48.6918 Explore P: 0.1236\n", - "Episode: 443 Total reward: 105.0 Training loss: 1.0646 Explore P: 0.1224\n", - "Episode: 444 Total reward: 99.0 Training loss: 0.4097 Explore P: 0.1213\n", - "Episode: 445 Total reward: 133.0 Training loss: 0.5101 Explore P: 0.1198\n", - "Episode: 446 Total reward: 77.0 Training loss: 0.6569 Explore P: 0.1190\n", - "Episode: 447 Total reward: 119.0 Training loss: 0.4804 Explore P: 0.1177\n", - "Episode: 448 Total reward: 93.0 Training loss: 0.5663 Explore P: 0.1167\n", - "Episode: 449 Total reward: 122.0 Training loss: 0.4641 Explore P: 0.1154\n", - "Episode: 450 Total reward: 82.0 Training loss: 0.4969 Explore P: 0.1145\n", - "Episode: 451 Total reward: 83.0 Training loss: 0.3306 Explore P: 0.1137\n", - "Episode: 452 Total reward: 97.0 Training loss: 131.7292 Explore P: 0.1127\n", - "Episode: 453 Total reward: 98.0 Training loss: 0.2198 Explore P: 0.1117\n", - "Episode: 454 Total reward: 70.0 Training loss: 0.5275 Explore P: 0.1110\n", - "Episode: 455 Total reward: 93.0 Training loss: 0.7534 Explore P: 0.1100\n", - "Episode: 456 Total reward: 199.0 Training loss: 0.6405 Explore P: 0.1081\n", - "Episode: 457 Total reward: 180.0 Training loss: 15.9532 Explore P: 0.1063\n", - "Episode: 458 Total reward: 199.0 Training loss: 7.7940 Explore P: 0.1044\n", - "Episode: 459 Total reward: 99.0 Training loss: 0.2215 Explore P: 0.1035\n", - "Episode: 460 Total reward: 146.0 Training loss: 1.3165 Explore P: 0.1021\n", - "Episode: 461 Total reward: 68.0 Training loss: 0.7169 Explore P: 0.1015\n", - "Episode: 462 Total reward: 130.0 Training loss: 124.4015 Explore P: 0.1003\n", - "Episode: 463 Total reward: 154.0 Training loss: 0.3014 Explore P: 0.0989\n", - "Episode: 464 Total reward: 199.0 Training loss: 1.0692 Explore P: 0.0972\n", - "Episode: 465 Total reward: 115.0 Training loss: 0.8992 Explore P: 0.0962\n", - "Episode: 466 Total reward: 128.0 Training loss: 0.3343 Explore P: 0.0951\n", - "Episode: 467 Total reward: 
199.0 Training loss: 0.3627 Explore P: 0.0934\n", - "Episode: 468 Total reward: 183.0 Training loss: 0.5766 Explore P: 0.0919\n", - "Episode: 469 Total reward: 199.0 Training loss: 1.0745 Explore P: 0.0903\n", - "Episode: 470 Total reward: 199.0 Training loss: 0.1914 Explore P: 0.0887\n", - "Episode: 471 Total reward: 91.0 Training loss: 0.9570 Explore P: 0.0880\n", - "Episode: 472 Total reward: 109.0 Training loss: 0.4631 Explore P: 0.0872\n", - "Episode: 473 Total reward: 199.0 Training loss: 0.4437 Explore P: 0.0856\n", - "Episode: 474 Total reward: 199.0 Training loss: 0.2497 Explore P: 0.0841\n", - "Episode: 475 Total reward: 199.0 Training loss: 0.1998 Explore P: 0.0827\n", - "Episode: 476 Total reward: 122.0 Training loss: 0.5317 Explore P: 0.0818\n", - "Episode: 477 Total reward: 199.0 Training loss: 0.2521 Explore P: 0.0804\n", - "Episode: 478 Total reward: 199.0 Training loss: 2.0478 Explore P: 0.0790\n", - "Episode: 479 Total reward: 199.0 Training loss: 0.5112 Explore P: 0.0776\n", - "Episode: 480 Total reward: 199.0 Training loss: 1.5277 Explore P: 0.0763\n", - "Episode: 481 Total reward: 199.0 Training loss: 0.5674 Explore P: 0.0750\n", - "Episode: 482 Total reward: 199.0 Training loss: 442.6313 Explore P: 0.0737\n", - "Episode: 483 Total reward: 143.0 Training loss: 0.3567 Explore P: 0.0728\n", - "Episode: 484 Total reward: 199.0 Training loss: 0.1174 Explore P: 0.0716\n", - "Episode: 485 Total reward: 199.0 Training loss: 0.2728 Explore P: 0.0704\n", - "Episode: 486 Total reward: 199.0 Training loss: 0.1145 Explore P: 0.0692\n", - "Episode: 487 Total reward: 120.0 Training loss: 0.3687 Explore P: 0.0685\n", - "Episode: 488 Total reward: 144.0 Training loss: 0.2456 Explore P: 0.0676\n", - "Episode: 489 Total reward: 199.0 Training loss: 0.2180 Explore P: 0.0665\n", - "Episode: 490 Total reward: 199.0 Training loss: 0.5846 Explore P: 0.0654\n", - "Episode: 491 Total reward: 199.0 Training loss: 0.2266 Explore P: 0.0643\n", - "Episode: 492 Total reward: 199.0 Training loss: 0.3522 Explore P: 0.0632\n", - "Episode: 493 Total reward: 199.0 Training loss: 0.5072 Explore P: 0.0622\n", - "Episode: 494 Total reward: 199.0 Training loss: 1.6455 Explore P: 0.0611\n", - "Episode: 495 Total reward: 199.0 Training loss: 0.3686 Explore P: 0.0601\n", - "Episode: 496 Total reward: 199.0 Training loss: 0.3517 Explore P: 0.0592\n", - "Episode: 497 Total reward: 199.0 Training loss: 3.3342 Explore P: 0.0582\n", - "Episode: 498 Total reward: 199.0 Training loss: 0.3798 Explore P: 0.0572\n", - "Episode: 499 Total reward: 199.0 Training loss: 0.3504 Explore P: 0.0563\n", - "Episode: 500 Total reward: 199.0 Training loss: 0.9721 Explore P: 0.0554\n", - "Episode: 501 Total reward: 199.0 Training loss: 0.4730 Explore P: 0.0545\n", - "Episode: 502 Total reward: 199.0 Training loss: 0.7233 Explore P: 0.0536\n", - "Episode: 503 Total reward: 199.0 Training loss: 0.5176 Explore P: 0.0528\n", - "Episode: 504 Total reward: 199.0 Training loss: 0.3896 Explore P: 0.0519\n", - "Episode: 505 Total reward: 199.0 Training loss: 0.2758 Explore P: 0.0511\n", - "Episode: 506 Total reward: 199.0 Training loss: 0.5355 Explore P: 0.0503\n", - "Episode: 507 Total reward: 199.0 Training loss: 0.6228 Explore P: 0.0495\n", - "Episode: 508 Total reward: 199.0 Training loss: 0.7505 Explore P: 0.0487\n", - "Episode: 509 Total reward: 199.0 Training loss: 0.3618 Explore P: 0.0479\n", - "Episode: 510 Total reward: 199.0 Training loss: 0.5037 Explore P: 0.0472\n", - "Episode: 511 Total reward: 199.0 Training loss: 0.3503 
Explore P: 0.0465\n", - "Episode: 512 Total reward: 199.0 Training loss: 0.4080 Explore P: 0.0457\n", - "Episode: 513 Total reward: 199.0 Training loss: 0.3247 Explore P: 0.0450\n", - "Episode: 514 Total reward: 199.0 Training loss: 0.4891 Explore P: 0.0444\n", - "Episode: 515 Total reward: 132.0 Training loss: 0.2252 Explore P: 0.0439\n", - "Episode: 516 Total reward: 199.0 Training loss: 0.5318 Explore P: 0.0432\n", - "Episode: 517 Total reward: 199.0 Training loss: 0.4207 Explore P: 0.0426\n", - "Episode: 518 Total reward: 199.0 Training loss: 0.3851 Explore P: 0.0419\n", - "Episode: 519 Total reward: 199.0 Training loss: 0.3571 Explore P: 0.0413\n", - "Episode: 520 Total reward: 199.0 Training loss: 0.3445 Explore P: 0.0407\n", - "Episode: 521 Total reward: 191.0 Training loss: 0.3592 Explore P: 0.0401\n", - "Episode: 522 Total reward: 199.0 Training loss: 161.0777 Explore P: 0.0395\n", - "Episode: 523 Total reward: 148.0 Training loss: 0.2385 Explore P: 0.0391\n", - "Episode: 524 Total reward: 197.0 Training loss: 0.4102 Explore P: 0.0385\n", - "Episode: 525 Total reward: 115.0 Training loss: 0.5136 Explore P: 0.0382\n", - "Episode: 526 Total reward: 144.0 Training loss: 0.1992 Explore P: 0.0378\n", - "Episode: 527 Total reward: 199.0 Training loss: 0.5193 Explore P: 0.0372\n", - "Episode: 528 Total reward: 123.0 Training loss: 0.3048 Explore P: 0.0369\n", - "Episode: 529 Total reward: 107.0 Training loss: 265.0688 Explore P: 0.0366\n", - "Episode: 530 Total reward: 142.0 Training loss: 0.3150 Explore P: 0.0362\n", - "Episode: 531 Total reward: 88.0 Training loss: 0.3592 Explore P: 0.0360\n", - "Episode: 532 Total reward: 101.0 Training loss: 0.2379 Explore P: 0.0358\n", - "Episode: 533 Total reward: 109.0 Training loss: 0.2066 Explore P: 0.0355\n", - "Episode: 534 Total reward: 85.0 Training loss: 0.3785 Explore P: 0.0353\n", - "Episode: 535 Total reward: 170.0 Training loss: 0.3857 Explore P: 0.0348\n", - "Episode: 536 Total reward: 100.0 Training loss: 0.2388 Explore P: 0.0346\n", - "Episode: 537 Total reward: 173.0 Training loss: 0.3175 Explore P: 0.0342\n", - "Episode: 538 Total reward: 119.0 Training loss: 0.8095 Explore P: 0.0339\n", - "Episode: 539 Total reward: 149.0 Training loss: 0.4718 Explore P: 0.0335\n", - "Episode: 540 Total reward: 199.0 Training loss: 0.2053 Explore P: 0.0331\n", - "Episode: 541 Total reward: 92.0 Training loss: 0.2423 Explore P: 0.0329\n", - "Episode: 542 Total reward: 118.0 Training loss: 0.1868 Explore P: 0.0326\n", - "Episode: 543 Total reward: 102.0 Training loss: 0.2320 Explore P: 0.0324\n", - "Episode: 544 Total reward: 166.0 Training loss: 0.2058 Explore P: 0.0320\n", - "Episode: 545 Total reward: 199.0 Training loss: 0.2577 Explore P: 0.0316\n", - "Episode: 546 Total reward: 199.0 Training loss: 0.5126 Explore P: 0.0311\n", - "Episode: 547 Total reward: 197.0 Training loss: 0.4180 Explore P: 0.0307\n", - "Episode: 548 Total reward: 126.0 Training loss: 0.2888 Explore P: 0.0305\n", - "Episode: 549 Total reward: 140.0 Training loss: 0.1868 Explore P: 0.0302\n", - "Episode: 550 Total reward: 193.0 Training loss: 0.2071 Explore P: 0.0298\n", - "Episode: 551 Total reward: 199.0 Training loss: 0.1574 Explore P: 0.0294\n", - "Episode: 552 Total reward: 199.0 Training loss: 0.3217 Explore P: 0.0290\n", - "Episode: 553 Total reward: 129.0 Training loss: 66.6600 Explore P: 0.0288\n", - "Episode: 554 Total reward: 198.0 Training loss: 0.2420 Explore P: 0.0284\n", - "Episode: 555 Total reward: 199.0 Training loss: 0.2514 Explore P: 0.0280\n", - 
"Episode: 556 Total reward: 153.0 Training loss: 0.2211 Explore P: 0.0278\n", - "Episode: 557 Total reward: 169.0 Training loss: 0.2779 Explore P: 0.0275\n", - "Episode: 558 Total reward: 199.0 Training loss: 0.1513 Explore P: 0.0271\n", - "Episode: 559 Total reward: 150.0 Training loss: 0.1887 Explore P: 0.0269\n", - "Episode: 560 Total reward: 199.0 Training loss: 0.3669 Explore P: 0.0265\n", - "Episode: 561 Total reward: 199.0 Training loss: 0.1444 Explore P: 0.0262\n", - "Episode: 562 Total reward: 199.0 Training loss: 0.2820 Explore P: 0.0259\n", - "Episode: 563 Total reward: 199.0 Training loss: 0.1695 Explore P: 0.0256\n", - "Episode: 564 Total reward: 199.0 Training loss: 0.1208 Explore P: 0.0253\n", - "Episode: 565 Total reward: 181.0 Training loss: 0.3566 Explore P: 0.0250\n", - "Episode: 566 Total reward: 134.0 Training loss: 19.3122 Explore P: 0.0248\n", - "Episode: 567 Total reward: 199.0 Training loss: 0.4493 Explore P: 0.0245\n", - "Episode: 568 Total reward: 199.0 Training loss: 0.1615 Explore P: 0.0242\n", - "Episode: 569 Total reward: 199.0 Training loss: 0.2993 Explore P: 0.0239\n", - "Episode: 570 Total reward: 199.0 Training loss: 0.2165 Explore P: 0.0237\n", - "Episode: 571 Total reward: 199.0 Training loss: 0.1909 Explore P: 0.0234\n", - "Episode: 572 Total reward: 199.0 Training loss: 0.1677 Explore P: 0.0231\n", - "Episode: 573 Total reward: 199.0 Training loss: 0.2876 Explore P: 0.0229\n", - "Episode: 574 Total reward: 199.0 Training loss: 0.1348 Explore P: 0.0226\n", - "Episode: 575 Total reward: 199.0 Training loss: 0.2104 Explore P: 0.0224\n", - "Episode: 576 Total reward: 199.0 Training loss: 0.1656 Explore P: 0.0221\n", - "Episode: 577 Total reward: 187.0 Training loss: 0.0771 Explore P: 0.0219\n", - "Episode: 578 Total reward: 199.0 Training loss: 0.0697 Explore P: 0.0217\n", - "Episode: 579 Total reward: 199.0 Training loss: 0.1110 Explore P: 0.0214\n", - "Episode: 580 Total reward: 199.0 Training loss: 0.2245 Explore P: 0.0212\n", - "Episode: 581 Total reward: 199.0 Training loss: 0.1553 Explore P: 0.0210\n", - "Episode: 582 Total reward: 199.0 Training loss: 0.1718 Explore P: 0.0208\n", - "Episode: 583 Total reward: 199.0 Training loss: 0.1715 Explore P: 0.0206\n", - "Episode: 584 Total reward: 173.0 Training loss: 0.1197 Explore P: 0.0204\n", - "Episode: 585 Total reward: 199.0 Training loss: 0.1747 Explore P: 0.0202\n", - "Episode: 586 Total reward: 199.0 Training loss: 0.1082 Explore P: 0.0200\n", - "Episode: 587 Total reward: 164.0 Training loss: 0.1912 Explore P: 0.0198\n", - "Episode: 588 Total reward: 199.0 Training loss: 0.1618 Explore P: 0.0196\n", - "Episode: 589 Total reward: 199.0 Training loss: 0.2976 Explore P: 0.0194\n", - "Episode: 590 Total reward: 199.0 Training loss: 0.1695 Explore P: 0.0192\n", - "Episode: 591 Total reward: 199.0 Training loss: 0.2299 Explore P: 0.0191\n", - "Episode: 592 Total reward: 199.0 Training loss: 0.2095 Explore P: 0.0189\n", - "Episode: 593 Total reward: 199.0 Training loss: 0.2393 Explore P: 0.0187\n", - "Episode: 594 Total reward: 199.0 Training loss: 0.1403 Explore P: 0.0185\n", - "Episode: 595 Total reward: 187.0 Training loss: 0.3047 Explore P: 0.0184\n", - "Episode: 596 Total reward: 193.0 Training loss: 0.2958 Explore P: 0.0182\n", - "Episode: 597 Total reward: 107.0 Training loss: 0.2838 Explore P: 0.0181\n", - "Episode: 598 Total reward: 156.0 Training loss: 264.9241 Explore P: 0.0180\n", - "Episode: 599 Total reward: 199.0 Training loss: 121.2079 Explore P: 0.0178\n", - "Episode: 600 Total reward: 
163.0 Training loss: 0.1022 Explore P: 0.0177\n", - "Episode: 601 Total reward: 98.0 Training loss: 268.8141 Explore P: 0.0176\n", - "Episode: 602 Total reward: 154.0 Training loss: 0.2155 Explore P: 0.0175\n", - "Episode: 603 Total reward: 199.0 Training loss: 0.1730 Explore P: 0.0174\n", - "Episode: 604 Total reward: 175.0 Training loss: 0.3001 Explore P: 0.0173\n", - "Episode: 605 Total reward: 199.0 Training loss: 0.2370 Explore P: 0.0171\n", - "Episode: 606 Total reward: 164.0 Training loss: 0.2679 Explore P: 0.0170\n", - "Episode: 607 Total reward: 199.0 Training loss: 0.1331 Explore P: 0.0169\n", - "Episode: 608 Total reward: 175.0 Training loss: 0.1852 Explore P: 0.0167\n", - "Episode: 609 Total reward: 199.0 Training loss: 0.1131 Explore P: 0.0166\n", - "Episode: 610 Total reward: 199.0 Training loss: 0.2251 Explore P: 0.0165\n", - "Episode: 611 Total reward: 175.0 Training loss: 0.2115 Explore P: 0.0164\n", - "Episode: 612 Total reward: 199.0 Training loss: 0.1187 Explore P: 0.0162\n", - "Episode: 613 Total reward: 199.0 Training loss: 0.2060 Explore P: 0.0161\n", - "Episode: 614 Total reward: 135.0 Training loss: 0.0743 Explore P: 0.0160\n", - "Episode: 615 Total reward: 169.0 Training loss: 0.2691 Explore P: 0.0159\n", - "Episode: 616 Total reward: 111.0 Training loss: 0.1496 Explore P: 0.0159\n", - "Episode: 617 Total reward: 199.0 Training loss: 0.3814 Explore P: 0.0158\n", - "Episode: 618 Total reward: 125.0 Training loss: 0.3321 Explore P: 0.0157\n", - "Episode: 619 Total reward: 127.0 Training loss: 276.1676 Explore P: 0.0156\n", - "Episode: 620 Total reward: 199.0 Training loss: 279.7686 Explore P: 0.0155\n", - "Episode: 621 Total reward: 199.0 Training loss: 0.1653 Explore P: 0.0154\n", - "Episode: 622 Total reward: 94.0 Training loss: 0.2949 Explore P: 0.0153\n", - "Episode: 623 Total reward: 199.0 Training loss: 0.2496 Explore P: 0.0152\n", - "Episode: 624 Total reward: 125.0 Training loss: 0.2326 Explore P: 0.0152\n", - "Episode: 625 Total reward: 137.0 Training loss: 0.1459 Explore P: 0.0151\n", - "Episode: 626 Total reward: 182.0 Training loss: 0.1479 Explore P: 0.0150\n", - "Episode: 627 Total reward: 199.0 Training loss: 0.2285 Explore P: 0.0149\n", - "Episode: 628 Total reward: 199.0 Training loss: 0.1959 Explore P: 0.0148\n", - "Episode: 629 Total reward: 199.0 Training loss: 0.1892 Explore P: 0.0147\n", - "Episode: 630 Total reward: 199.0 Training loss: 0.2210 Explore P: 0.0146\n", - "Episode: 631 Total reward: 199.0 Training loss: 0.1756 Explore P: 0.0145\n", - "Episode: 632 Total reward: 199.0 Training loss: 0.2079 Explore P: 0.0144\n", - "Episode: 633 Total reward: 199.0 Training loss: 0.2662 Explore P: 0.0144\n", - "Episode: 634 Total reward: 199.0 Training loss: 0.1618 Explore P: 0.0143\n", - "Episode: 635 Total reward: 199.0 Training loss: 0.0950 Explore P: 0.0142\n", - "Episode: 636 Total reward: 199.0 Training loss: 0.3159 Explore P: 0.0141\n", - "Episode: 637 Total reward: 199.0 Training loss: 0.1576 Explore P: 0.0140\n", - "Episode: 638 Total reward: 199.0 Training loss: 0.2316 Explore P: 0.0139\n", - "Episode: 639 Total reward: 199.0 Training loss: 0.1536 Explore P: 0.0139\n", - "Episode: 640 Total reward: 199.0 Training loss: 0.1452 Explore P: 0.0138\n", - "Episode: 641 Total reward: 199.0 Training loss: 0.1300 Explore P: 0.0137\n", - "Episode: 642 Total reward: 199.0 Training loss: 0.1305 Explore P: 0.0136\n", - "Episode: 643 Total reward: 199.0 Training loss: 0.1132 Explore P: 0.0136\n", - "Episode: 644 Total reward: 199.0 Training loss: 0.1014 
Explore P: 0.0135\n", - "Episode: 645 Total reward: 199.0 Training loss: 0.2034 Explore P: 0.0134\n", - "Episode: 646 Total reward: 199.0 Training loss: 0.1455 Explore P: 0.0134\n", - "Episode: 647 Total reward: 199.0 Training loss: 0.2939 Explore P: 0.0133\n", - "Episode: 648 Total reward: 199.0 Training loss: 0.1605 Explore P: 0.0132\n", - "Episode: 649 Total reward: 199.0 Training loss: 0.2137 Explore P: 0.0132\n", - "Episode: 650 Total reward: 199.0 Training loss: 0.1560 Explore P: 0.0131\n", - "Episode: 651 Total reward: 199.0 Training loss: 0.1156 Explore P: 0.0130\n", - "Episode: 652 Total reward: 199.0 Training loss: 302.5273 Explore P: 0.0130\n", - "Episode: 653 Total reward: 199.0 Training loss: 0.1242 Explore P: 0.0129\n", - "Episode: 654 Total reward: 199.0 Training loss: 0.1443 Explore P: 0.0129\n", - "Episode: 655 Total reward: 199.0 Training loss: 0.1707 Explore P: 0.0128\n", - "Episode: 656 Total reward: 199.0 Training loss: 0.1338 Explore P: 0.0128\n", - "Episode: 657 Total reward: 199.0 Training loss: 0.2094 Explore P: 0.0127\n", - "Episode: 658 Total reward: 199.0 Training loss: 0.1659 Explore P: 0.0126\n", - "Episode: 659 Total reward: 199.0 Training loss: 0.1740 Explore P: 0.0126\n", - "Episode: 660 Total reward: 199.0 Training loss: 0.0645 Explore P: 0.0125\n", - "Episode: 661 Total reward: 199.0 Training loss: 0.2673 Explore P: 0.0125\n", - "Episode: 662 Total reward: 199.0 Training loss: 276.5634 Explore P: 0.0124\n", - "Episode: 663 Total reward: 199.0 Training loss: 0.2976 Explore P: 0.0124\n", - "Episode: 664 Total reward: 199.0 Training loss: 0.3700 Explore P: 0.0123\n", - "Episode: 665 Total reward: 199.0 Training loss: 0.1215 Explore P: 0.0123\n", - "Episode: 666 Total reward: 199.0 Training loss: 0.2097 Explore P: 0.0123\n", - "Episode: 667 Total reward: 199.0 Training loss: 0.1809 Explore P: 0.0122\n", - "Episode: 668 Total reward: 199.0 Training loss: 0.2194 Explore P: 0.0122\n", - "Episode: 669 Total reward: 199.0 Training loss: 0.1520 Explore P: 0.0121\n", - "Episode: 670 Total reward: 199.0 Training loss: 0.2190 Explore P: 0.0121\n", - "Episode: 671 Total reward: 199.0 Training loss: 0.3460 Explore P: 0.0120\n", - "Episode: 672 Total reward: 199.0 Training loss: 0.4117 Explore P: 0.0120\n", - "Episode: 673 Total reward: 199.0 Training loss: 0.3018 Explore P: 0.0120\n", - "Episode: 674 Total reward: 199.0 Training loss: 0.1809 Explore P: 0.0119\n", - "Episode: 675 Total reward: 199.0 Training loss: 0.3239 Explore P: 0.0119\n", - "Episode: 676 Total reward: 199.0 Training loss: 0.2188 Explore P: 0.0119\n", - "Episode: 677 Total reward: 199.0 Training loss: 251.4696 Explore P: 0.0118\n", - "Episode: 678 Total reward: 199.0 Training loss: 276.1304 Explore P: 0.0118\n", - "Episode: 679 Total reward: 199.0 Training loss: 274.9297 Explore P: 0.0117\n", - "Episode: 680 Total reward: 199.0 Training loss: 0.1281 Explore P: 0.0117\n", - "Episode: 681 Total reward: 199.0 Training loss: 0.2222 Explore P: 0.0117\n", - "Episode: 682 Total reward: 199.0 Training loss: 0.2037 Explore P: 0.0116\n", - "Episode: 683 Total reward: 199.0 Training loss: 0.2463 Explore P: 0.0116\n", - "Episode: 684 Total reward: 199.0 Training loss: 0.2739 Explore P: 0.0116\n", - "Episode: 685 Total reward: 199.0 Training loss: 0.3237 Explore P: 0.0115\n", - "Episode: 686 Total reward: 199.0 Training loss: 0.2533 Explore P: 0.0115\n", - "Episode: 687 Total reward: 199.0 Training loss: 0.1558 Explore P: 0.0115\n", - "Episode: 688 Total reward: 199.0 Training loss: 0.1950 Explore P: 0.0115\n", - 
"Episode: 689 Total reward: 199.0 Training loss: 0.1954 Explore P: 0.0114\n", - "Episode: 690 Total reward: 199.0 Training loss: 0.2714 Explore P: 0.0114\n", - "Episode: 691 Total reward: 199.0 Training loss: 0.2191 Explore P: 0.0114\n", - "Episode: 692 Total reward: 199.0 Training loss: 0.4375 Explore P: 0.0113\n", - "Episode: 693 Total reward: 199.0 Training loss: 0.2202 Explore P: 0.0113\n", - "Episode: 694 Total reward: 199.0 Training loss: 0.2222 Explore P: 0.0113\n", - "Episode: 695 Total reward: 199.0 Training loss: 0.2798 Explore P: 0.0113\n", - "Episode: 696 Total reward: 199.0 Training loss: 0.6002 Explore P: 0.0112\n", - "Episode: 697 Total reward: 199.0 Training loss: 0.2640 Explore P: 0.0112\n", - "Episode: 698 Total reward: 199.0 Training loss: 0.1669 Explore P: 0.0112\n", - "Episode: 699 Total reward: 199.0 Training loss: 252.0655 Explore P: 0.0112\n", - "Episode: 700 Total reward: 199.0 Training loss: 0.1837 Explore P: 0.0111\n", - "Episode: 701 Total reward: 199.0 Training loss: 0.1534 Explore P: 0.0111\n", - "Episode: 702 Total reward: 199.0 Training loss: 0.1900 Explore P: 0.0111\n", - "Episode: 703 Total reward: 199.0 Training loss: 0.2640 Explore P: 0.0111\n", - "Episode: 704 Total reward: 199.0 Training loss: 0.1902 Explore P: 0.0111\n", - "Episode: 705 Total reward: 199.0 Training loss: 0.2412 Explore P: 0.0110\n", - "Episode: 706 Total reward: 199.0 Training loss: 0.1321 Explore P: 0.0110\n", - "Episode: 707 Total reward: 199.0 Training loss: 0.1851 Explore P: 0.0110\n", - "Episode: 708 Total reward: 199.0 Training loss: 0.2158 Explore P: 0.0110\n", - "Episode: 709 Total reward: 199.0 Training loss: 0.1619 Explore P: 0.0110\n", - "Episode: 710 Total reward: 199.0 Training loss: 0.2560 Explore P: 0.0109\n", - "Episode: 711 Total reward: 199.0 Training loss: 0.1775 Explore P: 0.0109\n", - "Episode: 712 Total reward: 199.0 Training loss: 0.1768 Explore P: 0.0109\n", - "Episode: 713 Total reward: 199.0 Training loss: 232.6123 Explore P: 0.0109\n", - "Episode: 714 Total reward: 199.0 Training loss: 0.1538 Explore P: 0.0109\n", - "Episode: 715 Total reward: 199.0 Training loss: 0.1527 Explore P: 0.0109\n", - "Episode: 716 Total reward: 199.0 Training loss: 0.1468 Explore P: 0.0108\n", - "Episode: 717 Total reward: 199.0 Training loss: 0.1575 Explore P: 0.0108\n", - "Episode: 718 Total reward: 199.0 Training loss: 0.1759 Explore P: 0.0108\n", - "Episode: 719 Total reward: 199.0 Training loss: 0.2107 Explore P: 0.0108\n", - "Episode: 720 Total reward: 199.0 Training loss: 0.2620 Explore P: 0.0108\n", - "Episode: 721 Total reward: 199.0 Training loss: 0.4076 Explore P: 0.0108\n", - "Episode: 722 Total reward: 199.0 Training loss: 219.6660 Explore P: 0.0107\n", - "Episode: 723 Total reward: 199.0 Training loss: 0.2111 Explore P: 0.0107\n", - "Episode: 724 Total reward: 199.0 Training loss: 0.2733 Explore P: 0.0107\n", - "Episode: 725 Total reward: 199.0 Training loss: 0.2408 Explore P: 0.0107\n", - "Episode: 726 Total reward: 199.0 Training loss: 0.2102 Explore P: 0.0107\n", - "Episode: 727 Total reward: 199.0 Training loss: 0.1467 Explore P: 0.0107\n", - "Episode: 728 Total reward: 199.0 Training loss: 0.2066 Explore P: 0.0107\n", - "Episode: 729 Total reward: 199.0 Training loss: 216.0451 Explore P: 0.0106\n", - "Episode: 730 Total reward: 199.0 Training loss: 0.2248 Explore P: 0.0106\n", - "Episode: 731 Total reward: 199.0 Training loss: 0.2352 Explore P: 0.0106\n", - "Episode: 732 Total reward: 199.0 Training loss: 0.2236 Explore P: 0.0106\n", - "Episode: 733 Total 
reward: 199.0 Training loss: 219.8582 Explore P: 0.0106\n", - "Episode: 734 Total reward: 199.0 Training loss: 0.2165 Explore P: 0.0106\n", - "Episode: 735 Total reward: 199.0 Training loss: 0.3920 Explore P: 0.0106\n", - "Episode: 736 Total reward: 199.0 Training loss: 0.2034 Explore P: 0.0106\n", - "Episode: 737 Total reward: 199.0 Training loss: 0.1787 Explore P: 0.0105\n", - "Episode: 738 Total reward: 199.0 Training loss: 0.1407 Explore P: 0.0105\n", - "Episode: 739 Total reward: 199.0 Training loss: 0.2066 Explore P: 0.0105\n", - "Episode: 740 Total reward: 199.0 Training loss: 0.3307 Explore P: 0.0105\n", - "Episode: 741 Total reward: 199.0 Training loss: 0.5348 Explore P: 0.0105\n", - "Episode: 742 Total reward: 199.0 Training loss: 202.3616 Explore P: 0.0105\n", - "Episode: 743 Total reward: 199.0 Training loss: 0.2714 Explore P: 0.0105\n", - "Episode: 744 Total reward: 199.0 Training loss: 0.2981 Explore P: 0.0105\n", - "Episode: 745 Total reward: 199.0 Training loss: 0.4512 Explore P: 0.0105\n", - "Episode: 746 Total reward: 199.0 Training loss: 0.2276 Explore P: 0.0105\n", - "Episode: 747 Total reward: 199.0 Training loss: 0.2910 Explore P: 0.0105\n", - "Episode: 748 Total reward: 199.0 Training loss: 0.3679 Explore P: 0.0104\n", - "Episode: 749 Total reward: 199.0 Training loss: 0.2004 Explore P: 0.0104\n", - "Episode: 750 Total reward: 199.0 Training loss: 0.1658 Explore P: 0.0104\n", - "Episode: 751 Total reward: 199.0 Training loss: 0.3183 Explore P: 0.0104\n", - "Episode: 752 Total reward: 199.0 Training loss: 0.2062 Explore P: 0.0104\n", - "Episode: 753 Total reward: 199.0 Training loss: 0.1929 Explore P: 0.0104\n", - "Episode: 754 Total reward: 199.0 Training loss: 0.2472 Explore P: 0.0104\n", - "Episode: 755 Total reward: 199.0 Training loss: 0.1489 Explore P: 0.0104\n", - "Episode: 756 Total reward: 199.0 Training loss: 0.4016 Explore P: 0.0104\n", - "Episode: 757 Total reward: 199.0 Training loss: 0.1371 Explore P: 0.0104\n", - "Episode: 758 Total reward: 199.0 Training loss: 0.4689 Explore P: 0.0104\n", - "Episode: 759 Total reward: 199.0 Training loss: 224.6042 Explore P: 0.0104\n", - "Episode: 760 Total reward: 199.0 Training loss: 0.2527 Explore P: 0.0103\n", - "Episode: 761 Total reward: 199.0 Training loss: 0.1639 Explore P: 0.0103\n", - "Episode: 762 Total reward: 199.0 Training loss: 0.1627 Explore P: 0.0103\n", - "Episode: 763 Total reward: 199.0 Training loss: 252.3887 Explore P: 0.0103\n", - "Episode: 764 Total reward: 199.0 Training loss: 0.1338 Explore P: 0.0103\n", - "Episode: 765 Total reward: 199.0 Training loss: 0.1479 Explore P: 0.0103\n", - "Episode: 766 Total reward: 199.0 Training loss: 0.2270 Explore P: 0.0103\n", - "Episode: 767 Total reward: 199.0 Training loss: 0.1981 Explore P: 0.0103\n", - "Episode: 768 Total reward: 199.0 Training loss: 0.2518 Explore P: 0.0103\n", - "Episode: 769 Total reward: 199.0 Training loss: 0.2652 Explore P: 0.0103\n", - "Episode: 770 Total reward: 199.0 Training loss: 0.2673 Explore P: 0.0103\n", - "Episode: 771 Total reward: 199.0 Training loss: 0.1782 Explore P: 0.0103\n", - "Episode: 772 Total reward: 199.0 Training loss: 0.2841 Explore P: 0.0103\n", - "Episode: 773 Total reward: 199.0 Training loss: 0.1076 Explore P: 0.0103\n", - "Episode: 774 Total reward: 199.0 Training loss: 236.1696 Explore P: 0.0103\n", - "Episode: 775 Total reward: 199.0 Training loss: 0.2617 Explore P: 0.0103\n", - "Episode: 776 Total reward: 199.0 Training loss: 0.2751 Explore P: 0.0103\n", - "Episode: 777 Total reward: 199.0 Training 
loss: 0.5686 Explore P: 0.0102\n", - "Episode: 778 Total reward: 199.0 Training loss: 221.1067 Explore P: 0.0102\n", - "Episode: 779 Total reward: 199.0 Training loss: 0.2785 Explore P: 0.0102\n", - "Episode: 780 Total reward: 199.0 Training loss: 0.3039 Explore P: 0.0102\n", - "Episode: 781 Total reward: 199.0 Training loss: 0.2379 Explore P: 0.0102\n", - "Episode: 782 Total reward: 199.0 Training loss: 0.2758 Explore P: 0.0102\n", - "Episode: 783 Total reward: 199.0 Training loss: 230.4681 Explore P: 0.0102\n", - "Episode: 784 Total reward: 199.0 Training loss: 0.3540 Explore P: 0.0102\n", - "Episode: 785 Total reward: 120.0 Training loss: 0.3178 Explore P: 0.0102\n", - "Episode: 786 Total reward: 199.0 Training loss: 0.3128 Explore P: 0.0102\n", - "Episode: 787 Total reward: 199.0 Training loss: 0.2162 Explore P: 0.0102\n", - "Episode: 788 Total reward: 199.0 Training loss: 0.4373 Explore P: 0.0102\n", - "Episode: 789 Total reward: 199.0 Training loss: 0.3792 Explore P: 0.0102\n", - "Episode: 790 Total reward: 199.0 Training loss: 0.4819 Explore P: 0.0102\n", - "Episode: 791 Total reward: 199.0 Training loss: 0.4004 Explore P: 0.0102\n", - "Episode: 792 Total reward: 199.0 Training loss: 257.7542 Explore P: 0.0102\n", - "Episode: 793 Total reward: 199.0 Training loss: 0.2199 Explore P: 0.0102\n", - "Episode: 794 Total reward: 199.0 Training loss: 0.2827 Explore P: 0.0102\n", - "Episode: 795 Total reward: 199.0 Training loss: 0.3288 Explore P: 0.0102\n", - "Episode: 796 Total reward: 199.0 Training loss: 0.1966 Explore P: 0.0102\n", - "Episode: 797 Total reward: 199.0 Training loss: 0.2905 Explore P: 0.0102\n", - "Episode: 798 Total reward: 199.0 Training loss: 0.3663 Explore P: 0.0102\n", - "Episode: 799 Total reward: 199.0 Training loss: 0.3002 Explore P: 0.0102\n", - "Episode: 800 Total reward: 199.0 Training loss: 248.7214 Explore P: 0.0102\n", - "Episode: 801 Total reward: 199.0 Training loss: 0.2644 Explore P: 0.0102\n", - "Episode: 802 Total reward: 199.0 Training loss: 0.1708 Explore P: 0.0102\n", - "Episode: 803 Total reward: 199.0 Training loss: 0.3009 Explore P: 0.0101\n", - "Episode: 804 Total reward: 199.0 Training loss: 0.2092 Explore P: 0.0101\n", - "Episode: 805 Total reward: 199.0 Training loss: 0.1142 Explore P: 0.0101\n", - "Episode: 806 Total reward: 199.0 Training loss: 0.2415 Explore P: 0.0101\n", - "Episode: 807 Total reward: 199.0 Training loss: 0.3501 Explore P: 0.0101\n", - "Episode: 808 Total reward: 199.0 Training loss: 0.9538 Explore P: 0.0101\n", - "Episode: 809 Total reward: 199.0 Training loss: 277.5528 Explore P: 0.0101\n", - "Episode: 810 Total reward: 199.0 Training loss: 0.2134 Explore P: 0.0101\n", - "Episode: 811 Total reward: 199.0 Training loss: 207.1723 Explore P: 0.0101\n", - "Episode: 812 Total reward: 199.0 Training loss: 0.3194 Explore P: 0.0101\n", - "Episode: 813 Total reward: 199.0 Training loss: 0.2294 Explore P: 0.0101\n", - "Episode: 814 Total reward: 199.0 Training loss: 0.3345 Explore P: 0.0101\n", - "Episode: 815 Total reward: 199.0 Training loss: 210.0273 Explore P: 0.0101\n", - "Episode: 816 Total reward: 199.0 Training loss: 0.2139 Explore P: 0.0101\n", - "Episode: 817 Total reward: 199.0 Training loss: 0.2843 Explore P: 0.0101\n", - "Episode: 818 Total reward: 199.0 Training loss: 0.2491 Explore P: 0.0101\n", - "Episode: 819 Total reward: 199.0 Training loss: 0.1913 Explore P: 0.0101\n", - "Episode: 820 Total reward: 199.0 Training loss: 0.1809 Explore P: 0.0101\n", - "Episode: 821 Total reward: 199.0 Training loss: 0.2152 Explore 
P: 0.0101\n", - "Episode: 822 Total reward: 199.0 Training loss: 0.2995 Explore P: 0.0101\n", - "Episode: 823 Total reward: 199.0 Training loss: 0.1697 Explore P: 0.0101\n", - "Episode: 824 Total reward: 199.0 Training loss: 0.2348 Explore P: 0.0101\n", - "Episode: 825 Total reward: 199.0 Training loss: 0.3189 Explore P: 0.0101\n", - "Episode: 826 Total reward: 199.0 Training loss: 0.1758 Explore P: 0.0101\n", - "Episode: 827 Total reward: 199.0 Training loss: 253.9098 Explore P: 0.0101\n", - "Episode: 828 Total reward: 199.0 Training loss: 0.2906 Explore P: 0.0101\n", - "Episode: 829 Total reward: 199.0 Training loss: 0.1719 Explore P: 0.0101\n", - "Episode: 830 Total reward: 199.0 Training loss: 0.2064 Explore P: 0.0101\n", - "Episode: 831 Total reward: 199.0 Training loss: 0.2222 Explore P: 0.0101\n", - "Episode: 832 Total reward: 199.0 Training loss: 259.6678 Explore P: 0.0101\n", - "Episode: 833 Total reward: 199.0 Training loss: 0.2378 Explore P: 0.0101\n", - "Episode: 834 Total reward: 199.0 Training loss: 0.1514 Explore P: 0.0101\n", - "Episode: 835 Total reward: 199.0 Training loss: 0.1746 Explore P: 0.0101\n", - "Episode: 836 Total reward: 199.0 Training loss: 248.9290 Explore P: 0.0101\n", - "Episode: 837 Total reward: 199.0 Training loss: 0.1560 Explore P: 0.0101\n", - "Episode: 838 Total reward: 199.0 Training loss: 0.4237 Explore P: 0.0101\n", - "Episode: 839 Total reward: 199.0 Training loss: 0.2009 Explore P: 0.0101\n", - "Episode: 840 Total reward: 199.0 Training loss: 184.5951 Explore P: 0.0101\n", - "Episode: 841 Total reward: 199.0 Training loss: 0.1352 Explore P: 0.0101\n", - "Episode: 842 Total reward: 199.0 Training loss: 0.1728 Explore P: 0.0101\n", - "Episode: 843 Total reward: 199.0 Training loss: 0.2078 Explore P: 0.0101\n", - "Episode: 844 Total reward: 199.0 Training loss: 0.2525 Explore P: 0.0101\n", - "Episode: 845 Total reward: 199.0 Training loss: 0.1926 Explore P: 0.0101\n", - "Episode: 846 Total reward: 199.0 Training loss: 0.1771 Explore P: 0.0101\n", - "Episode: 847 Total reward: 199.0 Training loss: 0.1893 Explore P: 0.0101\n", - "Episode: 848 Total reward: 199.0 Training loss: 0.2303 Explore P: 0.0101\n", - "Episode: 849 Total reward: 199.0 Training loss: 0.3267 Explore P: 0.0101\n", - "Episode: 850 Total reward: 199.0 Training loss: 0.4307 Explore P: 0.0101\n", - "Episode: 851 Total reward: 199.0 Training loss: 0.2216 Explore P: 0.0101\n", - "Episode: 852 Total reward: 199.0 Training loss: 0.2879 Explore P: 0.0101\n", - "Episode: 853 Total reward: 199.0 Training loss: 0.2610 Explore P: 0.0101\n", - "Episode: 854 Total reward: 199.0 Training loss: 0.2365 Explore P: 0.0101\n", - "Episode: 855 Total reward: 199.0 Training loss: 0.1976 Explore P: 0.0101\n", - "Episode: 856 Total reward: 199.0 Training loss: 0.2970 Explore P: 0.0101\n", - "Episode: 857 Total reward: 199.0 Training loss: 0.2416 Explore P: 0.0101\n", - "Episode: 858 Total reward: 199.0 Training loss: 0.1273 Explore P: 0.0100\n", - "Episode: 859 Total reward: 199.0 Training loss: 0.2467 Explore P: 0.0100\n", - "Episode: 860 Total reward: 199.0 Training loss: 0.1622 Explore P: 0.0100\n", - "Episode: 861 Total reward: 199.0 Training loss: 0.1708 Explore P: 0.0100\n", - "Episode: 862 Total reward: 199.0 Training loss: 0.2332 Explore P: 0.0100\n", - "Episode: 863 Total reward: 199.0 Training loss: 214.5099 Explore P: 0.0100\n", - "Episode: 864 Total reward: 199.0 Training loss: 0.2439 Explore P: 0.0100\n", - "Episode: 865 Total reward: 199.0 Training loss: 0.2691 Explore P: 0.0100\n", - 
"Episode: 866 Total reward: 199.0 Training loss: 0.1566 Explore P: 0.0100\n", - "Episode: 867 Total reward: 199.0 Training loss: 0.2568 Explore P: 0.0100\n", - "Episode: 868 Total reward: 199.0 Training loss: 0.1400 Explore P: 0.0100\n", - "Episode: 869 Total reward: 199.0 Training loss: 0.2020 Explore P: 0.0100\n", - "Episode: 870 Total reward: 199.0 Training loss: 0.2250 Explore P: 0.0100\n", - "Episode: 871 Total reward: 199.0 Training loss: 0.2768 Explore P: 0.0100\n", - "Episode: 872 Total reward: 199.0 Training loss: 0.4439 Explore P: 0.0100\n", - "Episode: 873 Total reward: 199.0 Training loss: 0.3228 Explore P: 0.0100\n", - "Episode: 874 Total reward: 199.0 Training loss: 0.2968 Explore P: 0.0100\n", - "Episode: 875 Total reward: 199.0 Training loss: 0.3035 Explore P: 0.0100\n", - "Episode: 876 Total reward: 199.0 Training loss: 0.2627 Explore P: 0.0100\n", - "Episode: 877 Total reward: 199.0 Training loss: 0.1749 Explore P: 0.0100\n", - "Episode: 878 Total reward: 199.0 Training loss: 0.1980 Explore P: 0.0100\n", - "Episode: 879 Total reward: 199.0 Training loss: 0.1091 Explore P: 0.0100\n", - "Episode: 880 Total reward: 199.0 Training loss: 0.1564 Explore P: 0.0100\n", - "Episode: 881 Total reward: 199.0 Training loss: 221.0951 Explore P: 0.0100\n", - "Episode: 882 Total reward: 199.0 Training loss: 0.2991 Explore P: 0.0100\n", - "Episode: 883 Total reward: 199.0 Training loss: 0.2701 Explore P: 0.0100\n", - "Episode: 884 Total reward: 199.0 Training loss: 0.2532 Explore P: 0.0100\n", - "Episode: 885 Total reward: 199.0 Training loss: 0.1929 Explore P: 0.0100\n", - "Episode: 886 Total reward: 199.0 Training loss: 0.2560 Explore P: 0.0100\n", - "Episode: 887 Total reward: 199.0 Training loss: 0.3255 Explore P: 0.0100\n", - "Episode: 888 Total reward: 199.0 Training loss: 0.2749 Explore P: 0.0100\n", - "Episode: 889 Total reward: 199.0 Training loss: 0.3031 Explore P: 0.0100\n", - "Episode: 890 Total reward: 199.0 Training loss: 0.1755 Explore P: 0.0100\n", - "Episode: 891 Total reward: 199.0 Training loss: 0.2363 Explore P: 0.0100\n", - "Episode: 892 Total reward: 199.0 Training loss: 0.2809 Explore P: 0.0100\n", - "Episode: 893 Total reward: 199.0 Training loss: 0.3881 Explore P: 0.0100\n", - "Episode: 894 Total reward: 199.0 Training loss: 0.2718 Explore P: 0.0100\n", - "Episode: 895 Total reward: 199.0 Training loss: 0.2352 Explore P: 0.0100\n", - "Episode: 896 Total reward: 199.0 Training loss: 0.3286 Explore P: 0.0100\n", - "Episode: 897 Total reward: 199.0 Training loss: 0.2536 Explore P: 0.0100\n", - "Episode: 898 Total reward: 199.0 Training loss: 0.3427 Explore P: 0.0100\n", - "Episode: 899 Total reward: 199.0 Training loss: 0.2033 Explore P: 0.0100\n", - "Episode: 900 Total reward: 199.0 Training loss: 0.1665 Explore P: 0.0100\n", - "Episode: 901 Total reward: 199.0 Training loss: 0.2069 Explore P: 0.0100\n", - "Episode: 902 Total reward: 199.0 Training loss: 0.2207 Explore P: 0.0100\n", - "Episode: 903 Total reward: 199.0 Training loss: 0.2462 Explore P: 0.0100\n", - "Episode: 904 Total reward: 199.0 Training loss: 0.4641 Explore P: 0.0100\n", - "Episode: 905 Total reward: 199.0 Training loss: 0.2322 Explore P: 0.0100\n", - "Episode: 906 Total reward: 199.0 Training loss: 0.2513 Explore P: 0.0100\n", - "Episode: 907 Total reward: 199.0 Training loss: 0.3005 Explore P: 0.0100\n", - "Episode: 908 Total reward: 199.0 Training loss: 0.1780 Explore P: 0.0100\n", - "Episode: 909 Total reward: 199.0 Training loss: 0.2631 Explore P: 0.0100\n", - "Episode: 910 Total reward: 
199.0 Training loss: 0.1676 Explore P: 0.0100\n", - "Episode: 911 Total reward: 199.0 Training loss: 0.2065 Explore P: 0.0100\n", - "Episode: 912 Total reward: 199.0 Training loss: 0.2247 Explore P: 0.0100\n", - "Episode: 913 Total reward: 199.0 Training loss: 0.2489 Explore P: 0.0100\n", - "Episode: 914 Total reward: 199.0 Training loss: 0.2946 Explore P: 0.0100\n", - "Episode: 915 Total reward: 199.0 Training loss: 0.1134 Explore P: 0.0100\n", - "Episode: 916 Total reward: 199.0 Training loss: 164.2444 Explore P: 0.0100\n", - "Episode: 917 Total reward: 199.0 Training loss: 0.2205 Explore P: 0.0100\n", - "Episode: 918 Total reward: 199.0 Training loss: 0.2216 Explore P: 0.0100\n", - "Episode: 919 Total reward: 199.0 Training loss: 0.2455 Explore P: 0.0100\n", - "Episode: 920 Total reward: 199.0 Training loss: 0.1803 Explore P: 0.0100\n", - "Episode: 921 Total reward: 199.0 Training loss: 0.3260 Explore P: 0.0100\n", - "Episode: 922 Total reward: 199.0 Training loss: 0.2077 Explore P: 0.0100\n", - "Episode: 923 Total reward: 199.0 Training loss: 0.3096 Explore P: 0.0100\n", - "Episode: 924 Total reward: 199.0 Training loss: 0.2290 Explore P: 0.0100\n", - "Episode: 925 Total reward: 199.0 Training loss: 0.5476 Explore P: 0.0100\n", - "Episode: 926 Total reward: 199.0 Training loss: 0.2150 Explore P: 0.0100\n", - "Episode: 927 Total reward: 199.0 Training loss: 0.1507 Explore P: 0.0100\n", - "Episode: 928 Total reward: 199.0 Training loss: 0.1846 Explore P: 0.0100\n", - "Episode: 929 Total reward: 199.0 Training loss: 0.2062 Explore P: 0.0100\n", - "Episode: 930 Total reward: 199.0 Training loss: 0.2489 Explore P: 0.0100\n", - "Episode: 931 Total reward: 199.0 Training loss: 265.6849 Explore P: 0.0100\n", - "Episode: 932 Total reward: 199.0 Training loss: 0.1021 Explore P: 0.0100\n", - "Episode: 933 Total reward: 199.0 Training loss: 0.1830 Explore P: 0.0100\n", - "Episode: 934 Total reward: 199.0 Training loss: 0.2064 Explore P: 0.0100\n", - "Episode: 935 Total reward: 199.0 Training loss: 124.9705 Explore P: 0.0100\n", - "Episode: 936 Total reward: 199.0 Training loss: 0.2213 Explore P: 0.0100\n", - "Episode: 937 Total reward: 199.0 Training loss: 0.1976 Explore P: 0.0100\n", - "Episode: 938 Total reward: 199.0 Training loss: 0.1698 Explore P: 0.0100\n", - "Episode: 939 Total reward: 199.0 Training loss: 0.1256 Explore P: 0.0100\n", - "Episode: 940 Total reward: 199.0 Training loss: 0.2449 Explore P: 0.0100\n", - "Episode: 941 Total reward: 199.0 Training loss: 0.2213 Explore P: 0.0100\n", - "Episode: 942 Total reward: 199.0 Training loss: 0.1152 Explore P: 0.0100\n", - "Episode: 943 Total reward: 199.0 Training loss: 203.7172 Explore P: 0.0100\n", - "Episode: 944 Total reward: 199.0 Training loss: 0.2377 Explore P: 0.0100\n", - "Episode: 945 Total reward: 199.0 Training loss: 0.1926 Explore P: 0.0100\n", - "Episode: 946 Total reward: 199.0 Training loss: 0.1441 Explore P: 0.0100\n", - "Episode: 947 Total reward: 199.0 Training loss: 0.1352 Explore P: 0.0100\n", - "Episode: 948 Total reward: 199.0 Training loss: 0.1469 Explore P: 0.0100\n", - "Episode: 949 Total reward: 199.0 Training loss: 0.2769 Explore P: 0.0100\n", - "Episode: 950 Total reward: 199.0 Training loss: 0.1295 Explore P: 0.0100\n", - "Episode: 951 Total reward: 199.0 Training loss: 0.0762 Explore P: 0.0100\n", - "Episode: 952 Total reward: 199.0 Training loss: 0.3513 Explore P: 0.0100\n", - "Episode: 953 Total reward: 199.0 Training loss: 0.1848 Explore P: 0.0100\n", - "Episode: 954 Total reward: 199.0 Training loss: 
0.1068 Explore P: 0.0100\n", - "Episode: 955 Total reward: 199.0 Training loss: 0.1407 Explore P: 0.0100\n", - "Episode: 956 Total reward: 199.0 Training loss: 0.0827 Explore P: 0.0100\n", - "Episode: 957 Total reward: 199.0 Training loss: 0.1507 Explore P: 0.0100\n", - "Episode: 958 Total reward: 199.0 Training loss: 0.1290 Explore P: 0.0100\n", - "Episode: 959 Total reward: 199.0 Training loss: 0.1627 Explore P: 0.0100\n", - "Episode: 960 Total reward: 199.0 Training loss: 0.3708 Explore P: 0.0100\n", - "Episode: 961 Total reward: 199.0 Training loss: 0.5660 Explore P: 0.0100\n", - "Episode: 962 Total reward: 199.0 Training loss: 0.3176 Explore P: 0.0100\n", - "Episode: 963 Total reward: 199.0 Training loss: 0.2528 Explore P: 0.0100\n", - "Episode: 964 Total reward: 199.0 Training loss: 0.1964 Explore P: 0.0100\n", - "Episode: 965 Total reward: 199.0 Training loss: 0.2376 Explore P: 0.0100\n", - "Episode: 966 Total reward: 199.0 Training loss: 0.3047 Explore P: 0.0100\n", - "Episode: 967 Total reward: 199.0 Training loss: 0.1229 Explore P: 0.0100\n", - "Episode: 968 Total reward: 199.0 Training loss: 0.1962 Explore P: 0.0100\n", - "Episode: 969 Total reward: 199.0 Training loss: 0.1275 Explore P: 0.0100\n", - "Episode: 970 Total reward: 199.0 Training loss: 0.1846 Explore P: 0.0100\n", - "Episode: 971 Total reward: 199.0 Training loss: 0.0820 Explore P: 0.0100\n", - "Episode: 972 Total reward: 199.0 Training loss: 0.3275 Explore P: 0.0100\n", - "Episode: 973 Total reward: 199.0 Training loss: 0.2327 Explore P: 0.0100\n", - "Episode: 974 Total reward: 199.0 Training loss: 0.2778 Explore P: 0.0100\n", - "Episode: 975 Total reward: 199.0 Training loss: 0.1259 Explore P: 0.0100\n", - "Episode: 976 Total reward: 199.0 Training loss: 0.4643 Explore P: 0.0100\n", - "Episode: 977 Total reward: 199.0 Training loss: 0.3287 Explore P: 0.0100\n", - "Episode: 978 Total reward: 199.0 Training loss: 0.5134 Explore P: 0.0100\n", - "Episode: 979 Total reward: 199.0 Training loss: 0.1525 Explore P: 0.0100\n", - "Episode: 980 Total reward: 199.0 Training loss: 0.1874 Explore P: 0.0100\n", - "Episode: 981 Total reward: 199.0 Training loss: 0.1779 Explore P: 0.0100\n", - "Episode: 982 Total reward: 199.0 Training loss: 0.5901 Explore P: 0.0100\n", - "Episode: 983 Total reward: 199.0 Training loss: 0.2271 Explore P: 0.0100\n", - "Episode: 984 Total reward: 199.0 Training loss: 0.1310 Explore P: 0.0100\n", - "Episode: 985 Total reward: 199.0 Training loss: 0.5868 Explore P: 0.0100\n", - "Episode: 986 Total reward: 199.0 Training loss: 0.0855 Explore P: 0.0100\n", - "Episode: 987 Total reward: 199.0 Training loss: 0.1786 Explore P: 0.0100\n", - "Episode: 988 Total reward: 199.0 Training loss: 0.1669 Explore P: 0.0100\n", - "Episode: 989 Total reward: 199.0 Training loss: 0.1403 Explore P: 0.0100\n", - "Episode: 990 Total reward: 199.0 Training loss: 223.3425 Explore P: 0.0100\n", - "Episode: 991 Total reward: 199.0 Training loss: 0.1557 Explore P: 0.0100\n", - "Episode: 992 Total reward: 199.0 Training loss: 0.1683 Explore P: 0.0100\n", - "Episode: 993 Total reward: 199.0 Training loss: 0.1106 Explore P: 0.0100\n", - "Episode: 994 Total reward: 199.0 Training loss: 0.1079 Explore P: 0.0100\n", - "Episode: 995 Total reward: 199.0 Training loss: 0.1316 Explore P: 0.0100\n", - "Episode: 996 Total reward: 199.0 Training loss: 261.9552 Explore P: 0.0100\n", - "Episode: 997 Total reward: 199.0 Training loss: 0.1740 Explore P: 0.0100\n", - "Episode: 998 Total reward: 199.0 Training loss: 0.1739 Explore P: 0.0100\n", - 
"Episode: 999 Total reward: 199.0 Training loss: 0.1595 Explore P: 0.0100\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Now train with experiences\n", "saver = tf.train.Saver()\n", @@ -1522,9 +455,7 @@ "cell_type": "code", "execution_count": 167, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -1539,11 +470,7 @@ { "cell_type": "code", "execution_count": 181, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "data": { @@ -1588,9 +515,7 @@ "cell_type": "code", "execution_count": 183, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ diff --git a/sentiment-rnn/Sentiment_RNN_Solution.ipynb b/sentiment-rnn/Sentiment_RNN_Solution.ipynb index b82b8573be..f9932b6f94 100644 --- a/sentiment-rnn/Sentiment_RNN_Solution.ipynb +++ b/sentiment-rnn/Sentiment_RNN_Solution.ipynb @@ -2,10 +2,7 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# Sentiment Analysis with an RNN\n", "\n", @@ -26,9 +23,7 @@ "cell_type": "code", "execution_count": 29, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -39,11 +34,7 @@ { "cell_type": "code", "execution_count": 30, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with open('../sentiment-network/reviews.txt', 'r') as f:\n", @@ -55,11 +46,7 @@ { "cell_type": "code", "execution_count": 31, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "data": { @@ -78,10 +65,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Data preprocessing\n", "\n", @@ -96,9 +80,7 @@ "cell_type": "code", "execution_count": 32, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -113,11 +95,7 @@ { "cell_type": "code", "execution_count": 33, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "data": { @@ -137,11 +115,7 @@ { "cell_type": "code", "execution_count": 34, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "data": { @@ -259,10 +233,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Encoding the words\n", "\n", @@ -276,9 +247,7 @@ "cell_type": "code", "execution_count": 35, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -294,11 +263,7 @@ }, { "cell_type": "markdown", - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Encoding the labels\n", "\n", @@ -310,11 +275,7 @@ { "cell_type": "code", "execution_count": 36, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "labels = labels.split('\\n')\n", @@ -324,11 +285,7 @@ { "cell_type": "code", "execution_count": 37, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + 
"metadata": {}, "outputs": [ { "name": "stdout", @@ -347,10 +304,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Okay, a couple issues here. We seem to have one review with zero length. And, the maximum review length is way too many steps for our RNN. Let's truncate to 200 steps. For reviews shorter than 200, we'll pad with 0s. For reviews longer than 200, we can truncate them to the first 200 characters.\n", "\n", @@ -360,11 +314,7 @@ { "cell_type": "code", "execution_count": 38, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "data": { @@ -385,11 +335,7 @@ { "cell_type": "code", "execution_count": 41, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "data": { @@ -408,10 +354,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Turns out its the final review that has zero length. But that might not always be the case, so let's make it more general." ] @@ -419,11 +362,7 @@ { "cell_type": "code", "execution_count": 42, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "reviews_ints = [reviews_ints[ii] for ii in non_zero_idx]\n", @@ -432,10 +371,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "> **Exercise:** Now, create an array `features` that contains the data we'll pass to the network. The data should come from `review_ints`, since we want to feed integers to the network. Each row should be 200 elements long. For reviews shorter than 200 words, left pad with 0s. That is, if the review is `['best', 'movie', 'ever']`, `[117, 18, 128]` as integers, the row will look like `[0, 0, 0, ..., 0, 117, 18, 128]`. 
For reviews longer than 200, use only the first 200 words as the feature vector.\n", "\n", @@ -446,11 +382,7 @@ { "cell_type": "code", "execution_count": 46, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "seq_len = 200\n", @@ -462,11 +394,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "data": { @@ -604,10 +532,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Training, Validation, Test\n", "\n" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "With our data in nice shape, we'll split it into training, validation, and test sets.\n", "\n", @@ -628,11 +550,7 @@ { "cell_type": "code", "execution_count": 48, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -663,10 +581,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "With train, validation, and test fractions of 0.8, 0.1, 0.1, the final shapes should look like:\n", "```\n", @@ -679,10 +594,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Build the graph\n", "\n", @@ -698,9 +610,7 @@ "cell_type": "code", "execution_count": 31, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -712,20 +622,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "For the network itself, we'll be passing in our 200 element long review vectors. Each batch will be `batch_size` vectors. We'll also be using dropout on the LSTM layer, so we'll make a placeholder for the keep probability." ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "> **Exercise:** Create the `inputs_`, `labels_`, and dropout `keep_prob` placeholders using `tf.placeholder`. `labels_` needs to be two-dimensional to work with some functions later. Since `keep_prob` is a scalar (a 0-dimensional tensor), you shouldn't provide a size to `tf.placeholder`."
] @@ -733,11 +637,7 @@ { "cell_type": "code", "execution_count": 32, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "n_words = len(vocab_to_int)\n", @@ -753,10 +653,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Embedding\n", "\n", @@ -769,11 +666,7 @@ { "cell_type": "code", "execution_count": 33, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "# Size of the embedding vectors (number of units in the embedding layer)\n", @@ -786,10 +679,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### LSTM cell\n", "\n", @@ -833,11 +723,7 @@ { "cell_type": "code", "execution_count": 34, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with graph.as_default():\n", @@ -856,10 +742,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### RNN forward pass\n", "\n", @@ -880,11 +763,7 @@ { "cell_type": "code", "execution_count": 35, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with graph.as_default():\n", @@ -894,10 +773,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Output\n", "\n", @@ -907,11 +783,7 @@ { "cell_type": "code", "execution_count": 36, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with graph.as_default():\n", @@ -923,10 +795,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Validation accuracy\n", "\n", @@ -936,11 +805,7 @@ { "cell_type": "code", "execution_count": 37, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with graph.as_default():\n", @@ -950,10 +815,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Batching\n", "\n", @@ -964,9 +826,7 @@ "cell_type": "code", "execution_count": 38, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -980,10 +840,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Training\n", "\n", @@ -992,116 +849,9 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch: 0/10 Iteration: 5 Train loss: 0.244\n", - "Epoch: 0/10 Iteration: 10 Train loss: 0.237\n", - "Epoch: 0/10 Iteration: 15 Train loss: 0.198\n", - "Epoch: 0/10 Iteration: 20 Train loss: 0.211\n", - "Epoch: 0/10 Iteration: 25 Train loss: 0.197\n", - "Val acc: 0.713\n", - "Epoch: 0/10 Iteration: 30 Train loss: 0.211\n", - "Epoch: 0/10 Iteration: 35 Train loss: 0.176\n", - "Epoch: 0/10 Iteration: 40 Train loss: 0.170\n", - "Epoch: 1/10 Iteration: 45 Train loss: 0.166\n", - "Epoch: 1/10 Iteration: 50 Train loss: 0.165\n", - "Val acc: 
0.749\n", - "Epoch: 1/10 Iteration: 55 Train loss: 0.144\n", - "Epoch: 1/10 Iteration: 60 Train loss: 0.134\n", - "Epoch: 1/10 Iteration: 65 Train loss: 0.128\n", - "Epoch: 1/10 Iteration: 70 Train loss: 0.151\n", - "Epoch: 1/10 Iteration: 75 Train loss: 0.121\n", - "Val acc: 0.744\n", - "Epoch: 1/10 Iteration: 80 Train loss: 0.136\n", - "Epoch: 2/10 Iteration: 85 Train loss: 0.116\n", - "Epoch: 2/10 Iteration: 90 Train loss: 0.134\n", - "Epoch: 2/10 Iteration: 95 Train loss: 0.103\n", - "Epoch: 2/10 Iteration: 100 Train loss: 0.106\n", - "Val acc: 0.807\n", - "Epoch: 2/10 Iteration: 105 Train loss: 0.088\n", - "Epoch: 2/10 Iteration: 110 Train loss: 0.135\n", - "Epoch: 2/10 Iteration: 115 Train loss: 0.102\n", - "Epoch: 2/10 Iteration: 120 Train loss: 0.150\n", - "Epoch: 3/10 Iteration: 125 Train loss: 0.162\n", - "Val acc: 0.682\n", - "Epoch: 3/10 Iteration: 130 Train loss: 0.186\n", - "Epoch: 3/10 Iteration: 135 Train loss: 0.220\n", - "Epoch: 3/10 Iteration: 140 Train loss: 0.209\n", - "Epoch: 3/10 Iteration: 145 Train loss: 0.199\n", - "Epoch: 3/10 Iteration: 150 Train loss: 0.180\n", - "Val acc: 0.701\n", - "Epoch: 3/10 Iteration: 155 Train loss: 0.151\n", - "Epoch: 3/10 Iteration: 160 Train loss: 0.156\n", - "Epoch: 4/10 Iteration: 165 Train loss: 0.127\n", - "Epoch: 4/10 Iteration: 170 Train loss: 0.150\n", - "Epoch: 4/10 Iteration: 175 Train loss: 0.148\n", - "Val acc: 0.739\n", - "Epoch: 4/10 Iteration: 180 Train loss: 0.108\n", - "Epoch: 4/10 Iteration: 185 Train loss: 0.074\n", - "Epoch: 4/10 Iteration: 190 Train loss: 0.096\n", - "Epoch: 4/10 Iteration: 195 Train loss: 0.103\n", - "Epoch: 4/10 Iteration: 200 Train loss: 0.094\n", - "Val acc: 0.810\n", - "Epoch: 5/10 Iteration: 205 Train loss: 0.090\n", - "Epoch: 5/10 Iteration: 210 Train loss: 0.111\n", - "Epoch: 5/10 Iteration: 215 Train loss: 0.108\n", - "Epoch: 5/10 Iteration: 220 Train loss: 0.077\n", - "Epoch: 5/10 Iteration: 225 Train loss: 0.075\n", - "Val acc: 0.802\n", - "Epoch: 5/10 Iteration: 230 Train loss: 0.072\n", - "Epoch: 5/10 Iteration: 235 Train loss: 0.070\n", - "Epoch: 5/10 Iteration: 240 Train loss: 0.084\n", - "Epoch: 6/10 Iteration: 245 Train loss: 0.058\n", - "Epoch: 6/10 Iteration: 250 Train loss: 0.073\n", - "Val acc: 0.801\n", - "Epoch: 6/10 Iteration: 255 Train loss: 0.078\n", - "Epoch: 6/10 Iteration: 260 Train loss: 0.062\n", - "Epoch: 6/10 Iteration: 265 Train loss: 0.080\n", - "Epoch: 6/10 Iteration: 270 Train loss: 0.067\n", - "Epoch: 6/10 Iteration: 275 Train loss: 0.053\n", - "Val acc: 0.788\n", - "Epoch: 6/10 Iteration: 280 Train loss: 0.070\n", - "Epoch: 7/10 Iteration: 285 Train loss: 0.108\n", - "Epoch: 7/10 Iteration: 290 Train loss: 0.059\n", - "Epoch: 7/10 Iteration: 295 Train loss: 0.065\n", - "Epoch: 7/10 Iteration: 300 Train loss: 0.113\n", - "Val acc: 0.783\n", - "Epoch: 7/10 Iteration: 305 Train loss: 0.261\n", - "Epoch: 7/10 Iteration: 310 Train loss: 0.219\n", - "Epoch: 7/10 Iteration: 315 Train loss: 0.086\n", - "Epoch: 7/10 Iteration: 320 Train loss: 0.124\n", - "Epoch: 8/10 Iteration: 325 Train loss: 0.098\n", - "Val acc: 0.762\n", - "Epoch: 8/10 Iteration: 330 Train loss: 0.081\n", - "Epoch: 8/10 Iteration: 335 Train loss: 0.056\n", - "Epoch: 8/10 Iteration: 340 Train loss: 0.055\n", - "Epoch: 8/10 Iteration: 345 Train loss: 0.078\n", - "Epoch: 8/10 Iteration: 350 Train loss: 0.058\n", - "Val acc: 0.845\n", - "Epoch: 8/10 Iteration: 355 Train loss: 0.050\n", - "Epoch: 8/10 Iteration: 360 Train loss: 0.050\n", - "Epoch: 9/10 Iteration: 365 Train loss: 0.049\n", - "Epoch: 
9/10 Iteration: 370 Train loss: 0.055\n", - "Epoch: 9/10 Iteration: 375 Train loss: 0.061\n", - "Val acc: 0.842\n", - "Epoch: 9/10 Iteration: 380 Train loss: 0.065\n", - "Epoch: 9/10 Iteration: 385 Train loss: 0.069\n", - "Epoch: 9/10 Iteration: 390 Train loss: 0.075\n", - "Epoch: 9/10 Iteration: 395 Train loss: 0.051\n", - "Epoch: 9/10 Iteration: 400 Train loss: 0.075\n", - "Val acc: 0.826\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "epochs = 10\n", "\n", @@ -1143,10 +893,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Testing" ] @@ -1154,11 +901,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [ { "name": "stdout",