Skip to content

Commit

Permalink
Merge pull request #9 from GoodMeasuresLLC/make_it_current
Browse files Browse the repository at this point in the history
Update the gem for new rubies, new website formats
  • Loading branch information
iancanderson committed Dec 28, 2015
2 parents d82ac1d + 646c643 commit 3110cda
Show file tree
Hide file tree
Showing 60 changed files with 49,815 additions and 40,137 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
language: ruby
rvm:
- 2.2.0
- 2.1.2
- 2.0.0
- 1.9.3
#- jruby-19mode # JRuby in 1.9 mode - disabled for now due to differences in JRuby Nokogiri results...
- rbx-19mode
47 changes: 47 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,54 @@
require "bundler/gem_tasks"

require 'rspec/core/rake_task'
require 'httparty'

RSpec::Core::RakeTask.new(:spec)

task :default => :spec

# User-Agent header sent with every fixture request.
# NOTE(review): presumably a browser-like value is needed because some recipe
# sites reject non-browser clients — confirm.
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'.freeze

# Pairs of [live recipe URL, fixture path]. Each fixture path is resolved
# against the current working directory when the task runs.
FIXTURE_SOURCES = [
  ['http://www.chow.com/recipes/30700-strawberry-rhubarb-pie-with-sour-cream-crust', 'spec/fixtures/schema.org/data-vocabulary_org/chow.com.html'],
  ['http://www.myrecipes.com/recipe/best-carrot-cake-10000000257583/', 'spec/fixtures/schema.org/data-vocabulary_org/myrecipes.com.html'],
  ['http://www.saveur.com/article/Recipes/Smoked-Trout-Blinis-with-Creme-Fraiche-and-Dill', 'spec/fixtures/schema.org/data-vocabulary_org/saveur.com.html'],
  ['http://www.tarladalal.com/5-Spice-Vegetable-Fried-Rice-8631r', 'spec/fixtures/schema.org/data-vocabulary_org/tarladalal.com.html'],
  ['http://www.taste.com.au/recipes/24586/lemon+melting+moments', 'spec/fixtures/schema.org/data-vocabulary_org/taste.com.au.html'],
  ['http://allrecipes.com/Recipe/Roasted-Vegetable-and-Couscous-Salad/Detail.aspx', 'spec/fixtures/schema_org/allrecipes.html'],
  ['http://www.bettycrocker.com/recipes/skillet-chicken-nachos/9bf0c3be-09dd-4b1b-8cf4-a9cfa979b232', 'spec/fixtures/schema_org/betty_crocker.html'],
  ['http://www.copykat.com/2014/12/03/low-fat-scalloped-potatoes/', 'spec/fixtures/schema_org/copykat.com.html'],
  ['http://www.eatingwell.com/recipes/sauteed_chicken_breasts_with_creamy_chive_sauce.html', 'spec/fixtures/schema_org/eatingwell.com.html'],
  ['http://www.food.com/recipe/panda-express-orange-chicken-103215', 'spec/fixtures/schema_org/food.com.html'],
  ['http://www.foodnetwork.com/recipes/rachael-ray/spinach-and-mushroom-stuffed-chicken-breasts-recipe.html', 'spec/fixtures/schema_org/food_network_schema_org.html'],
  ['http://www.foodnetwork.com/recipes/food-network-kitchens/easter-bunny-cake-recipe/index.html', 'spec/fixtures/schema_org/food_network_with_blank_ingredients.html'],
  ['http://www.foodandwine.com/recipes/honey-glazed-roasted-root-vegetables', 'spec/fixtures/schema_org/foodandwine.com.html'],
  ['http://www.pillsbury.com/recipes/big-cheesy-pepperoni-hand-pies/a17766e6-30ce-4a0c-af08-72533bb9b449', 'spec/fixtures/schema_org/pillsbury.com.html'],
  ['http://www.bbc.co.uk/food/recipes/paella_7100', 'spec/fixtures/hrecipe/bbc.co.uk.html'],
  ['http://www.bigoven.com/recipe/steves-fish-tacos/178920', 'spec/fixtures/hrecipe/bigoven.html'],
  ['http://www.campbellskitchen.com/recipes/recipedetails?recipeid=60821', 'spec/fixtures/hrecipe/campbellskitchen.com.html'],
  ['http://www.cooking.com/recipes-and-more/recipes/garlic-shrimp-recipe-41.aspx', 'spec/fixtures/hrecipe/cooking.com.html'],
  ['http://www.cooks.com/recipe/oc0mk7z3/lemon-bars-deluxe.html', 'spec/fixtures/hrecipe/cooks.com.html'],
  ['http://www.drinksmixer.com/drink2438.html', 'spec/fixtures/hrecipe/drinksmixer.com.html'],
  ['http://www.epicurious.com/recipes/food/views/grilled-turkey-burgers-with-cheddar-and-smoky-aioli-354289', 'spec/fixtures/hrecipe/epicurious.html'],
  ['http://www.grouprecipes.com/135867/deep-dark-chocolate-cheesecake.html', 'spec/fixtures/hrecipe/grouprecipes.com.html'],
  ['http://homecooking.about.com/od/muffinrecipes/r/blmuff23.htm', 'spec/fixtures/hrecipe/homecooking.about.com.html'],
  ['http://www.jamieoliver.com/recipes/pork-recipes/neck-fillet-steak/', 'spec/fixtures/hrecipe/jamieoliver.com.html'],
  ['http://www.mrfood.com/Slow-Cooker-Recipes/Saucy-Italian-Pot-Roast-4268', 'spec/fixtures/hrecipe/mrfood.com.html'],
  ['http://southernfood.about.com/od/collardgreens/r/Kale-Saute-Recipe.htm', 'spec/fixtures/hrecipe/southernfood.about.com.html'],
  ['http://www.tasteofhome.com/recipes/rhubarb-popover-pie', 'spec/fixtures/hrecipe/tasteofhome.com.html']
].freeze

desc 'Fetch the current versions of all the fixtures'
task :update_fixtures do
  FIXTURE_SOURCES.each do |source, fixture|
    puts "fetching #{source} to update #{fixture}"
    begin
      response = HTTParty.get(source, headers: { 'User-Agent' => USER_AGENT })
      if response.code == 200
        # Overwrite the fixture only on a 200 response so a failed fetch
        # never replaces an existing fixture.
        File.write(File.join(Dir.pwd, fixture), response.body)
      else
        warn "failed: HTTP #{response.code} for #{source}"
      end
    rescue StandardError => e
      # Best-effort: one unreachable site must not abort the remaining
      # downloads, but the failure is reported instead of silently dropped.
      warn "failed: #{e.class}: #{e.message} (#{source})"
    end
  end
end
10 changes: 6 additions & 4 deletions hangry.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ Gem::Specification.new do |gem|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
gem.add_development_dependency('rake')
gem.add_development_dependency('rspec')
gem.add_development_dependency('rspec', '>= 3.0')
gem.add_development_dependency('pry')
gem.add_dependency('activesupport', '~> 3.0')
gem.add_dependency('iso8601', '~> 0.4.0')
gem.add_dependency('nokogiri', '~> 1.5')
gem.add_development_dependency('httparty')
gem.add_dependency('activesupport', '>= 3.0')
gem.add_dependency('iso8601', '>= 0.4.0')
gem.add_dependency('nokogiri', '>= 1.5')
gem.add_development_dependency('rspec-its')
gem.name = "hangry"
gem.require_paths = ["lib"]
gem.version = Hangry::VERSION
Expand Down
4 changes: 4 additions & 0 deletions lib/hangry/data_vocabulary_recipe_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ def parse_yield
node_with_itemprop(:yield).content
end

# Reads the total time from the first node matching the ".duration" selector:
# every non-digit character is removed from its content and the remaining
# digits are converted to an Integer. Returns nil when no such node exists.
# NOTE(review): the unit of the resulting number is not established here — confirm.
def parse_total_time
  duration_nodes = recipe_ast.css(".duration")
  return if duration_nodes.empty?

  digits_only = duration_nodes.first.content.gsub(/\D/, '')
  digits_only.to_i
end

end

end
Expand Down
15 changes: 15 additions & 0 deletions lib/hangry/parser_class_selecter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,16 @@
require 'hangry/hrecipe_parser'
require 'hangry/schema_org_recipe_parser'
require 'hangry/data_vocabulary_recipe_parser'

require 'hangry/parsers/non_standard/all_recipes_parser'
require 'hangry/parsers/non_standard/bigoven_parser'
require 'hangry/parsers/non_standard/copykat_parser'
require 'hangry/parsers/non_standard/eating_well_parser'
require 'hangry/parsers/non_standard/epicurious_parser'
require 'hangry/parsers/non_standard/food_network_parser'
require 'hangry/parsers/non_standard/home_cooking_parser'
require 'hangry/parsers/non_standard/jamie_oliver_parser'
require 'hangry/parsers/non_standard/southern_food_parser'
require 'hangry/parsers/non_standard/taste_of_home_parser'

module Hangry
Expand All @@ -17,7 +25,14 @@ def parser_class
# Prefer the more specific parsers
parser_classes = [
Parsers::NonStandard::AllRecipesParser,
Parsers::NonStandard::BigOvenParser,
Parsers::NonStandard::CopykatParser,
Parsers::NonStandard::EatingWellParser,
Parsers::NonStandard::EpicuriousParser,
Parsers::NonStandard::FoodNetworkParser,
Parsers::NonStandard::HomeCookingParser,
Parsers::NonStandard::JamieOliverParser,
Parsers::NonStandard::SouthernFoodParser,
Parsers::NonStandard::TasteOfHomeParser
]
parser_classes += [SchemaOrgRecipeParser, HRecipeParser, DataVocabularyRecipeParser]
Expand Down
18 changes: 18 additions & 0 deletions lib/hangry/parsers/non_standard/bigoven_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
module Hangry
  module Parsers
    module NonStandard
      # Site-specific parser for bigoven.com, layered on top of
      # DataVocabularyRecipeParser.
      class BigOvenParser < DataVocabularyRecipeParser
        # Delegates to canonical_url_matches_domain? to decide whether the
        # given HTML belongs to bigoven.com.
        def self.can_parse?(html)
          canonical_url_matches_domain?(html, 'bigoven.com')
        end

        # Returns the whitespace-stripped content of each node yielded by
        # nodes_with_itemprop(:ingredients).
        def parse_ingredients
          nodes_with_itemprop(:ingredients).map { |ingredient_node| ingredient_node.content.strip }
        end
      end
    end
  end
end

18 changes: 18 additions & 0 deletions lib/hangry/parsers/non_standard/copykat_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
module Hangry
  module Parsers
    module NonStandard
      # Site-specific parser for copykat.com, layered on top of
      # SchemaOrgRecipeParser.
      class CopykatParser < SchemaOrgRecipeParser
        # Delegates to canonical_url_matches_domain? to decide whether the
        # given HTML belongs to copykat.com.
        def self.can_parse?(html)
          canonical_url_matches_domain?(html, 'copykat.com')
        end

        # Returns a fixed author string instead of reading it from the page.
        # NOTE(review): the original comment questioned whether every recipe
        # on the site really has this author — confirm.
        def parse_author
          "Stephanie Manley via CopyKat.com"
        end
      end
    end
  end
end
28 changes: 28 additions & 0 deletions lib/hangry/parsers/non_standard/epicurious2_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
module Hangry
  module Parsers
    module NonStandard
      # Parser for epicurious.com pages whose canonical URL contains
      # "recipes/member/views", layered on top of SchemaOrgRecipeParser.
      # NOTE(review): presumably this must be consulted before any parser that
      # claims every epicurious.com page — verify how it is registered.
      class Epicurious2Parser < SchemaOrgRecipeParser
        # Truthy only when both the domain check and the path check succeed;
        # the path check is not evaluated when the domain check fails.
        def self.can_parse?(html)
          on_epicurious = canonical_url_matches_domain?(html, 'epicurious.com')
          on_epicurious && canonical_url_contains_path?(html, 'recipes/member/views')
        end

        # Content of the first ".truncatedTextModuleText" node found under
        # "#recipeIntroText".
        def parse_description
          intro_nodes = recipe_ast.css("#recipeIntroText")
          intro_nodes.css(".truncatedTextModuleText").first.content
        end

        # Ingredients live in the "#ingredients" element, separated by <br>.
        # Each child node's text is stripped; blank entries and the literal
        # "ingredients" heading (compared case-insensitively) are dropped.
        def parse_ingredients
          stripped_texts = recipe_ast.css("#ingredients").children.map { |child| child.text.strip }
          stripped_texts.reject { |text| text.blank? || text.downcase == "ingredients" }
        end
      end
    end
  end
end
21 changes: 21 additions & 0 deletions lib/hangry/parsers/non_standard/epicurious_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
module Hangry
  module Parsers
    module NonStandard
      # Site-specific parser for epicurious.com, layered on top of
      # SchemaOrgRecipeParser.
      class EpicuriousParser < SchemaOrgRecipeParser
        # Delegates to canonical_url_matches_domain? to decide whether the
        # given HTML belongs to epicurious.com.
        def self.can_parse?(html)
          canonical_url_matches_domain?(html, 'epicurious.com')
        end

        # Content of every node matching the ".ingredient" selector.
        def parse_ingredients
          recipe_ast.css(".ingredient").map { |ingredient_node| ingredient_node.content }
        end

        # Content of each <p> that is a direct child of the
        # "recipeInstructions" node, joined with newlines.
        def parse_instructions
          paragraphs = node_with_itemprop("recipeInstructions").css(">p")
          paragraphs.map { |paragraph| paragraph.content }.join("\n")
        end
      end
    end
  end
end
18 changes: 18 additions & 0 deletions lib/hangry/parsers/non_standard/food_network_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
module Hangry
  module Parsers
    module NonStandard
      # Site-specific parser for foodnetwork.com, layered on top of
      # SchemaOrgRecipeParser.
      class FoodNetworkParser < SchemaOrgRecipeParser
        # Delegates to canonical_url_matches_domain? to decide whether the
        # given HTML belongs to foodnetwork.com.
        def self.can_parse?(html)
          canonical_url_matches_domain?(html, 'foodnetwork.com')
        end

        # Content of every <p> found under the :recipeInstructions node,
        # joined with newlines.
        def parse_instructions
          paragraphs = node_with_itemprop(:recipeInstructions).css("p")
          paragraphs.map { |paragraph| paragraph.content }.join("\n")
        end
      end
    end
  end
end

20 changes: 20 additions & 0 deletions lib/hangry/parsers/non_standard/home_cooking_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module Hangry
  module Parsers
    module NonStandard
      # Site-specific parser for homecooking.about.com, layered on top of
      # SchemaOrgRecipeParser.
      class HomeCookingParser < SchemaOrgRecipeParser
        # True when the page has a canonical URL and that URL contains
        # "homecooking.about.com"; false when there is no canonical URL.
        def self.can_parse?(html)
          canonical_url = CanonicalUrlParser.new(html).canonical_url
          !canonical_url.nil? && canonical_url.include?('homecooking.about.com')
        end

        # Stripped content of the first ".expert-content-text" node.
        def parse_description
          description_node = recipe_ast.css(".expert-content-text").first
          description_node.content.strip
        end

        # Content of the node looked up with the itemprop value "headline name".
        def parse_name
          name_node = node_with_itemprop("headline name")
          name_node.content
        end
      end
    end
  end
end
20 changes: 20 additions & 0 deletions lib/hangry/parsers/non_standard/jamie_oliver_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module Hangry
  module Parsers
    module NonStandard
      # Site-specific parser for jamieoliver.com, layered on top of
      # HRecipeParser.
      class JamieOliverParser < HRecipeParser
        # True when the raw HTML contains the text "jamieoliver.com" anywhere.
        # NOTE(review): unlike the sibling parsers this does not look at the
        # canonical URL, so any page mentioning the domain matches — confirm
        # this is intended.
        def self.can_parse?(html)
          html.include?('jamieoliver.com')
        end

        # Content of the first <i> element found under ".instructions".
        def parse_description
          italic_nodes = recipe_ast.css(".instructions").css("i")
          italic_nodes.first.content
        end

        # Content of every ".instructions" node under ".content", joined with
        # newlines.
        def parse_instructions
          instruction_nodes = recipe_ast.css(".content").css(".instructions")
          instruction_nodes.map { |instruction_node| instruction_node.content }.join("\n")
        end
      end
    end
  end
end
26 changes: 26 additions & 0 deletions lib/hangry/parsers/non_standard/southern_food_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
module Hangry
  module Parsers
    module NonStandard
      # Site-specific parser for southernfood.about.com, layered on top of
      # SchemaOrgRecipeParser.
      class SouthernFoodParser < SchemaOrgRecipeParser
        # True when the page has a canonical URL and that URL contains
        # "southernfood.about.com"; false when there is no canonical URL.
        def self.can_parse?(html)
          canonical_url = CanonicalUrlParser.new(html).canonical_url
          !canonical_url.nil? && canonical_url.include?('southernfood.about.com')
        end

        # Content of the node looked up with the itemprop value "headline name".
        def parse_name
          name_node = node_with_itemprop("headline name")
          name_node.content
        end

        # Stripped content of the first ".expert-content-text" node.
        def parse_description
          description_node = recipe_ast.css(".expert-content-text").first
          description_node.content.strip
        end

        # Content of every <li> under the "recipeInstructions" node, joined
        # with newlines.
        def parse_instructions
          list_items = node_with_itemprop("recipeInstructions").css("li")
          list_items.map { |list_item| list_item.content }.join("\n")
        end
      end
    end
  end
end

19 changes: 12 additions & 7 deletions lib/hangry/parsers/non_standard/taste_of_home_parser.rb
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
module Hangry
module Parsers
module NonStandard
class TasteOfHomeParser < HRecipeParser
class TasteOfHomeParser < SchemaOrgRecipeParser

# CSS attribute selector returned as this parser's root selector: it targets
# elements whose itemtype is "http://schema.org/recipe" (lower-case "recipe").
def self.root_selector
  %q([itemtype="http://schema.org/recipe"])
end

# Delegates to canonical_url_matches_domain? to decide whether the given HTML
# belongs to tasteofhome.com.
def self.can_parse?(html)
  site_domain = 'tasteofhome.com'
  canonical_url_matches_domain?(html, site_domain)
end

def nodes_with_class(klass)
super.reject { |node|
# Taste of Home has nested elements with the 'ingredient' class.
# So reject all nodes with a child that has the same class.
node.css(".#{klass}").any?
}
def parse_yield
value(node_with_itemprop(:recipeyield).content)
end

# Collects the content of every ".rd_ingredient" node inside the first
# ".rd_directions" element and joins the pieces with newlines.
def parse_instructions
  directions = recipe_ast.css(".rd_directions").first
  steps = directions.css(".rd_ingredient").map { |step_node| step_node.content }
  steps.join("\n")
end

end
Expand Down
11 changes: 9 additions & 2 deletions lib/hangry/schema_org_recipe_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def nutrition_property_value(itemprop)
def parse_author
author_node = node_with_itemprop(:author)
author = if author_node['itemtype'] == "http://schema.org/Person"
author_node.css('[itemprop = "name"]').first['content']
#author_node.css('[itemprop = "name"]').first['content']
author_node.css('[itemprop = "name"]').first.content
else
author_node.content
end
Expand All @@ -56,7 +57,13 @@ def parse_ingredients
}.reject(&:blank?)
end
# Builds the instruction text from the nodes returned by
# nodes_with_itemprop(:recipeInstructions). Each node's stripped content is
# appended on its own line (every appended line ends with "\n") unless that
# text already occurs anywhere in what has been accumulated so far. The check
# is a substring match, so blank content is always skipped.
# NOTE(review): presumably the substring check exists to avoid repeated text
# from duplicated or nested instruction nodes — confirm.
def parse_instructions
  nodes_with_itemprop(:recipeInstructions).reduce("") do |collected, node|
    step_text = node.content.strip
    collected.include?(step_text) ? collected : "#{collected}#{step_text}\n"
  end
end
def parse_name
node_with_itemprop(:name).content
Expand Down
Loading

0 comments on commit 3110cda

Please sign in to comment.