Skip to content

Commit

Permalink
Made logging changes
Browse files Browse the repository at this point in the history
  • Loading branch information
rxtan2 committed Nov 13, 2017
1 parent e58951a commit a2cd67d
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
import de.tudarmstadt.ukp.wikipedia.wikimachine.factory.IEnvironmentFactory;
import de.tudarmstadt.ukp.wikipedia.wikimachine.factory.SpringFactory;
import edu.illinois.cs.cogcomp.wikiparser.constants.JWPLConstants;
import edu.illinois.cs.cogcomp.wikiparser.wikiparse.WikiExtractParser;
import java.io.File;
import java.nio.file.Paths;
import java.util.logging.FileHandler;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;
import java.util.Calendar;
import java.text.SimpleDateFormat;
import edu.illinois.cs.cogcomp.wikiparser.utils.ParserLogger;

import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParserFactory;
Expand All @@ -26,36 +31,28 @@
public class DataMachine {
private static final long startTime = System.currentTimeMillis();
private static final IEnvironmentFactory environmentFactory = SpringFactory.getInstance();
private static final ILogger logger;

static {
logger = environmentFactory.getLogger();
}



/**
 * Command-line entry point for the JWPL DataMachine step.
 *
 * Derives the output directory as the "output" child of the input directory and
 * calls {@code runDM} only when that directory does not exist yet; otherwise it
 * reports that the output folder is already present and does nothing.
 *
 * NOTE(review): this listing comes from a rendered diff whose +/- markers were lost,
 * so some statements below (e.g. the two consecutive severe-level log calls) may
 * belong to different revisions of the method - confirm against the repository.
 *
 * @param args args[0] is the directory handed to {@code runDM}; it is read without a
 *             length check, so running with no arguments throws
 *             ArrayIndexOutOfBoundsException
 */
public static void main(String [] args){
System.out.println("Running JWPL DataMachine parser to generate 11 output files");
String jwplInputDir = args[0];
// The output location is always "<input dir>/output".
String jwplOutputDir = Paths.get(jwplInputDir, "output").toString();
File f = new File(jwplOutputDir);
// NOTE(review): wikiparser is only used for its logger in the catch block below.
WikiExtractParser wikiparser = new WikiExtractParser();
// This local shadows any class-level field that is also named "logger".
ParserLogger logger = new ParserLogger("DataMachine");

System.out.println("Wiki Dump Files Dir: " + jwplInputDir);
System.out.println("JWPL Output Dir: " + jwplOutputDir);

if(!f.exists()){ // Runs DataMachine if output folder does not exist
try{
logger.log.info("Running JWPL DataMachine");
runDM(jwplInputDir);
} catch(Exception e){
// Failures are reported and logged but not rethrown, so execution continues
// to the "Done!" message below.
System.out.println("Failed!!");
wikiparser.logger.severe(e.toString());
logger.log.severe("Exception: " + e.toString());
}
} else {
System.out.println("Output folder already exists.");
}

// Printed and logged on every path, including after a caught failure.
System.out.println("Done!");
logger.log.info("DataMachine done");
}

public static void runDM(String jwplInputDir) throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.util.*;
import java.nio.file.Path;
import java.nio.file.Paths;
import edu.illinois.cs.cogcomp.wikiparser.utils.ParserLogger;

/**
*
Expand All @@ -21,11 +22,11 @@ public class CategoryPagesParser {
private static Set<Integer> disambPageResCurIds; // Stores resolved Ids of disambiguation pages
private static Set<Integer> nondisambPageResCurIds; // Stores resolved Ids of non-disambiguation pages
private static Set<Integer> nondisambPageResCurIds_noList; // Stores resolved Ids of non-disambiguation pages

private ParserLogger logger = new ParserLogger("CategoryPagesParser");

public CategoryPagesParser(String outputDir) {
this.outputDir = outputDir;

logger.log.info("Parsing CategoryPages.txt");
System.out.println("CategoryPages Parser");
System.out.println("Parses category_pages.txt to produce:");
System.out.println("[1] redCurIds -> Category Tiles: " + JWPLConstants.resCurIdToCatTitles);
Expand All @@ -43,6 +44,7 @@ public CategoryPagesParser(String outputDir) {

private void writeResIdsToCatTitles(){
// Writes map from resolved cur ids to set of category titles
logger.log.info("Writes map from resolved cur ids to set of category titles");
Path filePath = Paths.get(outputDir, JWPLConstants.resCurIdToCatTitles);
File file = new File(filePath.toString());
try{
Expand All @@ -61,6 +63,7 @@ private void writeResIdsToCatTitles(){
bw.close();
}
catch (IOException e){
logger.log.severe(e.toString());
e.printStackTrace();
System.exit(-1);
}
Expand All @@ -69,6 +72,7 @@ private void writeResIdsToCatTitles(){
private void writeCurId2Title(Set<Integer> curIds, Path filePath) {
// Writes map from page ids to page titles
// Path filePath = Paths.get(outputDir, JWPLConstants.resCurId2ResTitle);
logger.log.info("Writes map from page ids to page titles");
System.out.println("Number of titles in resCurIdsToTitles: " + PageMapLineParser.resCurIdsToTitles.keySet().size());

File file = new File(filePath.toString());
Expand All @@ -87,6 +91,7 @@ private void writeCurId2Title(Set<Integer> curIds, Path filePath) {
bw.close();
}
catch (IOException e){
logger.log.severe(e.toString());
e.printStackTrace();
System.exit(-1);
}
Expand Down Expand Up @@ -173,7 +178,9 @@ public void parseCategoryPages(String CategoryPages){
nondisambPageResCurIds_noList.removeAll(disambPageResCurIds);

writeToFiles(); // Writes outputs to files
logger.log.info("Parsing of CategoryPages.txt done");
} catch (IOException e) {
logger.log.severe(e.toString());
e.printStackTrace();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import edu.illinois.cs.cogcomp.wikiparser.utils.ParserLogger;

/**
* This class receives the Category.txt file as input.
Expand All @@ -21,8 +22,10 @@ public class CategoryParser {
public static Map<Integer, String> disambigCatIdToDisambCatTitle; // Maps category id to disambiguation categories
public static Map<Integer, String> idToCat; // Maps category id to category titles
private static Map<String, Integer> catToId; // Maps category titles to category id
private ParserLogger logger = new ParserLogger("CategoryParser");

public CategoryParser(String outputDir){
logger.log.info("Parsing Category.txt");
this.outputDir = outputDir;
System.out.println("Category Parser");
System.out.println("Parses Category.txt to produce:");
Expand All @@ -37,6 +40,7 @@ public CategoryParser(String outputDir){

private void writeToFiles(){
// Writes map from category id (first column) to category title (second column)
logger.log.info("Writes map from category id to category title");
Path filePath = Paths.get(outputDir, JWPLConstants.catIdToCatTitle);
File file = new File(filePath.toString());
try{
Expand All @@ -48,6 +52,7 @@ private void writeToFiles(){
bw.close();
}
catch (IOException e){
logger.log.severe(e.toString());
e.printStackTrace();
System.exit(-1);
}
Expand Down Expand Up @@ -75,6 +80,7 @@ public void parseCategory(String CategoryFile){
fileReader.close();
writeToFiles(); // Writes output to file
} catch (IOException e) {
logger.log.severe(e.toString());
e.printStackTrace();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.io.IOException;
import java.util.*;
import edu.illinois.cs.cogcomp.wikiparser.constants.JWPLConstants;
import edu.illinois.cs.cogcomp.wikiparser.utils.ParserLogger;

/**
* This class receives the PageMapLine.txt file as input. It will
Expand All @@ -25,6 +26,7 @@ public class PageMapLineParser {
public static Map<Integer, String> curidsToTitles; // Map from all Cur Ids to page titles
public static Map<Integer, String> resCurIdsToTitles; // Map from all Cur Ids to page titles
public static Map<Integer, String> resCurIdsToTitles_nonList; // Map from all Cur Ids to page titles
private ParserLogger logger = new ParserLogger("PageMapLineParser");

public PageMapLineParser(String outputDir){
this.outputDir = outputDir;
Expand All @@ -37,7 +39,7 @@ public PageMapLineParser(String outputDir){
System.out.println("[5] Resolved List Page Ids: "+ JWPLConstants.resListPages);

System.out.println("[#] Output Folder: " + outputDir);

logger.log.info("Parsing PageMapLine.txt");
curIds = new HashSet(); // Stores all CurIds, both resolved and unresolved
resolvedCurIds = new HashSet<Integer>(); // Stores resolved Cur Ids
listPages = new HashSet<Integer>(); // Stores Cur Ids which are list pages
Expand Down Expand Up @@ -103,6 +105,7 @@ public PageMapLineParser(String outputDir){

private void writeCurId2Title(){
// Writes map from page ids to page titles
logger.log.info("Writes map from page ids to page titles");
Path filePath = Paths.get(outputDir, JWPLConstants.curId2Title);
File file = new File(filePath.toString());
try{
Expand All @@ -114,13 +117,15 @@ private void writeCurId2Title(){
bw.close();
}
catch (IOException e){
logger.log.severe(e.toString());
e.printStackTrace();
System.exit(-1);
}
}

private void writeResCurId2ResTitle(){
// Writes map from page ids to page titles
logger.log.info("Writes map from page ids to page titles");
Path filePath = Paths.get(outputDir, JWPLConstants.resCurId2ResTitle);
File file = new File(filePath.toString());
try{
Expand All @@ -132,13 +137,15 @@ private void writeResCurId2ResTitle(){
bw.close();
}
catch (IOException e){
logger.log.severe(e.toString());
e.printStackTrace();
System.exit(-1);
}
}

private void writeResCurId2ResTitle_nonList() {
// Writes map from page ids to page titles
logger.log.info("Writes map from page ids to page titles");
Path filePath = Paths.get(outputDir, JWPLConstants.resCurId2ResTitle_nonList);
File file = new File(filePath.toString());
try{
Expand All @@ -150,6 +157,7 @@ private void writeResCurId2ResTitle_nonList() {
bw.close();
}
catch (IOException e){
logger.log.severe(e.toString());
e.printStackTrace();
System.exit(-1);
}
Expand All @@ -175,6 +183,7 @@ private void writeResCurId2ResTitle_nonList() {

private void writeListPageIdsTitle(){
// Writes list of ids which belong to list pages
logger.log.info("Writes list of ids which belong to list pages");
Path filePath = Paths.get(outputDir, JWPLConstants.resListPageCurId2ResTitle);
File file = new File(filePath.toString());
try{
Expand All @@ -187,6 +196,7 @@ private void writeListPageIdsTitle(){
bw.close();
}
catch (IOException e){
logger.log.severe(e.toString());
e.printStackTrace();
System.exit(-1);
}
Expand Down Expand Up @@ -247,7 +257,9 @@ public void parsePageMap(String pageMapFile){
fileReader.close();
mapUnresolvedToResolved();
writeToFiles(); // Writes outputs to files
logger.log.info("Parsing of PageMapLine.txt done");
} catch (IOException e) {
logger.log.severe(e.toString());
e.printStackTrace();
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package edu.illinois.cs.cogcomp.wikiparser.utils;

import edu.illinois.cs.cogcomp.wikiparser.constants.JWPLConstants;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.logging.FileHandler;
import java.util.logging.Handler;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;

/**
 * Small wrapper that exposes a {@link Logger} writing to its own log file.
 *
 * <p>The logger is looked up by the given name and, on first use of that name, gets a
 * {@link FileHandler} with a {@link SimpleFormatter} that writes to
 * {@code <user.dir>/logs/<className>_<yyyyMMdd_HHmmss>.log}. Parent handlers are
 * switched off once the file handler is attached, so records go to the file only.
 *
 * <p>{@link Logger#getLogger(String)} hands back the same logger for the same name, so
 * a second {@code ParserLogger} created with an already-configured name reuses the
 * existing file handler instead of stacking another one (which would duplicate every
 * record and leave an extra open log file behind).
 *
 * @author Reuben-PC
 */
public class ParserLogger {
    /** Underlying logger; callers log through this field directly. */
    public Logger log;
    /** File handler attached by this instance; null when an existing one was reused or setup failed. */
    private FileHandler fh;

    /**
     * Creates (or reuses) the file-backed logger registered under {@code className}.
     *
     * <p>If the log file cannot be opened, the stack trace is printed and the logger is
     * left with its default configuration, so logging calls still work.
     *
     * @param className logger name, also used as the prefix of the log file name
     */
    public ParserLogger(String className){
        log = Logger.getLogger(className);

        // Same name => same Logger instance; do not attach a second file handler to it.
        if (hasFileHandler(log)) {
            return;
        }

        String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime());

        // Checks if logs directory exists. If not, create it
        Path logDirPath = Paths.get(System.getProperty("user.dir"), "logs");
        File logDir = logDirPath.toFile();
        if (!logDir.exists()) {
            // A failure here surfaces as an IOException from the FileHandler below.
            logDir.mkdirs();
        }

        Path filePath = logDirPath.resolve(className + "_" + timeStamp + ".log");
        try {
            fh = new FileHandler(filePath.toString());
            fh.setFormatter(new SimpleFormatter());
            log.addHandler(fh);
            log.setUseParentHandlers(false);
        } catch (IOException | SecurityException e){
            e.printStackTrace();
        }
    }

    /** Returns true when the given logger already has a FileHandler attached. */
    private static boolean hasFileHandler(Logger logger) {
        for (Handler handler : logger.getHandlers()) {
            if (handler instanceof FileHandler) {
                return true;
            }
        }
        return false;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import edu.illinois.cs.cogcomp.wikiparser.utils.FileUtils;
import edu.illinois.cs.cogcomp.wikiparser.utils.Pair;
import edu.illinois.cs.cogcomp.wikiparser.utils.ParserLogger;

/**
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ public static void main(String [] args){
WikiExtractParser wikiparser = new WikiExtractParser();
wikiparser.wikiDirectory = args[0];
wikiparser.outputDir = args[1];
wikiparser.logger.info("Starting to Parse Wiki Texts");
wikiparser.extractWiki();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
import java.util.logging.FileHandler;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;
import edu.illinois.cs.cogcomp.wikiparser.utils.ParserLogger;

/**
* This class receives as input the output of the python parser which is a folder of folder
* of wiki text files. It reads each text file in the directory and parses them into
* serialized lists of WikiPage objects
*/
public class WikiExtractParser {
public Logger logger = Logger.getLogger("WikiExtractParser");
private ParserLogger logger = new ParserLogger("WikiExtractParser");
public String logfile = System.getProperty("user.dir") + "/logs/ExtractedWiki.log";
public String wikiDirectory;
public String outputDir;
Expand All @@ -25,20 +26,6 @@ public class WikiExtractParser {

/**
 * Builds the parser: obtains the worker pool from {@code getBoundedThreadPool()} and
 * wires the logger to a log file.
 *
 * NOTE(review): the hunk header for this region (-25,20 +26,6) suggests the whole
 * try/catch below was removed by this commit in favour of ParserLogger - confirm
 * against the repository before relying on it.
 */
public WikiExtractParser() {
parser = getBoundedThreadPool();
try {
// This block configures the logger with a file handler and a formatter
// "logs" is resolved against the process working directory; mkdir()'s result is
// ignored, so an already existing directory is not treated as an error.
File dir = new File("logs");
dir.mkdir();
fh = new FileHandler(logfile);
logger.addHandler(fh);
SimpleFormatter formatter = new SimpleFormatter();
fh.setFormatter(formatter);
// Stop records from also going to the parent logger's handlers.
logger.setUseParentHandlers(false);
// Emits a first record through the newly attached handler
logger.info("Static Function");
} catch (Exception e) {
// Logger setup is best-effort: any failure is printed and construction continues.
e.printStackTrace();
}
}

public static ThreadPoolExecutor getBoundedThreadPool() {
Expand All @@ -65,18 +52,18 @@ public void extractWiki(){
Iterator<File> i = org.apache.commons.io.FileUtils.iterateFiles(inDir, null, true);

int totalFiles = 0;

logger.log.info("Starting to Parse Wiki Texts");
// Reads all of the files in the given directory
while(i.hasNext()){
totalFiles ++;
File file = i.next();
String infilepath = file.toString();
String outfilepath = outputDir + "/tmp" + Integer.toString(totalFiles) + ".ser";

logger.log.info("Parsing Wiki Text " + Integer.toString(totalFiles));
// Give this to thread runner
parser.execute(new FileParser(infilepath, outfilepath, logger));
parser.execute(new FileParser(infilepath, outfilepath, logger.log));
}

logger.log.info("Total Files: " + Integer.toString(totalFiles));
System.out.println("[#] Total Files: " + totalFiles);
}
}

0 comments on commit a2cd67d

Please sign in to comment.