Skip to content

Commit

Permalink
Merge pull request #75 from kermitt2/revert-72-master
Browse files Browse the repository at this point in the history
Revert "Changes to make generated xmls valid according to alto 3.1 schema."
  • Loading branch information
kermitt2 authored Sep 29, 2019
2 parents 8970d47 + ad51394 commit dea10fd
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 65 deletions.
2 changes: 1 addition & 1 deletion src/ConstantsXML.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ namespace ConstantsXML {
// All tags ALTO XML dialect
const char *TAG_ALTO = "alto";

const char *ALTO_URI = "http://www.loc.gov/standards/alto/ns-v3#";
const char *ALTO_URI = "http://www.loc.gov/standards/alto/v3/alto.xsd";

const char *TAG_DESCRIPTION = "Description";
const char *TAG_MEASUREMENTUNIT = "MeasurementUnit";
Expand Down
6 changes: 0 additions & 6 deletions src/Parameters.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,6 @@ void Parameters::setReadingOrder(GBool readingOrders) {
unlockGlobalParams;
}

// Store the new value of the charReadingOrderAttr option.
// The assignment is bracketed by lockGlobalParams / unlockGlobalParams, the same
// pattern used by the neighbouring setters (presumably macros guarding the
// shared parameter state -- confirm against their definition).
void Parameters::setCharReadingOrderAttr(GBool charReadingOrderAttrs) {
lockGlobalParams;
charReadingOrderAttr = charReadingOrderAttrs;
unlockGlobalParams;
}

void Parameters::setOcr(GBool ocrA) {
lockGlobalParams;
ocr = ocrA;
Expand Down
14 changes: 0 additions & 14 deletions src/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,6 @@ class Parameters {
*/
GBool getReadingOrder() {return readingOrder;}

/** PL: Return the boolean that controls whether TYPE attributes are added to String elements to
* indicate right-to-left reading order (produces non-valid ALTO)
* @return <code>true</code> if the charReadingOrderAttr option is selected, <code>false</code> otherwise
*/
GBool getCharReadingOrderAttr() {return charReadingOrderAttr;}

/** Return a boolean indicating whether OCR should be applied to recognize non-Unicode glyphs
* @return <code>true</code> if the ocr option is selected, <code>false</code> otherwise
*/
Expand Down Expand Up @@ -128,12 +122,6 @@ class Parameters {
*/
void setReadingOrder(GBool readingOrders);

/** PL: Modify the boolean that controls whether TYPE attributes are added to String elements to indicate
* right-to-left reading order (produces non-valid ALTO)
* @param charReadingOrderAttrs <code>true</code> if the charReadingOrderAttr option is selected, <code>false</code> otherwise
*/
void setCharReadingOrderAttr(GBool charReadingOrderAttrs);

/** Modify the boolean indicating whether OCR should be applied
* @param ocrA <code>true</code> if the ocr option is selected, <code>false</code> otherwise
*/
Expand Down Expand Up @@ -161,8 +149,6 @@ class Parameters {
GBool imageInline;
/** PL: The value of the readingOrder option */
GBool readingOrder;
/** PL: The value of the charReadingOrderAttr option */
GBool charReadingOrderAttr;
/** The value of ocr option */
GBool ocr;
/** the count limit of files */
Expand Down
56 changes: 20 additions & 36 deletions src/XmlAltoOutputDev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2842,10 +2842,6 @@ void TextPage::addWord(TextRawWord *word) {

void TextPage::addAttributTypeReadingOrder(xmlNodePtr node, char *tmp,
IWord *word) {
if (parameters->getCharReadingOrderAttr() == gFalse) {
return;
}

int nbLeft = 0;
int nbRight = 0;

Expand Down Expand Up @@ -4935,15 +4931,12 @@ void TextPage::dumpInReadingOrder(GBool useBlocks, GBool fullFontName) {
snprintf(tmp, sizeof(tmp), ATTR_NUMFORMAT, listeImages[i]->getHeightImage());
xmlNewProp(node, (const xmlChar *) ATTR_HEIGHT, (const xmlChar *) tmp);

std::string rotation = std::to_string(listeImages[i]->getRotation());
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)rotation.c_str());
//if (listeImages[i]->getRotation() > 0){
// xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sTRUE);
//}
//else{
// xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sFALSE);
//}

if (listeImages[i]->getRotation() > 0){
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sTRUE);
}
else{
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sFALSE);
}
// if (listeImages[i]->isImageInline()) {
// xmlNewProp(node, (const xmlChar *) ATTR_INLINE, (const xmlChar *) sTRUE);
// }
Expand Down Expand Up @@ -5792,15 +5785,12 @@ void TextPage::dump(GBool useBlocks, GBool fullFontName) {
snprintf(tmp, sizeof(tmp), ATTR_NUMFORMAT, listeImages[i]->getHeightImage());
xmlNewProp(node, (const xmlChar *) ATTR_HEIGHT, (const xmlChar *) tmp);

std::string rotation = std::to_string(listeImages[i]->getRotation());
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)rotation.c_str());
//if (listeImages[i]->getRotation() > 0){
// xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sTRUE);
//}
//else{
// xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sFALSE);
//}

if (listeImages[i]->getRotation() > 0){
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sTRUE);
}
else{
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)sFALSE);
}
// if (listeImages[i]->isImageInline()) {
// xmlNewProp(node, (const xmlChar *) ATTR_INLINE, (const xmlChar *) sTRUE);
// }
Expand Down Expand Up @@ -5847,14 +5837,11 @@ void TextPage::dump(GBool useBlocks, GBool fullFontName) {
snprintf(tmp, sizeof(tmp), ATTR_NUMFORMAT, svg_ymax - svg_ymin);
xmlNewProp(node, (const xmlChar *) ATTR_HEIGHT, (const xmlChar *) tmp);

std::string rotation = std::to_string(r);
xmlNewProp(node,(const xmlChar*)ATTR_ROTATION,(const xmlChar*)rotation.c_str());
//if (r > 0) {
// xmlNewProp(node, (const xmlChar *) ATTR_ROTATION, (const xmlChar *) sTRUE);
//} else {
// xmlNewProp(node, (const xmlChar *) ATTR_ROTATION, (const xmlChar *) sFALSE);
//}

if (r > 0) {
xmlNewProp(node, (const xmlChar *) ATTR_ROTATION, (const xmlChar *) sTRUE);
} else {
xmlNewProp(node, (const xmlChar *) ATTR_ROTATION, (const xmlChar *) sFALSE);
}
// if (listeImages[i]->isImageInline()) {
// xmlNewProp(node, (const xmlChar *) ATTR_INLINE, (const xmlChar *) sTRUE);
// }
Expand Down Expand Up @@ -7309,10 +7296,8 @@ XmlAltoOutputDev::XmlAltoOutputDev(GString *fileName, GString *fileNamePdf,
xmlAddChild(nodeOCRProcessingStep, nodeProcessingDate);
time_t t;
time(&t);
char tstamp[sizeof "YYYY-MM-DDTHH:MM:SSZ"];
strftime(tstamp, sizeof tstamp, "%FT%TZ", gmtime(&t));
xmlNodeSetContent(nodeProcessingDate, (const xmlChar *) xmlEncodeEntitiesReentrant(
nodeProcessingDate->doc, (const xmlChar *) tstamp));
nodeProcessingDate->doc, (const xmlChar *) ctime(&t)));

xmlNodePtr nodeProcessingSoftware = xmlNewNode(NULL, (const xmlChar *) TAG_PROCESSINGSOFTWARE);
nodeProcessingSoftware->type = XML_ELEMENT_NODE;
Expand Down Expand Up @@ -7527,7 +7512,7 @@ void XmlAltoOutputDev::addStyles() {
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTWIDTH, (const xmlChar *) tmp);

sprintf(tmp, "%s", fontStyleInfo->getFontColor()->getCString());
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTCOLOR, (const xmlChar *) (tmp+1));
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTCOLOR, (const xmlChar *) tmp);

delete fontStyleInfo->getFontColor();

Expand Down Expand Up @@ -7556,8 +7541,7 @@ void XmlAltoOutputDev::addStyles() {
}

sprintf(tmp, "%s", fontStyle->getCString());
if ( strcmp(tmp, "") )
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTSTYLE, (const xmlChar *) tmp);
xmlNewProp(textStyleNode, (const xmlChar *) ATTR_FONTSTYLE, (const xmlChar *) tmp);

delete fontStyle;

Expand Down
8 changes: 0 additions & 8 deletions src/pdfalto.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ static GBool fullFontName = gFalse;
static GBool noImageInline = gFalse;
static GBool annots = gFalse;
static GBool readingOrder = gFalse;
static GBool charReadingOrderAttr = gFalse;
static GBool ocr = gFalse;

static char ownerPassword[33] = "\001";
Expand Down Expand Up @@ -99,8 +98,6 @@ static ArgDesc argDesc[] = {
"add blocks informations within the structure"},
{"-readingOrder", argFlag, &readingOrder, 0,
"blocks follow the reading order"},
{"-charReadingOrderAttr", argFlag, &charReadingOrderAttr, 0,
"include TYPE attribute to String elements to indicate right-to-left reading order (not valid ALTO)"},
// {"-ocr", argFlag, &ocr, 0,
// "recognises all characters that are missing from unicode."},
{"-fullFontName", argFlag, &fullFontName, 0,
Expand Down Expand Up @@ -218,11 +215,6 @@ int main(int argc, char *argv[]) {
cmd->append("-readingOrder ");
}

if (charReadingOrderAttr) {
parameters->setCharReadingOrderAttr(gTrue);
cmd->append("-charReadingOrderAttr ");
}

if (ocr) {
parameters->setOcr(gTrue);
cmd->append("-ocr ");
Expand Down

0 comments on commit dea10fd

Please sign in to comment.