diff mbox

[PING] _Cilk_for for C and C++

Message ID BF230D13CA30DD48930C31D4099330003A4CCDA4@FMSMSX101.amr.corp.intel.com
State New
Headers show

Commit Message

Iyer, Balaji V Feb. 7, 2014, 10:14 p.m. UTC
Hi Jakub,
	Attached, please find a fixed patch. Along with it, I have also added 2 changelog files for C and C++ respectively.

Thanks,

Balaji V. Iyer.

> -----Original Message-----
> From: Jakub Jelinek [mailto:jakub@redhat.com]
> Sent: Friday, February 7, 2014 9:53 AM
> To: Iyer, Balaji V
> Cc: 'Jason Merrill'; 'Jeff Law'; 'Aldy Hernandez'; 'gcc-patches@gcc.gnu.org';
> 'rth@redhat.com'
> Subject: Re: [PING] [PATCH] _Cilk_for for C and C++
> 
> On Fri, Feb 07, 2014 at 02:33:41PM +0000, Iyer, Balaji V wrote:
> > > So, the issues I see:
> > > 1) what is iter.1, why do you have it at all, and, after all, the
> > > iterator is a class that needs to be constructed/destructed in the
> > > general way, so creating any further copies of something is both
> > > costly and undesirable
> > >
> >
> > Well, to get the loop count, I need to calculate it using operator-(array.end
> (), &iter).
> >
> > Now, if I do that iter is already set. I need to reset iter back to
> > the original one (array.begin ()) in the child function.  This is why
> > I used a temporary variable called iter1.
> 
> operator- shouldn't really change iter, if it does, it is purely the user's fault,
> isn't it?  It isn't operator -=, so it shouldn't really change array.end () either.
> 

This is fixed. Instead of creating a variable and doing the manual copying, I added a FIRSTPRIVATE clause.

> > > 2) the schedule clause doesn't belong on the omp parallel, but on
> > > the _Cilk_for
> > >
> >
> > What if grain is a variable say "x"? If I have it in the _Cilk_for,
> > then won't it create omp_data_i->x.  That is not correct.  It should
> > just emit "x." But let me look into this to make sure...
> 
> You certainly should gimplify the clause operand before the omp parallel, it
> must be an integral anyway, right?  So just use get_temp_regvar?
> Then simply use firstprivate on the #pragma omp parallel.  When you actually
> omp expand, you'll still be able to find the original variable and look it up on
> the parallel.  But, if you can't make it work, guess I could live with the clause
> on the parallel.
> 

This is fixed too.

> > > 3) iter should be firstprivate, and there should be no explicit
> > > private var with assignment during gimplification, just handle it
> > > like any other firstprivate during omp lowering
> > >

Yes, this is what I did.

> >
> > Do you mean to say I should manually insert a firstprivate for iter
> > and not the system figure out that it is shared?
> 
> Yes.  The class iterator is quite special thing, because already the C++ FE
> lowers it to an integral iterator instead.  And when you make it firstprivate,
> omp lowering/expansion should take care of running the copy
> constructor/destructor in the parallel for you.
> 

I have also fixed the gimple/tree pretty-print issue. Is this OK?

Thanks,

Balaji V. Iyer.

Comments

Jakub Jelinek Feb. 10, 2014, 5:57 p.m. UTC | #1
On Fri, Feb 07, 2014 at 10:14:21PM +0000, Iyer, Balaji V wrote:
> 	Attached, please find a fixed patch. Along with it, I have also
> added 2 changelog files for C and C++ respectively.

Have you even looked at the second testcase I've posted?
gimplification ICEs on it with your latest patch, because firstprivate
clause is added for the same variable multiple times, and it seems parallel
still isn't around _Cilk_for.

	Jakub
diff mbox

Patch

diff --git a/gcc/c-family/c-cilkplus.c b/gcc/c-family/c-cilkplus.c
index 1a16f66..328f014 100644
--- a/gcc/c-family/c-cilkplus.c
+++ b/gcc/c-family/c-cilkplus.c
@@ -91,3 +91,53 @@  c_finish_cilk_clauses (tree clauses)
     }
   return clauses;
 }
+
+/* Structure used to pass information into a walk_tree function and
+   find_cilk_for.  */
+struct clause_struct
+{
+  bool is_set;
+  tree clauses;
+};
+
+/* Helper function for walk_tree used in cilk_for_move_clauses_upward.
+   If *TP is a CILK_FOR statement, then set *DATA (type-casted to 
+   struct clause_struct) with its clauses.  */
+
+static tree
+find_cilk_for (tree *tp, int *walk_subtrees, void *data)
+{
+  struct clause_struct *cstruct = (struct clause_struct *) data;
+  if (*tp && TREE_CODE (*tp) == CILK_FOR && !cstruct->is_set)
+    {
+      cstruct->is_set = true;
+      cstruct->clauses = OMP_FOR_CLAUSES (*tp);
+      *walk_subtrees = 0;
+      OMP_FOR_CLAUSES (*tp) = NULL_TREE;
+    }
+  return NULL_TREE;
+}
+
+/* Moves the IF-CLAUSE and SCHEDULE clause from _CILK_FOR statement in
+   STMT into *PARALLEL_CLAUSES.  */
+ 
+void
+cilk_for_move_clauses_upward (tree *parallel_clauses, tree stmt)
+{
+  struct clause_struct cstruct;
+  cstruct.is_set = false;
+  cstruct.clauses = NULL_TREE;
+  walk_tree (&stmt, find_cilk_for, (void *) &cstruct, NULL);
+
+  tree clauses = cstruct.clauses;
+  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SCHEDULE
+	|| OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
+      {
+	if (*parallel_clauses)
+	  OMP_CLAUSE_CHAIN (*parallel_clauses) = c;
+	else
+	  *parallel_clauses = c;
+      }
+}
+
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 50cc848..514a084 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -414,6 +414,7 @@  const struct c_common_resword c_common_reswords[] =
   { "_Complex",		RID_COMPLEX,	0 },
   { "_Cilk_spawn",      RID_CILK_SPAWN, 0 },
   { "_Cilk_sync",       RID_CILK_SYNC,  0 },
+  { "_Cilk_for",        RID_CILK_FOR,   0 },
   { "_Imaginary",	RID_IMAGINARY, D_CONLY },
   { "_Decimal32",       RID_DFLOAT32,  D_CONLY | D_EXT },
   { "_Decimal64",       RID_DFLOAT64,  D_CONLY | D_EXT },
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index f074ab1..33e1929 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -149,7 +149,7 @@  enum rid
   RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT,
 
   /* Cilk Plus keywords.  */
-  RID_CILK_SPAWN, RID_CILK_SYNC,
+  RID_CILK_SPAWN, RID_CILK_SYNC, RID_CILK_FOR,
   
   /* Objective-C ("AT" reserved words - they are only keywords when
      they follow '@')  */
@@ -1203,7 +1203,7 @@  extern void c_finish_omp_flush (location_t);
 extern void c_finish_omp_taskwait (location_t);
 extern void c_finish_omp_taskyield (location_t);
 extern tree c_finish_omp_for (location_t, enum tree_code, tree, tree, tree,
-			      tree, tree, tree);
+			      tree, tree, tree, tree *);
 extern void c_omp_split_clauses (location_t, enum tree_code, omp_clause_mask,
 				 tree, tree *);
 extern tree c_omp_declare_simd_clauses_to_numbers (tree, tree);
@@ -1389,4 +1389,5 @@  extern tree make_cilk_frame (tree);
 extern tree create_cilk_function_exit (tree, bool, bool);
 extern tree cilk_install_body_pedigree_operations (tree);
 extern void cilk_outline (tree, tree *, void *);
+extern void cilk_for_move_clauses_upward (tree *, tree);
 #endif /* ! GCC_C_COMMON_H */
diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c
index dd0a45d..8259979 100644
--- a/gcc/c-family/c-omp.c
+++ b/gcc/c-family/c-omp.c
@@ -386,17 +386,18 @@  c_omp_for_incr_canonicalize_ptr (location_t loc, tree decl, tree incr)
    INITV, CONDV and INCRV are vectors containing initialization
    expressions, controlling predicates and increment expressions.
    BODY is the body of the loop and PRE_BODY statements that go before
-   the loop.  */
+   the loop.  *COUNT is the loop-count used solely by a _Cilk_for statement.  */
 
 tree
 c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
-		  tree initv, tree condv, tree incrv, tree body, tree pre_body)
+		  tree initv, tree condv, tree incrv, tree body,
+		  tree pre_body, tree *count)
 {
   location_t elocus;
   bool fail = false;
   int i;
-
-  if (code == CILK_SIMD
+  tree orig_init = NULL_TREE, orig_end = NULL_TREE, orig_step = NULL_TREE;
+  if ((code == CILK_SIMD || code == CILK_FOR) 
       && !c_check_cilk_loop (locus, TREE_VEC_ELT (declv, 0)))
     fail = true;
 
@@ -422,6 +423,8 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	  fail = true;
 	}
 
+      if (TREE_CODE (init) == MODIFY_EXPR)
+	orig_init = TREE_OPERAND (init, 1);
       /* In the case of "for (int i = 0...)", init will be a decl.  It should
 	 have a DECL_INITIAL that we can turn into an assignment.  */
       if (init == decl)
@@ -436,6 +439,7 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	      fail = true;
 	    }
 
+	  orig_init = init;
 	  init = build_modify_expr (elocus, decl, NULL_TREE, NOP_EXPR,
 	      			    /* FIXME diagnostics: This should
 				       be the location of the INIT.  */
@@ -526,9 +530,20 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 					    0))
 		    TREE_SET_CODE (cond, TREE_CODE (cond) == NE_EXPR
 					 ? LT_EXPR : GE_EXPR);
-		  else if (code != CILK_SIMD)
+		  else if (code != CILK_SIMD && code != CILK_FOR)
 		    cond_ok = false;
 		}
+	      if (flag_cilkplus && code == CILK_FOR)
+		{ 
+		  orig_end = TREE_OPERAND (cond, 1); 
+		  tree add_expr = build_zero_cst (TREE_TYPE (orig_end)); 
+		  if (TREE_CODE (cond) == LE_EXPR) 
+		    add_expr = build_one_cst (TREE_TYPE (orig_end)); 
+		  else if (TREE_CODE (cond) == GE_EXPR) 
+		    add_expr = build_int_cst (TREE_TYPE (orig_end), -1); 
+		  orig_end = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_end), 
+					  orig_end, add_expr);
+		}
 	    }
 
 	  if (!cond_ok)
@@ -561,6 +576,18 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	      if (TREE_OPERAND (incr, 0) != decl)
 		break;
 
+	      if (TREE_CODE (incr) == POSTINCREMENT_EXPR
+		  || TREE_CODE (incr) == PREINCREMENT_EXPR)
+		orig_step = build_one_cst (TREE_TYPE (incr));
+	      else
+		orig_step = integer_minus_one_node;
+ 
+	      if (POINTER_TYPE_P (TREE_TYPE (incr)))
+		{
+		  tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (incr)));
+		  orig_step = fold_build2 (MULT_EXPR, TREE_TYPE (orig_step),
+					   orig_step, unit);
+		}
 	      incr_ok = true;
 	      incr = c_omp_for_incr_canonicalize_ptr (elocus, decl, incr);
 	      break;
@@ -579,14 +606,24 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	      if (TREE_CODE (TREE_OPERAND (incr, 1)) == PLUS_EXPR
 		  && (TREE_OPERAND (TREE_OPERAND (incr, 1), 0) == decl
 		      || TREE_OPERAND (TREE_OPERAND (incr, 1), 1) == decl))
-		incr_ok = true;
+		{
+		  if (TREE_OPERAND (TREE_OPERAND (incr, 1), 0) == decl)
+		    orig_step = TREE_OPERAND (TREE_OPERAND (incr, 1), 1);
+		  else
+		    orig_step = TREE_OPERAND (TREE_OPERAND (incr, 1), 0);
+		  incr_ok = true;
+		}
 	      else if ((TREE_CODE (TREE_OPERAND (incr, 1)) == MINUS_EXPR
 			|| (TREE_CODE (TREE_OPERAND (incr, 1))
 			    == POINTER_PLUS_EXPR))
 		       && TREE_OPERAND (TREE_OPERAND (incr, 1), 0) == decl)
-		incr_ok = true;
+		{
+		  orig_step = TREE_OPERAND (TREE_OPERAND (incr, 1), 1);
+		  incr_ok = true;
+		}
 	      else
 		{
+		  orig_step = TREE_OPERAND (incr, 1);
 		  tree t = check_omp_for_incr_expr (elocus,
 						    TREE_OPERAND (incr, 1),
 						    decl);
@@ -609,6 +646,17 @@  c_finish_omp_for (location_t locus, enum tree_code code, tree declv,
 	    }
 	}
 
+      /* These variables could be NULL if an error occurred.  */
+      if (flag_cilkplus && code == CILK_FOR 
+	  && orig_end && orig_init && orig_step)
+	{
+	  /* Count is used by _Cilk_for and that will always have
+	     collapse = 1.  */
+	  *count = fold_build2 (MINUS_EXPR, TREE_TYPE (orig_end), orig_end,
+				orig_init);
+	  *count = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (*count), *count,
+				orig_step);
+	}
       TREE_VEC_ELT (initv, i) = init;
       TREE_VEC_ELT (incrv, i) = incr;
     }
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 07d23ac..e0f3561 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -1394,6 +1394,11 @@  init_pragma (void)
 
   cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				false);
+
+  if (flag_cilkplus && !flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
+				  PRAGMA_CILK_GRAINSIZE, true, false);
+
 #ifdef HANDLE_PRAGMA_PACK_WITH_EXPANSION
   c_register_pragma_with_expansion (0, "pack", handle_pragma_pack);
 #else
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index 6f1bf74..b9f09ba 100644
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -55,6 +55,9 @@  typedef enum pragma_kind {
   /* Top level clause to handle all Cilk Plus pragma simd clauses.  */
   PRAGMA_CILK_SIMD,
 
+  /* This pragma handles setting of grainsize for a _Cilk_for.  */
+  PRAGMA_CILK_GRAINSIZE,
+
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
 
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 8a4868b..83e53fd 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -1248,10 +1248,11 @@  static bool c_parser_objc_diagnose_bad_element_prefix
   (c_parser *, struct c_declspecs *);
 
 /* Cilk Plus supporting routines.  */
-static void c_parser_cilk_simd (c_parser *);
+static void c_parser_cilk_simd (c_parser *, bool, tree);
 static bool c_parser_cilk_verify_simd (c_parser *, enum pragma_context);
 static tree c_parser_array_notation (location_t, c_parser *, tree, tree);
 static tree c_parser_cilk_clause_vectorlength (c_parser *, tree, bool);
+static void c_parser_cilk_grainsize (c_parser *);
 
 /* Parse a translation unit (C90 6.7, C99 6.9).
 
@@ -4878,6 +4879,16 @@  c_parser_statement_after_labels (c_parser *parser)
 	case RID_FOR:
 	  c_parser_for_statement (parser, false);
 	  break;
+	case RID_CILK_FOR:
+	  if (!flag_cilkplus)
+	    {
+	      error_at (c_parser_peek_token (parser)->location,
+			"-fcilkplus must be enabled to use %<_Cilk_for%>");
+	      c_parser_skip_to_end_of_block_or_statement (parser);
+	    }
+	  else
+	    c_parser_cilk_simd (parser, true, integer_zero_node);
+	  break;
 	case RID_CILK_SYNC:
 	  c_parser_consume_token (parser);
 	  c_parser_skip_until_found (parser, CPP_SEMICOLON, "expected %<;%>");
@@ -9496,7 +9507,24 @@  c_parser_pragma (c_parser *parser, enum pragma_context context)
       if (!c_parser_cilk_verify_simd (parser, context))
 	return false;
       c_parser_consume_pragma (parser);
-      c_parser_cilk_simd (parser);
+      c_parser_cilk_simd (parser, false, NULL_TREE);
+      return false;
+    case PRAGMA_CILK_GRAINSIZE:
+      if (!flag_cilkplus)
+	{
+	  warning (0, "%<#pragma grainsize%> ignored because -fcilkplus is not"
+		   " enabled");
+	  c_parser_skip_until_found (parser, CPP_PRAGMA_EOL, NULL);
+	  return false;
+	}
+      if (context == pragma_external)
+	{
+	  error_at (c_parser_peek_token (parser)->location,
+		    "%<#pragma grainsize%> must be inside a function");
+	  c_parser_skip_until_found (parser, CPP_PRAGMA_EOL, NULL);
+	  return false;
+	}
+      c_parser_cilk_grainsize (parser);
       return false;
 
     default:
@@ -11591,7 +11619,7 @@  c_parser_omp_flush (c_parser *parser)
 
 static tree
 c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
-		       tree clauses, tree *cclauses)
+		       tree clauses, tree grain, tree *cclauses)
 {
   tree decl, cond, incr, save_break, save_cont, body, init, stmt, cl;
   tree declv, condv, incrv, initv, ret = NULL;
@@ -11599,6 +11627,7 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
   int i, collapse = 1, nbraces = 0;
   location_t for_loc;
   vec<tree, va_gc> *for_block = make_tree_vector ();
+  tree count = NULL_TREE;
 
   for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
     if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
@@ -11611,11 +11640,18 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
   condv = make_tree_vec (collapse);
   incrv = make_tree_vec (collapse);
 
-  if (!c_parser_next_token_is_keyword (parser, RID_FOR))
+  if (code != CILK_FOR
+      && !c_parser_next_token_is_keyword (parser, RID_FOR))
     {
       c_parser_error (parser, "for statement expected");
       return NULL;
     }
+  if (code == CILK_FOR
+      && !c_parser_next_token_is_keyword (parser, RID_CILK_FOR))
+    {
+      c_parser_error (parser, "_Cilk_for statement expected");
+      return NULL;
+    }
   for_loc = c_parser_peek_token (parser)->location;
   c_parser_consume_token (parser);
 
@@ -11693,7 +11729,7 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
 	    case LE_EXPR:
 	      break;
 	    case NE_EXPR:
-	      if (code == CILK_SIMD)
+	      if (code == CILK_SIMD || code == CILK_FOR)
 		break;
 	      /* FALLTHRU.  */
 	    default:
@@ -11827,7 +11863,7 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
   if (!fail)
     {
       stmt = c_finish_omp_for (loc, code, declv, initv, condv,
-			       incrv, body, NULL);
+			       incrv, body, NULL, &count);
       if (stmt)
 	{
 	  if (cclauses != NULL
@@ -11867,6 +11903,24 @@  c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
 		  }
 	    }
 	  OMP_FOR_CLAUSES (stmt) = clauses;
+	  /* If it is a _Cilk_for statement, then the OMP_FOR_CLAUSES location
+	     stores the user-defined grain value or an integer_zero_node
+	     indicating that the runtime must compute a suitable grain, inside
+	     a SCHEDULE clause.  Similarly the loop-count is also stored in
+	     an IF clause.  These clauses do not make sense for _Cilk_for but
+	     they are just used to transmit information.  */
+	  if (code == CILK_FOR)
+	    {
+	      tree l = build_omp_clause (EXPR_LOCATION (grain),
+					 OMP_CLAUSE_SCHEDULE);
+	      OMP_CLAUSE_SCHEDULE_KIND (l) = OMP_CLAUSE_SCHEDULE_CILKFOR;
+	      OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (l) = grain;
+	      OMP_CLAUSE_CHAIN (l) = OMP_FOR_CLAUSES (stmt);
+	      tree c = build_omp_clause (EXPR_LOCATION (count), OMP_CLAUSE_IF);
+	      OMP_CLAUSE_IF_EXPR (c) = count;
+	      OMP_CLAUSE_CHAIN (c) = l;
+	      OMP_FOR_CLAUSES (stmt) = c;
+	    }
 	}
       ret = stmt;
     }
@@ -11931,7 +11985,8 @@  c_parser_omp_simd (location_t loc, c_parser *parser,
     }
 
   block = c_begin_compound_stmt (true);
-  ret = c_parser_omp_for_loop (loc, parser, OMP_SIMD, clauses, cclauses);
+  ret = c_parser_omp_for_loop (loc, parser, OMP_SIMD, clauses, NULL_TREE,
+			       cclauses);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
 
@@ -12011,7 +12066,8 @@  c_parser_omp_for (location_t loc, c_parser *parser,
     }
 
   block = c_begin_compound_stmt (true);
-  ret = c_parser_omp_for_loop (loc, parser, OMP_FOR, clauses, cclauses);
+  ret = c_parser_omp_for_loop (loc, parser, OMP_FOR, clauses, NULL_TREE,
+			       cclauses);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
 
@@ -12494,7 +12550,8 @@  c_parser_omp_distribute (location_t loc, c_parser *parser,
     }
 
   block = c_begin_compound_stmt (true);
-  ret = c_parser_omp_for_loop (loc, parser, OMP_DISTRIBUTE, clauses, NULL);
+  ret = c_parser_omp_for_loop (loc, parser, OMP_DISTRIBUTE, clauses, NULL_TREE,
+			       NULL);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
 
@@ -13771,18 +13828,84 @@  c_parser_cilk_all_clauses (c_parser *parser)
   return c_finish_cilk_clauses (clauses);
 }
 
-/* Main entry point for parsing Cilk Plus <#pragma simd> for
-   loops.  */
+/* This function helps parse the grainsize pragma for a _Cilk_for statement. 
+   Here is the correct syntax of this pragma: 
+	    #pragma cilk grainsize = <EXP> 
+ */
 
 static void
-c_parser_cilk_simd (c_parser *parser)
+c_parser_cilk_grainsize (c_parser *parser)
 {
-  tree clauses = c_parser_cilk_all_clauses (parser);
+  extern tree convert_to_integer (tree, tree);
+
+  /* consume the 'grainsize' keyword.  */
+  c_parser_consume_pragma (parser);
+
+  if (c_parser_require (parser, CPP_EQ, "expected %<=%>") != 0)
+    {
+      struct c_expr g_expr = c_parser_binary_expression (parser, NULL, NULL);
+      if (g_expr.value && TREE_CODE (g_expr.value) == C_MAYBE_CONST_EXPR)
+	{
+	  error_at (input_location, "cannot convert grain to long integer.\n");
+	  c_parser_skip_to_pragma_eol (parser);
+	}   
+      else if (g_expr.value && g_expr.value != error_mark_node)
+	{
+	  c_parser_skip_to_pragma_eol (parser);
+	  c_token *token = c_parser_peek_token (parser);
+	  if (token && token->type == CPP_KEYWORD
+	      && token->keyword == RID_CILK_FOR)
+	    {
+	      /* Remove EXCESS_PRECISION_EXPR since we are going to convert
+		 it to long int.  */
+	      if (TREE_CODE (g_expr.value) == EXCESS_PRECISION_EXPR)
+		g_expr.value = TREE_OPERAND (g_expr.value, 0);
+	      tree grain = convert_to_integer (long_integer_type_node,
+					       g_expr.value);
+	      if (grain && grain != error_mark_node) 
+		c_parser_cilk_simd (parser, true, grain);
+	    }
+	  else
+	    warning (0, "grainsize pragma is not followed by %<_Cilk_for%>");
+	}
+      else
+	c_parser_skip_to_pragma_eol (parser);
+    }
+  else
+    c_parser_skip_to_pragma_eol (parser);
+}
+
+/* Main entry point for parsing Cilk Plus <#pragma simd> for loops and
+   _Cilk_for loops.  If IS_CILK_FOR is true then it is a _Cilk_for loop
+   and GRAIN is the grain value passed in through pragma or 0.  */
+
+static void
+c_parser_cilk_simd (c_parser *parser, bool is_cilk_for, tree grain)
+{
+  tree super_block = NULL_TREE;
+  tree clauses = NULL_TREE;
+  
+  if (!is_cilk_for)
+    clauses = c_parser_cilk_all_clauses (parser);
+  else
+    super_block = c_begin_omp_parallel ();
   tree block = c_begin_compound_stmt (true);
   location_t loc = c_parser_peek_token (parser)->location;
-  c_parser_omp_for_loop (loc, parser, CILK_SIMD, clauses, NULL);
+  enum tree_code code = is_cilk_for ? CILK_FOR : CILK_SIMD;
+  c_parser_omp_for_loop (loc, parser, code, clauses, grain, NULL);
   block = c_end_compound_stmt (loc, block, true);
   add_stmt (block);
+  if (is_cilk_for)
+    {
+      /* Move all the clauses from the #pragma OMP for to #pragma omp parallel.
+	 This is because if these values are not integers and are placed in
+	 OMP_FOR then the compiler will insert value chains for them.  */
+      tree parallel_clauses = NULL_TREE;
+      cilk_for_move_clauses_upward (&parallel_clauses, super_block);
+    /* The term super_block is not used in scheduling terms but in 
+       set-theory, i.e. set vs. super-set.  */ 
+      c_finish_omp_parallel (loc, parallel_clauses, super_block);
+    }
 }
 
 /* Parse a transaction attribute (GCC Extension).
diff --git a/gcc/cilk-builtins.def b/gcc/cilk-builtins.def
index 9f3240a..bf319d5 100644
--- a/gcc/cilk-builtins.def
+++ b/gcc/cilk-builtins.def
@@ -31,3 +31,5 @@  DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_SYNC, "__cilkrts_sync")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_LEAVE_FRAME, "__cilkrts_leave_frame")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_POP_FRAME, "__cilkrts_pop_frame")
 DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_SAVE_FP, "__cilkrts_save_fp_ctrl_state")
+DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_FOR_32, "__cilkrts_cilk_for_32")
+DEF_CILK_BUILTIN_STUB (BUILT_IN_CILK_FOR_64, "__cilkrts_cilk_for_64")
diff --git a/gcc/cilk-common.c b/gcc/cilk-common.c
index afe88c9..bf4e83a 100644
--- a/gcc/cilk-common.c
+++ b/gcc/cilk-common.c
@@ -106,6 +106,27 @@  install_builtin (const char *name, tree fntype, enum built_in_function code,
   return fndecl;
 }
 
+/* Returns the FUNCTION_DECL for Cilk builtin NAME, parameterized on TYPE.  */
+
+static tree 
+declare_cilk_for_builtin (const char *name, tree type, 
+			  enum built_in_function code)
+{
+  tree cb, ft, fn;
+
+  cb = build_function_type_list (void_type_node,
+                                 ptr_type_node, type, type,
+                                 NULL_TREE);
+  cb = build_pointer_type (cb);
+  ft = build_function_type_list (void_type_node,
+                                 cb, ptr_type_node, type,
+                                 integer_type_node, NULL_TREE);
+  fn = install_builtin (name, ft, code, false);
+  TREE_NOTHROW (fn) = 0;
+
+  return fn;
+}
+
 /* Creates and initializes all the built-in Cilk keywords functions and three
    structures: __cilkrts_stack_frame, __cilkrts_pedigree and __cilkrts_worker.
    Detailed information about __cilkrts_stack_frame and
@@ -269,6 +290,14 @@  cilk_init_builtins (void)
   cilk_save_fp_fndecl = install_builtin ("__cilkrts_save_fp_ctrl_state", 
 					 fptr_fun, BUILT_IN_CILK_SAVE_FP,
 					 false);
+  /* __cilkrts_cilk_for_32 (...);  */
+  cilk_for_32_fndecl = declare_cilk_for_builtin ("__cilkrts_cilk_for_32", 
+						 unsigned_intSI_type_node, 
+						 BUILT_IN_CILK_FOR_32);
+  /* __cilkrts_cilk_for_64 (...);  */
+  cilk_for_64_fndecl = declare_cilk_for_builtin ("__cilkrts_cilk_for_64", 
+						 unsigned_intDI_type_node, 
+						 BUILT_IN_CILK_FOR_64);
 }
 
 /* Get the appropriate frame arguments for CALL that is of type CALL_EXPR.  */
diff --git a/gcc/cilk.h b/gcc/cilk.h
index ae96f53..1fee929 100644
--- a/gcc/cilk.h
+++ b/gcc/cilk.h
@@ -40,6 +40,9 @@  enum cilk_tree_index  {
   CILK_TI_F_POP,                      /* __cilkrts_pop_frame (...).  */
   CILK_TI_F_RETHROW,                  /* __cilkrts_rethrow (...).  */
   CILK_TI_F_SAVE_FP,                  /* __cilkrts_save_fp_ctrl_state (...).  */
+  CILK_TI_F_LOOP_32,                  /* __cilkrts_cilk_for_32 (...).  */
+  CILK_TI_F_LOOP_64,                  /* __cilkrts_cilk_for_64 (...).  */
+
   /* __cilkrts_stack_frame struct fields.  */
   CILK_TI_FRAME_FLAGS,                /* stack_frame->flags.  */
   CILK_TI_FRAME_PARENT,               /* stack_frame->parent.  */
@@ -77,6 +80,8 @@  extern GTY (()) tree cilk_trees[CILK_TI_MAX];
 #define cilk_rethrow_fndecl           cilk_trees[CILK_TI_F_RETHROW]
 #define cilk_pop_fndecl               cilk_trees[CILK_TI_F_POP]
 #define cilk_save_fp_fndecl           cilk_trees[CILK_TI_F_SAVE_FP]
+#define cilk_for_32_fndecl            cilk_trees[CILK_TI_F_LOOP_32]
+#define cilk_for_64_fndecl            cilk_trees[CILK_TI_F_LOOP_64]
 
 #define cilk_worker_type_fndecl       cilk_trees[CILK_TI_WORKER_TYPE]
 #define cilk_frame_type_decl          cilk_trees[CILK_TI_FRAME_TYPE]
diff --git a/gcc/cp/cp-cilkplus.c b/gcc/cp/cp-cilkplus.c
index f3a2aff..29661ab 100644
--- a/gcc/cp/cp-cilkplus.c
+++ b/gcc/cp/cp-cilkplus.c
@@ -143,3 +143,122 @@  cilk_install_body_with_frame_cleanup (tree fndecl, tree orig_body, void *wd)
 			    &list);
 }
 
+/* Returns all the statements till CILK_FOR statement in *STMT_LIST.  Removes
+   those statements from STMT_LIST and updates STMT_LIST accordingly.  */
+
+tree
+copy_tree_till_cilk_for (tree *stmt_list)
+{
+  gcc_assert (TREE_CODE (*stmt_list) == STATEMENT_LIST);
+  tree new_stmt_list  = alloc_stmt_list ();
+  tree_stmt_iterator tsi;
+  for (tsi = tsi_start (*stmt_list); !tsi_end_p (tsi);)
+    if (TREE_CODE (tsi_stmt (tsi)) != CILK_FOR)
+      {
+	append_to_statement_list (tsi_stmt (tsi), &new_stmt_list); 
+	tsi_delink (&tsi);
+      }
+    else
+      tsi_next (&tsi);
+    
+  return new_stmt_list;
+}
+
+/* Structure to hold the list of variables that are being killed in a
+   statement list.  This structure is only used in a WALK_TREE function.  */
+struct cilk_for_var_list
+{
+  vec <tree, va_gc> *list;
+};
+
+/* Helper function for WALK_TREE used in find_killed_vars function.
+   If *TP is an INIT_EXPR or MODIFY_EXPR, records the variable being killed
+   (or set), i.e. its operand 0, in the variable list that DATA points to.  */
+
+static tree
+find_vars (tree *tp, int *walk_subtrees, void *data)
+{
+  struct cilk_for_var_list *vlist = (struct cilk_for_var_list *) data;
+
+  if (!tp || !*tp)
+    return NULL_TREE;
+
+  if (TREE_CODE (*tp) == INIT_EXPR || TREE_CODE (*tp) == MODIFY_EXPR)
+    {
+      vec_safe_push (vlist->list, TREE_OPERAND (*tp, 0));
+      *walk_subtrees = 0;
+    }
+  return NULL_TREE;
+}
+
+/* Returns a vector of TREES holding the variables that are
+   killed (i.e. written or set) in STMT_LIST.  */
+
+static vec <tree, va_gc> *
+find_killed_vars (tree stmt_list)
+{
+  struct cilk_for_var_list vlist;
+  memset (&vlist, 0, sizeof (vlist));
+  cp_walk_tree (&stmt_list, find_vars, &vlist, NULL);
+  return vlist.list;
+}
+
+/* Inserts OMP_CLAUSE_FIRSTPRIVATE clauses into *CLAUSES for each variable
+   in *LIST.  */
+
+static void
+insert_firstpriv_clauses (vec <tree, va_gc> *list, tree *clauses)
+{
+  if (vec_safe_is_empty (list))
+    return;
+
+  tree lhs;
+  unsigned ix;
+  FOR_EACH_VEC_SAFE_ELT (list, ix, lhs)
+    {
+      tree new_clause = build_omp_clause (EXPR_LOCATION (lhs),
+					  OMP_CLAUSE_FIRSTPRIVATE);
+      OMP_CLAUSE_DECL (new_clause) = lhs;
+      OMP_CLAUSE_CHAIN (new_clause) = *clauses;
+      *clauses = new_clause;
+    }
+}
+
+/* Returns a BIND_EXPR with BIND_EXPR_VARS holding VARS and BIND_EXPR_BODY
+   containing STMT_LIST and CFOR_PAR_LIST.  */
+
+tree
+cilk_for_create_bind_expr (tree vars, tree stmt_list, tree cfor_par_list)
+{
+  gcc_assert (TREE_CODE (stmt_list) == STATEMENT_LIST);
+  tree_stmt_iterator tsi;
+  tree return_expr = make_node (BIND_EXPR);
+  BIND_EXPR_BODY (return_expr) = alloc_stmt_list ();
+  bool found = false; 
+  vec <tree, va_gc> *cfor_vars = find_killed_vars (stmt_list);
+
+  insert_firstpriv_clauses (cfor_vars, &OMP_PARALLEL_CLAUSES (cfor_par_list));
+
+  /* If there is a supplied list of vars then there is no reason to find them 
+     again.  */
+  if (vars != NULL_TREE)
+    found = true;
+
+  BIND_EXPR_VARS (return_expr) = vars;
+  for (tsi = tsi_start (stmt_list); !tsi_end_p (tsi); tsi_next (&tsi))
+    {
+      /* Only do the adding of BIND_EXPR_VARS the first time since they are
+	 already "chained-on."  */
+      if (!found && TREE_CODE (tsi_stmt (tsi)) == DECL_EXPR)
+	{
+	  tree var = DECL_EXPR_DECL (tsi_stmt (tsi));
+	  BIND_EXPR_VARS (return_expr) = var;
+	  found = true;
+	}
+      else
+	append_to_statement_list (tsi_stmt (tsi),
+				  &BIND_EXPR_BODY (return_expr));
+    }
+  append_to_statement_list (cfor_par_list, &BIND_EXPR_BODY (return_expr));
+  return return_expr;
+}
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 7681b27..c665384 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6206,6 +6206,8 @@  extern void vtv_build_vtable_verify_fndecl      (void);
 
 /* In cp-cilkplus.c.  */
 extern bool cpp_validate_cilk_plus_loop		(tree);
+extern tree copy_tree_till_cilk_for             (tree *);
+extern tree cilk_for_create_bind_expr           (tree, tree, tree);
 
 /* In cp/cp-array-notations.c */
 extern tree expand_array_notation_exprs         (tree);
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index f0722d6..d661d4b 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -237,8 +237,8 @@  static void cp_parser_initial_pragma
 static tree cp_literal_operator_id
   (const char *);
 
-static void cp_parser_cilk_simd
-  (cp_parser *, cp_token *);
+static tree cp_parser_cilk_simd
+  (cp_parser *, cp_token *, tree);
 static bool cp_parser_omp_declare_reduction_exprs
   (tree, cp_parser *);
 static tree cp_parser_cilk_simd_vectorlength 
@@ -9368,6 +9368,18 @@  cp_parser_statement (cp_parser* parser, tree in_statement_expr,
 	  statement = cp_parser_iteration_statement (parser, false);
 	  break;
 
+	case RID_CILK_FOR:
+	  if (!flag_cilkplus)
+	    {
+	      error_at (cp_lexer_peek_token (parser->lexer)->location,
+			"-fcilkplus must be enabled to use %<_Cilk_for%>");
+	      cp_lexer_consume_token (parser->lexer);
+	      statement = error_mark_node;
+	    }
+	  else
+	    statement = cp_parser_cilk_simd (parser, NULL, integer_zero_node);
+	  break;
+	  
 	case RID_BREAK:
 	case RID_CONTINUE:
 	case RID_RETURN:
@@ -28835,7 +28847,7 @@  cp_parser_omp_for_cond (cp_parser *parser, tree decl, enum tree_code code)
     case LE_EXPR:
       break;
     case NE_EXPR:
-      if (code == CILK_SIMD)
+      if (code == CILK_SIMD || code == CILK_FOR)
 	break;
       /* Fall through: OpenMP disallows NE_EXPR.  */
     default:
@@ -29131,7 +29143,7 @@  cp_parser_omp_for_loop_init (cp_parser *parser,
 
 static tree
 cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
-			tree *cclauses)
+			tree *cclauses, tree *cfor_block)
 {
   tree init, cond, incr, body, decl, pre_body = NULL_TREE, ret;
   tree real_decl, initv, condv, incrv, declv;
@@ -29160,11 +29172,18 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
       bool add_private_clause = false;
       location_t loc;
 
-      if (!cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
+      if (code == CILK_SIMD
+	  && !cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
 	{
 	  cp_parser_error (parser, "for statement expected");
 	  return NULL;
 	}
+      if (code == CILK_FOR
+	  && !cp_lexer_next_token_is_keyword (parser->lexer, RID_CILK_FOR))
+	{
+	  cp_parser_error (parser, "_Cilk_for statement expected");
+	  return NULL;
+	}
       loc = cp_lexer_consume_token (parser->lexer)->location;
 
       if (!cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN))
@@ -29173,13 +29192,26 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
       init = decl = real_decl = NULL;
       this_pre_body = push_stmt_list ();
 
+      if (code == CILK_FOR
+	  && cp_lexer_next_token_is_keyword (parser->lexer, RID_STATIC))
+	{
+	  error_at (cp_lexer_peek_token (parser->lexer)->location,
+		    "induction variable cannot be static");
+	  cp_lexer_consume_token (parser->lexer);
+	}
       add_private_clause
 	|= cp_parser_omp_for_loop_init (parser,
-					/*parsing_openmp=*/code != CILK_SIMD,
+					/*parsing_openmp=*/
+					(code != CILK_SIMD && code != CILK_FOR),
 					this_pre_body, for_block,
 					init, decl, real_decl);
 
-      cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+      if (!cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON)
+	  && code == CILK_FOR)
+	{
+	  cp_parser_skip_to_end_of_statement (parser);
+	  cp_parser_consume_semicolon_at_end_of_statement (parser);
+	}
       if (this_pre_body)
 	{
 	  this_pre_body = pop_stmt_list (this_pre_body);
@@ -29337,7 +29369,7 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
 
   /* Note that we saved the original contents of this flag when we entered
      the structured block, and so we don't need to re-save it here.  */
-  if (code == CILK_SIMD)
+  if (code == CILK_SIMD || code == CILK_FOR)
     parser->in_statement = IN_CILK_SIMD_FOR;
   else
     parser->in_statement = IN_OMP_FOR;
@@ -29378,7 +29410,17 @@  cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
     }
 
   while (!for_block->is_empty ())
-    add_stmt (pop_stmt_list (for_block->pop ()));
+    {
+      tree t = pop_stmt_list (for_block->pop ());
+
+      /* Remove all the statements between the head of statement list and
+	 _Cilk_for statement and store them in *cfor_block.  These statements
+	 are hoisted above the #pragma parallel.  */
+      if (code == CILK_FOR && cfor_block != NULL)
+	*cfor_block = copy_tree_till_cilk_for (&t);
+      add_stmt (t);
+
+    }
   release_tree_vector (for_block);
 
   return ret;
@@ -29434,7 +29476,7 @@  cp_parser_omp_simd (cp_parser *parser, cp_token *pragma_tok,
   sb = begin_omp_structured_block ();
   save = cp_parser_begin_omp_structured_block (parser);
 
-  ret = cp_parser_omp_for_loop (parser, OMP_SIMD, clauses, cclauses);
+  ret = cp_parser_omp_for_loop (parser, OMP_SIMD, clauses, cclauses, NULL);
 
   cp_parser_end_omp_structured_block (parser, save);
   add_stmt (finish_omp_structured_block (sb));
@@ -29522,7 +29564,7 @@  cp_parser_omp_for (cp_parser *parser, cp_token *pragma_tok,
   sb = begin_omp_structured_block ();
   save = cp_parser_begin_omp_structured_block (parser);
 
-  ret = cp_parser_omp_for_loop (parser, OMP_FOR, clauses, cclauses);
+  ret = cp_parser_omp_for_loop (parser, OMP_FOR, clauses, cclauses, NULL);
 
   cp_parser_end_omp_structured_block (parser, save);
   add_stmt (finish_omp_structured_block (sb));
@@ -29994,7 +30036,7 @@  cp_parser_omp_distribute (cp_parser *parser, cp_token *pragma_tok,
   sb = begin_omp_structured_block ();
   save = cp_parser_begin_omp_structured_block (parser);
 
-  ret = cp_parser_omp_for_loop (parser, OMP_DISTRIBUTE, clauses, NULL);
+  ret = cp_parser_omp_for_loop (parser, OMP_DISTRIBUTE, clauses, NULL, NULL);
 
   cp_parser_end_omp_structured_block (parser, save);
   add_stmt (finish_omp_structured_block (sb));
@@ -31290,6 +31332,38 @@  cp_parser_initial_pragma (cp_token *first_token)
   cp_lexer_get_preprocessor_token (NULL, first_token);
 }
 
+/* Parses the grainsize pragma for the _Cilk_for statement.  PRAGMA_TOK is
+   the pragma token.  Syntax: #pragma cilk grainsize = <VALUE>.  The
+   _Cilk_for that follows is parsed here with <VALUE> as its grain.  */
+
+static void
+cp_parser_cilk_grainsize (cp_parser *parser, cp_token *pragma_tok)
+{
+  if (cp_parser_require (parser, CPP_EQ, RT_EQ))
+    {
+      tree exp = cp_parser_binary_expression (parser, false, false,
+                                              PREC_NOT_OPERATOR, NULL);
+      cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+      if (!exp || exp == error_mark_node)
+        {
+          error_at (pragma_tok->location, "invalid grainsize for _Cilk_for");
+          return;
+        }
+      cp_token *n_tok = cp_lexer_peek_token (parser->lexer);
+
+      /* Only a _Cilk_for may follow the pragma; warn otherwise.  */
+      if (n_tok && n_tok->type == CPP_KEYWORD
+	  && n_tok->keyword == RID_CILK_FOR) 
+	cp_parser_cilk_simd (parser, NULL, exp);
+      else
+	warning_at (cp_lexer_peek_token (parser->lexer)->location, 0,
+		    "%<#pragma cilk grainsize%> is not followed by "
+		    "%<_Cilk_for%>");
+      return;
+    }
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -31469,9 +31543,30 @@  cp_parser_pragma (cp_parser *parser, enum pragma_context context)
 		    "%<#pragma simd%> must be inside a function");
 	  break;
 	}
-      cp_parser_cilk_simd (parser, pragma_tok);
+      cp_parser_cilk_simd (parser, pragma_tok, NULL_TREE);
       return true;
 
+    case PRAGMA_CILK_GRAINSIZE:
+      if (context == pragma_external)
+        {
+          error_at (pragma_tok->location,
+                    "%<#pragma cilk grainsize%> must be inside a function");
+          break;
+        }
+
+      /* Ignore the pragma if Cilk Plus is not enabled.  */
+      if (flag_cilkplus)
+        {
+          cp_parser_cilk_grainsize (parser, pragma_tok);
+          return true;
+        }
+      else
+        {
+          error_at (pragma_tok->location, "-fcilkplus must be enabled to use "
+                    "%<#pragma cilk grainsize%>");
+          break;
+	}
+
     default:
       gcc_assert (id >= PRAGMA_FIRST_EXTERNAL);
       c_invoke_pragma_handler (id);
@@ -31789,31 +31884,102 @@  cp_parser_cilk_simd_all_clauses (cp_parser *parser, cp_token *pragma_token)
     return c_finish_cilk_clauses (clauses);
 }
 
-/* Main entry-point for parsing Cilk Plus <#pragma simd> for loops.  */
+/* Main entry-point for parsing Cilk Plus <#pragma simd> for and _Cilk_for
+   loops.  This function returns NULL_TREE whenever it is parsing the
+   <#pragma simd> for because the caller does not check the return value.
+   _Cilk_for's caller checks this value and thus return error_mark_node
+   when errors happen and a valid value when things go well.  */
 
-static void
-cp_parser_cilk_simd (cp_parser *parser, cp_token *pragma_token)
+static tree
+cp_parser_cilk_simd (cp_parser *parser, cp_token *pragma_token, tree grain)
 {
-  tree clauses = cp_parser_cilk_simd_all_clauses (parser, pragma_token);
-
+  bool is_cilk_for = !pragma_token ? true : false;
+  
+  tree clauses = NULL_TREE;
+  if (!is_cilk_for)
+    clauses = cp_parser_cilk_simd_all_clauses (parser, pragma_token);
+  
   if (clauses == error_mark_node)
-    return;
+    return NULL_TREE;
   
-  if (cp_lexer_next_token_is_not_keyword (parser->lexer, RID_FOR))
+  if (!is_cilk_for
+      && cp_lexer_next_token_is_not_keyword (parser->lexer, RID_FOR))
     {
       error_at (cp_lexer_peek_token (parser->lexer)->location,
 		"for statement expected");
-      return;
+      return NULL_TREE;
+    }
+  if (is_cilk_for
+      && cp_lexer_next_token_is_not_keyword (parser->lexer, RID_CILK_FOR))
+    {
+      error_at (cp_lexer_peek_token (parser->lexer)->location,
+		"_Cilk_for statement expected");
+      return error_mark_node;
     }
 
+  tree top_block = NULL_TREE, topmost_blk = NULL_TREE;
+  if (is_cilk_for)
+    {
+      topmost_blk = push_stmt_list ();
+      top_block = begin_omp_parallel ();
+    }
+  
   tree sb = begin_omp_structured_block ();
   int save = cp_parser_begin_omp_structured_block (parser);
-  tree ret = cp_parser_omp_for_loop (parser, CILK_SIMD, clauses, NULL);
+   
+  enum tree_code code = is_cilk_for ? CILK_FOR : CILK_SIMD;
+  tree cfor_blk = NULL_TREE;
+  tree ret = cp_parser_omp_for_loop (parser, code, clauses, NULL, &cfor_blk);
   if (ret)
     cpp_validate_cilk_plus_loop (OMP_FOR_BODY (ret));
+  
+  /* For _Cilk_for statements, the grain value is stored in a SCHEDULE
+     clause.  */
+  if (is_cilk_for && ret)
+    {
+      tree l = build_omp_clause (EXPR_LOCATION (grain), OMP_CLAUSE_SCHEDULE);
+      OMP_CLAUSE_SCHEDULE_KIND (l) = OMP_CLAUSE_SCHEDULE_CILKFOR;
+      OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (l) = grain;
+      OMP_CLAUSE_CHAIN (l) = OMP_FOR_CLAUSES (ret);
+      OMP_FOR_CLAUSES (ret) = l;
+    }
   cp_parser_end_omp_structured_block (parser, save);
-  add_stmt (finish_omp_structured_block (sb));
-  return;
+
+  if (!is_cilk_for)
+    {
+      add_stmt (finish_omp_structured_block (sb));
+      return NULL_TREE;
+    }
+
+  tree sb_block = finish_omp_structured_block (sb);
+  tree vars = NULL_TREE, sb_blk_body = sb_block;
+
+  /* For iterators, cfor_blk holds the mapping from the original vector
+     iterators to the integer ones that c_finish_omp_for remaps them to.
+     This information must be pushed above the #pragma omp parallel so
+     that the IF clause (which holds the loop count) can use it to compute
+     the loop count.  */
+  if (TREE_CODE (sb_block) == BIND_EXPR && cfor_blk != NULL_TREE)
+    {
+      vars = BIND_EXPR_VARS (sb_block);
+      sb_blk_body = BIND_EXPR_BODY (sb_block);
+    }
+
+  add_stmt (sb_blk_body);
+  tree parallel_clauses = NULL_TREE;
+  cilk_for_move_clauses_upward (&parallel_clauses, ret);
+  tree stmt = finish_omp_parallel (parallel_clauses, top_block);
+  OMP_PARALLEL_COMBINED (stmt) = 1;
+  topmost_blk = pop_stmt_list (topmost_blk);
+
+  if (cfor_blk != NULL_TREE)
+    {
+      tree bind_expr = cilk_for_create_bind_expr (vars, cfor_blk, topmost_blk);
+      add_stmt (bind_expr);
+      return bind_expr;
+    }
+  add_stmt (topmost_blk);
+  return topmost_blk;
 }
 
 /* Create an identifier for a generic parameter type (a synthesized
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 7967db8..7b60b6e 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -13584,6 +13584,9 @@  tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
 				args, complain, in_decl);
       stmt = begin_omp_parallel ();
       RECUR (OMP_PARALLEL_BODY (t));
+      if (flag_cilkplus
+	  && TREE_CODE (OMP_PARALLEL_BODY (t)) == CILK_FOR)
+	cilk_for_move_clauses_upward (&tmp, stmt);
       OMP_PARALLEL_COMBINED (finish_omp_parallel (tmp, stmt))
 	= OMP_PARALLEL_COMBINED (t);
       break;
@@ -13599,6 +13602,7 @@  tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl,
     case OMP_FOR:
     case OMP_SIMD:
     case CILK_SIMD:
+    case CILK_FOR:
     case OMP_DISTRIBUTE:
       {
 	tree clauses, body, pre_body;
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 9fb4fc0..8388a6b 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6058,6 +6058,7 @@  handle_omp_for_class_iterator (int i, location_t locus, tree declv, tree initv,
     case GE_EXPR:
     case LT_EXPR:
     case LE_EXPR:
+    case NE_EXPR:
       if (TREE_OPERAND (cond, 1) == iter)
 	cond = build2 (swap_tree_comparison (TREE_CODE (cond)),
 		       TREE_TYPE (cond), iter, TREE_OPERAND (cond, 0));
@@ -6470,12 +6471,20 @@  finish_omp_for (location_t locus, enum tree_code code, tree declv, tree initv,
   if (IS_EMPTY_STMT (pre_body))
     pre_body = NULL;
 
+  tree count = NULL_TREE;
   omp_for = c_finish_omp_for (locus, code, declv, initv, condv, incrv,
-			      body, pre_body);
+			      body, pre_body, &count);
 
   if (omp_for == NULL)
     return NULL;
 
+  if (code == CILK_FOR)
+    {
+      tree c = build_omp_clause (EXPR_LOCATION (count), OMP_CLAUSE_IF);
+      OMP_CLAUSE_IF_EXPR (c) = count;
+      clauses = chainon (clauses, c);
+    }
+
   for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INCR (omp_for)); i++)
     {
       decl = TREE_OPERAND (TREE_VEC_ELT (OMP_FOR_INIT (omp_for), i), 0);
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 2d1e1c7..f87c0cf 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -45,6 +45,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "value-prof.h"
 #include "trans-mem.h"
 
+static void dump_gimple_omp_parallel (pretty_printer *, gimple, int, int,
+				      bool);
 #define INDENT(SPACE)							\
   do { int i; for (i = 0; i < SPACE; i++) pp_space (buffer); } while (0)
 
@@ -1124,6 +1126,10 @@  dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	case GF_OMP_FOR_KIND_DISTRIBUTE:
 	  kind = " distribute";
 	  break;
+	case GF_OMP_FOR_KIND_CILKFOR:
+	  gcc_assert (flag_cilkplus);
+	  kind = "";
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -1158,16 +1164,25 @@  dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	case GF_OMP_FOR_KIND_DISTRIBUTE:
 	  pp_string (buffer, "#pragma omp distribute");
 	  break;
+	case GF_OMP_FOR_KIND_CILKFOR:
+	  gcc_assert (flag_cilkplus);
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
-      dump_omp_clauses (buffer, gimple_omp_for_clauses (gs), spc, flags);
+      if (!flag_cilkplus
+	  || gimple_omp_for_kind (gs) != GF_OMP_FOR_KIND_CILKFOR) 
+	dump_omp_clauses (buffer, gimple_omp_for_clauses (gs), spc, flags);
       for (i = 0; i < gimple_omp_for_collapse (gs); i++)
 	{
 	  if (i)
 	    spc += 2;
 	  newline_and_indent (buffer, spc);
-	  pp_string (buffer, "for (");
+	  if (flag_cilkplus 
+	      && gimple_omp_for_kind (gs) == GF_OMP_FOR_KIND_CILKFOR)
+	    pp_string (buffer, "_Cilk_for (");
+	  else
+	    pp_string (buffer, "for (");
 	  dump_generic_node (buffer, gimple_omp_for_index (gs, i), spc,
 			     flags, false);
 	  pp_string (buffer, " = ");
@@ -1192,6 +1207,9 @@  dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 	    case GE_EXPR:
 	      pp_greater_equal (buffer);
 	      break;
+	    case NE_EXPR:
+	      pp_string (buffer, "!=");
+	      break;
 	    default:
 	      gcc_unreachable ();
 	    }
@@ -1210,6 +1228,9 @@  dump_gimple_omp_for (pretty_printer *buffer, gimple gs, int spc, int flags)
 
       if (!gimple_seq_empty_p (gimple_omp_body (gs)))
 	{
+	  if (flag_cilkplus
+	      && gimple_omp_for_kind (gs) == GF_OMP_FOR_KIND_CILKFOR) 
+	    dump_omp_clauses (buffer, gimple_omp_for_clauses (gs), spc, flags); 
 	  newline_and_indent (buffer, spc + 2);
 	  pp_left_brace (buffer);
 	  pp_newline (buffer);
@@ -1846,7 +1867,7 @@  dump_gimple_phi (pretty_printer *buffer, gimple phi, int spc, bool comment,
 
 static void
 dump_gimple_omp_parallel (pretty_printer *buffer, gimple gs, int spc,
-                          int flags)
+                          int flags, bool is_cilk_for)
 {
   if (flags & TDF_RAW)
     {
@@ -1860,7 +1881,10 @@  dump_gimple_omp_parallel (pretty_printer *buffer, gimple gs, int spc,
   else
     {
       gimple_seq body;
-      pp_string (buffer, "#pragma omp parallel");
+      if (is_cilk_for) 
+	pp_string (buffer, "compiler-inserted clauses for cilk-for body: ");
+      else
+	pp_string (buffer, "#pragma omp parallel");
       dump_omp_clauses (buffer, gimple_omp_parallel_clauses (gs), spc, flags);
       if (gimple_omp_parallel_child_fn (gs))
 	{
@@ -2137,7 +2161,7 @@  pp_gimple_stmt_1 (pretty_printer *buffer, gimple gs, int spc, int flags)
       break;
 
     case GIMPLE_OMP_PARALLEL:
-      dump_gimple_omp_parallel (buffer, gs, spc, flags);
+      dump_gimple_omp_parallel (buffer, gs, spc, flags, false);
       break;
 
     case GIMPLE_OMP_TASK:
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 0e80d2e..194045c 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -91,13 +91,14 @@  enum gf_mask {
     GF_CALL_ALLOCA_FOR_VAR	= 1 << 5,
     GF_CALL_INTERNAL		= 1 << 6,
     GF_OMP_PARALLEL_COMBINED	= 1 << 0,
-    GF_OMP_FOR_KIND_MASK	= 3 << 0,
+    GF_OMP_FOR_KIND_MASK	= 7 << 0,
     GF_OMP_FOR_KIND_FOR		= 0 << 0,
     GF_OMP_FOR_KIND_DISTRIBUTE	= 1 << 0,
     GF_OMP_FOR_KIND_SIMD	= 2 << 0,
     GF_OMP_FOR_KIND_CILKSIMD	= 3 << 0,
-    GF_OMP_FOR_COMBINED		= 1 << 2,
-    GF_OMP_FOR_COMBINED_INTO	= 1 << 3,
+    GF_OMP_FOR_KIND_CILKFOR     = 4 << 0,
+    GF_OMP_FOR_COMBINED		= 1 << 3,
+    GF_OMP_FOR_COMBINED_INTO	= 1 << 4,
     GF_OMP_TARGET_KIND_MASK	= 3 << 0,
     GF_OMP_TARGET_KIND_REGION	= 0 << 0,
     GF_OMP_TARGET_KIND_DATA	= 1 << 0,
@@ -4563,6 +4564,16 @@  gimple_omp_for_set_pre_body (gimple gs, gimple_seq pre_body)
   omp_for_stmt->pre_body = pre_body;
 }
 
+/* Return the index (induction variable) stored in the first iterator of GS,
+   which is converted with as_a to a gimple_statement_omp_for.  */
+
+static inline tree
+gimple_cilk_for_induction_var (const_gimple gs)
+{
+  const gimple_statement_omp_for *cilk_for_stmt =
+    as_a <const gimple_statement_omp_for> (gs);
+  return cilk_for_stmt->iter->index;
+}
 
 /* Return the clauses associated with OMP_PARALLEL GS.  */
 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 9c9998d..d223b7a 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -5849,7 +5849,8 @@  omp_check_private (struct gimplify_omp_ctx *ctx, tree decl, bool copyprivate)
 
 static void
 gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
-			   enum omp_region_type region_type)
+			   enum omp_region_type region_type,
+			   bool is_cilk_for)
 {
   struct gimplify_omp_ctx *ctx, *outer_ctx;
   tree c;
@@ -6079,8 +6080,12 @@  gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 
 	case OMP_CLAUSE_FINAL:
 	case OMP_CLAUSE_IF:
-	  OMP_CLAUSE_OPERAND (c, 0)
-	    = gimple_boolify (OMP_CLAUSE_OPERAND (c, 0));
+	  /* In _Cilk_for we insert an IF clause as a mechanism to
+	     pass in the count information.  So, there is no reason to
+	     boolify them.  */
+	  if (!is_cilk_for) 
+	    OMP_CLAUSE_OPERAND (c, 0) 
+	      = gimple_boolify (OMP_CLAUSE_OPERAND (c, 0));
 	  /* Fall through.  */
 
 	case OMP_CLAUSE_SCHEDULE:
@@ -6447,6 +6452,20 @@  gimplify_adjust_omp_clauses (tree *list_p)
   delete_omp_context (ctx);
 }
 
+/* Unlink clause C (matched by node identity) from the chain *LIST_P.  */
+
+static void
+omp_remove_clause (tree c, tree *list_p)
+{
+  while (*list_p != NULL_TREE)
+    {
+      if (*list_p == c)
+	*list_p = OMP_CLAUSE_CHAIN (*list_p);
+      else
+	list_p = &OMP_CLAUSE_CHAIN (*list_p);
+    }
+}
+
 /* Gimplify the contents of an OMP_PARALLEL statement.  This involves
    gimplification of the body, as well as scanning the body for used
    variables.  We need to do this scan now, because variable-sized
@@ -6458,11 +6477,29 @@  gimplify_omp_parallel (tree *expr_p, gimple_seq *pre_p)
   tree expr = *expr_p;
   gimple g;
   gimple_seq body = NULL;
-
+  bool is_cilk_for = false;
+  tree c = NULL_TREE;
+  for (c = OMP_PARALLEL_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+    if (flag_cilkplus && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SCHEDULE
+	&& OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_CILKFOR)
+      {
+	/* The schedule clause is kept up to this point so that it can
+	   indicate whether this #pragma omp parallel was inserted for a
+	   _Cilk_for statement.  If so, set is_cilk_for so that
+	   gimplify_scan_omp_clauses does not boolify the IF clause,
+	   which stores the count value.  */
+	gcc_assert (flag_cilkplus);
+	is_cilk_for = true;
+	break;
+      } 
+  
+  /* The SCHEDULE clause is not necessary anymore.  */
+  if (is_cilk_for) 
+    omp_remove_clause (c, &OMP_PARALLEL_CLAUSES (expr));
   gimplify_scan_omp_clauses (&OMP_PARALLEL_CLAUSES (expr), pre_p,
 			     OMP_PARALLEL_COMBINED (expr)
 			     ? ORT_COMBINED_PARALLEL
-			     : ORT_PARALLEL);
+			     : ORT_PARALLEL, is_cilk_for);
 
   push_gimplify_context ();
 
@@ -6498,7 +6535,7 @@  gimplify_omp_task (tree *expr_p, gimple_seq *pre_p)
   gimplify_scan_omp_clauses (&OMP_TASK_CLAUSES (expr), pre_p,
 			     find_omp_clause (OMP_TASK_CLAUSES (expr),
 					      OMP_CLAUSE_UNTIED)
-			     ? ORT_UNTIED_TASK : ORT_TASK);
+			     ? ORT_UNTIED_TASK : ORT_TASK, false);
 
   push_gimplify_context ();
 
@@ -6563,8 +6600,9 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 
   simd = TREE_CODE (for_stmt) == OMP_SIMD
     || TREE_CODE (for_stmt) == CILK_SIMD;
-  gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p,
-			     simd ? ORT_SIMD : ORT_WORKSHARE);
+    gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p,
+			       simd ? ORT_SIMD : ORT_WORKSHARE,
+			       TREE_CODE (for_stmt) == CILK_FOR);
 
   /* Handle OMP_FOR_INIT.  */
   for_pre_body = NULL;
@@ -6825,6 +6863,7 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
     case OMP_FOR: kind = GF_OMP_FOR_KIND_FOR; break;
     case OMP_SIMD: kind = GF_OMP_FOR_KIND_SIMD; break;
     case CILK_SIMD: kind = GF_OMP_FOR_KIND_CILKSIMD; break;
+    case CILK_FOR: kind = GF_OMP_FOR_KIND_CILKFOR; break;
     case OMP_DISTRIBUTE: kind = GF_OMP_FOR_KIND_DISTRIBUTE; break;
     default:
       gcc_unreachable ();
@@ -6895,7 +6934,7 @@  gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
     default:
       gcc_unreachable ();
     }
-  gimplify_scan_omp_clauses (&OMP_CLAUSES (expr), pre_p, ort);
+  gimplify_scan_omp_clauses (&OMP_CLAUSES (expr), pre_p, ort, false);
   if (ort == ORT_TARGET || ort == ORT_TARGET_DATA)
     {
       push_gimplify_context ();
@@ -6955,7 +6994,7 @@  gimplify_omp_target_update (tree *expr_p, gimple_seq *pre_p)
   gimple stmt;
 
   gimplify_scan_omp_clauses (&OMP_TARGET_UPDATE_CLAUSES (expr), pre_p,
-			     ORT_WORKSHARE);
+			     ORT_WORKSHARE, false);
   gimplify_adjust_omp_clauses (&OMP_TARGET_UPDATE_CLAUSES (expr));
   stmt = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_UPDATE,
 				  OMP_TARGET_UPDATE_CLAUSES (expr));
@@ -7897,6 +7936,7 @@  gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	case OMP_FOR:
 	case OMP_SIMD:
 	case CILK_SIMD:
+	case CILK_FOR:
 	case OMP_DISTRIBUTE:
 	  ret = gimplify_omp_for (expr_p, pre_p);
 	  break;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 2c35751..cbc8549
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -71,6 +71,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "ipa-prop.h"
 #include "tree-nested.h"
 #include "tree-eh.h"
+#include "cilk.h"
 
 
 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
@@ -198,6 +199,13 @@  struct omp_for_data
   struct omp_for_data_loop *loops;
 };
 
+/* Information collected about a _Cilk_for statement by find_cilk_for_stmt.
+   A pointer to this structure is passed in via WALK_STMT_INFO->INFO.  */
+struct cilk_for_info 
+{
+  bool found;		/* True once a _Cilk_for statement has been seen.  */
+  tree induction_var;	/* Induction variable of that statement.  */
+};
 
 static splay_tree all_contexts;
 static int taskreg_nesting_level;
@@ -314,6 +322,8 @@  extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
   fd->have_ordered = false;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->chunk_size = NULL_TREE;
+  if (gimple_omp_for_kind (fd->for_stmt) ==  GF_OMP_FOR_KIND_CILKFOR)
+    fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
   collapse_iter = NULL;
   collapse_count = NULL;
 
@@ -392,7 +402,9 @@  extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
 	  break;
 	case NE_EXPR:
 	  gcc_assert (gimple_omp_for_kind (for_stmt)
-		      == GF_OMP_FOR_KIND_CILKSIMD);
+		      == GF_OMP_FOR_KIND_CILKSIMD
+		      || gimple_omp_for_kind (for_stmt)
+		      == GF_OMP_FOR_KIND_CILKFOR);
 	  break;
 	case LE_EXPR:
 	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
@@ -1818,27 +1830,120 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 	scan_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
 }
 
-/* Create a new name for omp child function.  Returns an identifier.  */
+/* Create a new name for omp child function.  Returns an identifier.  If 
+   IS_CILK_FOR is true then the suffix for the child function is 
+   "_cilk_for_fn."  */
 
 static tree
-create_omp_child_function_name (bool task_copy)
+create_omp_child_function_name (bool task_copy, bool is_cilk_for)
 {
+  if (is_cilk_for)
+    return clone_function_name (current_function_decl, "_cilk_for_fn");
   return (clone_function_name (current_function_decl,
 			       task_copy ? "_omp_cpyfn" : "_omp_fn"));
 }
 
+/* Statement callback for walk_gimple_seq.  *GSI_P is the statement iterator
+   passed by the walker and WI->INFO points to a struct cilk_for_info, in
+   which the first _Cilk_for statement seen and its induction variable are
+   recorded.  HANDLED_OPS_P is unused.  Always returns NULL_TREE.  */
+
+static tree
+find_cilk_for_stmt (gimple_stmt_iterator *gsi_p,
+		    bool *handled_ops_p ATTRIBUTE_UNUSED,
+		    struct walk_stmt_info *wi)
+{
+  struct cilk_for_info *cf_info = (struct cilk_for_info *) wi->info;
+  gimple stmt = gsi_stmt (*gsi_p);
+
+  if (gimple_code (stmt) == GIMPLE_OMP_FOR
+      && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_CILKFOR)
+      /* For nested _Cilk_for statements, just look into the
+	 outer-most one.  */
+      && cf_info->found == false)
+    {
+      cf_info->found = true;
+      cf_info->induction_var = gimple_cilk_for_induction_var (stmt);
+    }
+  return NULL_TREE;
+}
+
+/* Returns true if STMT contains a CILK_FOR statement.  Only a GIMPLE_BIND,
+   or a GIMPLE_OMP_PARALLEL whose body is one, is searched, and false is
+   always returned when flag_cilkplus is not set.  If a _Cilk_for is found and
+   IND_VAR is non-NULL, *IND_VAR is set to its induction variable;
+   otherwise *IND_VAR is left untouched.  */
+
+static bool
+is_cilk_for_stmt (gimple stmt, tree *ind_var)
+{
+  if (!flag_cilkplus)
+    return false;
+  if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL)
+    stmt = gimple_omp_body (stmt);
+  if (gimple_code (stmt) == GIMPLE_BIND)
+    {
+      gimple_seq body = gimple_bind_body (stmt);
+      struct walk_stmt_info wi;
+      struct cilk_for_info cf_info;
+      memset (&cf_info, 0, sizeof (struct cilk_for_info));
+      memset (&wi, 0, sizeof (wi));
+      wi.info = &cf_info;
+      walk_gimple_seq (body, find_cilk_for_stmt, NULL, &wi);
+      if (cf_info.found)
+	{
+	  if (ind_var)
+	    *ind_var = cf_info.induction_var;
+	  return true;
+	}
+    }
+  return false;
+}
+
+/* Map TYPE, the type of a _Cilk_for induction variable, to the type used for
+   the child function's __low/__high: int/uint32 or long long/uint64.  */
+
+static tree
+cilk_for_check_loop_diff_type (tree type)
+{
+  if (type == integer_type_node)
+    return type;
+  else if (TYPE_PRECISION (type) <= TYPE_PRECISION (uint32_type_node))
+    { 
+      if (TYPE_UNSIGNED (type)) 
+	return uint32_type_node;
+      else
+	return integer_type_node;
+    }
+  else
+    {
+      if (TYPE_UNSIGNED (type)) 
+	return uint64_type_node;
+      else
+	return long_long_integer_type_node;
+    }
+  gcc_unreachable ();
+}
+
 /* Build a decl for the omp child function.  It'll not contain a body
    yet, just the bare decl.  */
 
 static void
 create_omp_child_function (omp_context *ctx, bool task_copy)
 {
-  tree decl, type, name, t;
+  tree decl, type, name, t, ind_var = NULL_TREE;
 
-  name = create_omp_child_function_name (task_copy);
+  bool is_cilk_for = is_cilk_for_stmt (ctx->stmt, &ind_var);
+  tree cilk_var_type = (is_cilk_for ?
+    cilk_for_check_loop_diff_type (TREE_TYPE (ind_var)) : NULL_TREE);
+  
+  name = create_omp_child_function_name (task_copy, is_cilk_for);
   if (task_copy)
     type = build_function_type_list (void_type_node, ptr_type_node,
 				     ptr_type_node, NULL_TREE);
+  else if (is_cilk_for)
+    type = build_function_type_list (void_type_node, ptr_type_node,
+				     cilk_var_type, cilk_var_type, NULL_TREE);
   else
     type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
 
@@ -1888,13 +1993,44 @@  create_omp_child_function (omp_context *ctx, bool task_copy)
   DECL_CONTEXT (t) = decl;
   DECL_RESULT (decl) = t;
 
-  t = build_decl (DECL_SOURCE_LOCATION (decl),
-		  PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node);
+  /* _Cilk_for's child function requires two extra parameters called
+     __low and __high that are set by the Cilk runtime when it calls this
+     function.  */
+  if (is_cilk_for)
+    {
+      t = build_decl (DECL_SOURCE_LOCATION (decl),
+		      PARM_DECL, get_identifier ("__high"), cilk_var_type);
+      DECL_ARTIFICIAL (t) = 1;
+      DECL_NAMELESS (t) = 1;
+      DECL_ARG_TYPE (t) = ptr_type_node;
+      DECL_CONTEXT (t) = current_function_decl;
+      TREE_USED (t) = 1;
+      TREE_ADDRESSABLE (t) = 1;
+      DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
+      DECL_ARGUMENTS (decl) = t;
+
+      t = build_decl (DECL_SOURCE_LOCATION (decl),
+		      PARM_DECL, get_identifier ("__low"), cilk_var_type);
+      DECL_ARTIFICIAL (t) = 1;
+      DECL_NAMELESS (t) = 1;
+      DECL_ARG_TYPE (t) = ptr_type_node;
+      DECL_CONTEXT (t) = current_function_decl;
+      TREE_USED (t) = 1;
+      TREE_ADDRESSABLE (t) = 1;
+      DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
+      DECL_ARGUMENTS (decl) = t;
+    }
+
+  tree data_name = get_identifier (".omp_data_i");
+  t = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, data_name,
+		  ptr_type_node);
   DECL_ARTIFICIAL (t) = 1;
   DECL_NAMELESS (t) = 1;
   DECL_ARG_TYPE (t) = ptr_type_node;
   DECL_CONTEXT (t) = current_function_decl;
   TREE_USED (t) = 1;
+  if (is_cilk_for)
+    DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
   DECL_ARGUMENTS (decl) = t;
   if (!task_copy)
     ctx->receiver_decl = t;
@@ -4313,6 +4449,44 @@  expand_parallel_call (struct omp_region *region, basic_block bb,
 			    false, GSI_CONTINUE_LINKING);
 }
 
+/* Append to BB a call to the function WS_ARGS[0], passing ENTRY_STMT's child
+   function, its data argument, the loop count and the grain WS_ARGS[1].  */
+
+static void
+expand_cilk_for_call (basic_block bb, gimple entry_stmt,
+		      vec <tree, va_gc> *ws_args)
+{
+  tree t, t1, t2;
+  gimple_stmt_iterator gsi;
+  vec <tree, va_gc> *args;
+
+  gcc_assert (vec_safe_length (ws_args) == 2);
+  tree func_name = (*ws_args)[0];
+  tree grain = (*ws_args)[1];
+
+  tree clauses = gimple_omp_parallel_clauses (entry_stmt); 
+  tree count = find_omp_clause (clauses, OMP_CLAUSE_IF);
+  gcc_assert (count != NULL_TREE);
+  count = OMP_CLAUSE_IF_EXPR (count);
+  
+  gsi = gsi_last_bb (bb);
+  t = gimple_omp_parallel_data_arg (entry_stmt);
+  if (t == NULL)
+    t1 = null_pointer_node;
+  else
+    t1 = build_fold_addr_expr (t);
+  t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
+  
+  vec_alloc (args, 4);
+  args->quick_push (t2);
+  args->quick_push (t1);
+  args->quick_push (count);
+  args->quick_push (grain);
+  t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
+
+  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, 
+			    GSI_CONTINUE_LINKING);
+}
 
 /* Build the function call to GOMP_task to actually
    generate the task operation.  BB is the block where to insert the code.  */
@@ -4648,7 +4822,38 @@  expand_omp_taskreg (struct omp_region *region)
   entry_bb = region->entry;
   exit_bb = region->exit;
 
-  if (is_combined_parallel (region))
+  /* The way _Cilk_for is constructed in this compiler can be thought of
+     as a parallel omp_for.  But the inner workings between them are very
+     different so we need a way to differentiate between them.  Thus, we
+     added a new schedule type called OMP_CLAUSE_SCHEDULE_CILKFOR, which 
+     pretty much says that this is not a parallel omp for but a _Cilk_for
+     statement.  */
+  bool is_cilk_for =
+    (flag_cilkplus && region->inner &&
+     (region->inner->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR));
+
+  /* Extract the __high and __low parameter from the function.  */
+  tree high_arg = NULL_TREE, low_arg = NULL_TREE;
+  if (is_cilk_for)
+    {
+      for (tree ii_arg = DECL_ARGUMENTS (child_fn); ii_arg != NULL_TREE;
+	   ii_arg = TREE_CHAIN (ii_arg))
+	{
+	  if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_arg)), "__high"))
+	    high_arg = ii_arg;
+	  if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (ii_arg)), "__low"))
+	    low_arg = ii_arg;
+	}
+      gcc_assert (high_arg);
+      gcc_assert (low_arg);
+    }
+  
+  if (is_cilk_for) 
+    /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
+       and the inner statement contains the name of the built-in function
+       and grain.  */
+    ws_args = region->inner->ws_args;
+  else if (is_combined_parallel (region))
     ws_args = region->ws_args;
   else
     ws_args = NULL;
@@ -4755,6 +4960,49 @@  expand_omp_taskreg (struct omp_region *region)
 	    }
 	}
 
+      /* In here the calls to the GET_NUM_THREADS and GET_THREAD_NUM are
+	 removed.  Further, they will be replaced by __low and __high
+	 parameter values.  */
+      gimple high_assign = NULL, low_assign = NULL;
+      if (is_cilk_for)
+	{
+	  gimple_stmt_iterator gsi2 = gsi_start_bb (single_succ (entry_bb));
+	  while (!gsi_end_p (gsi2))
+	    {
+	      gimple stmt = gsi_stmt (gsi2);
+	
+	      if (gimple_call_builtin_p (stmt, BUILT_IN_OMP_GET_NUM_THREADS))
+		{
+		  /* There can only be one call to each of these two
+		     functions.  If there are multiple, then something went
+		     wrong somewhere.  */
+		  gcc_assert (low_assign == NULL);
+		  tree ltype = TREE_TYPE (gimple_get_lhs (stmt));
+		  tree tmp2 = create_tmp_reg (TREE_TYPE (low_arg), NULL);
+		  low_assign = gimple_build_assign 
+		    (gimple_get_lhs (stmt), fold_convert (ltype, tmp2));
+		  gsi_remove (&gsi2, true);
+		  gimple tmp_stmt = gimple_build_assign (tmp2, low_arg);
+		  gsi_insert_before (&gsi2, low_assign, GSI_NEW_STMT);
+		  gsi_insert_before (&gsi2, tmp_stmt, GSI_NEW_STMT);
+		}
+	      else if (gimple_call_builtin_p (stmt,
+					      BUILT_IN_OMP_GET_THREAD_NUM))
+		{
+		  gcc_assert (high_assign == NULL);
+		  tree htype = TREE_TYPE (gimple_get_lhs (stmt));
+		  tree tmp2 = create_tmp_reg (TREE_TYPE (high_arg), NULL);
+		  
+		  high_assign = gimple_build_assign 
+		    (gimple_get_lhs (stmt), fold_convert (htype, tmp2));
+		  gsi_remove (&gsi2, true);
+		  gimple tmp_stmt = gimple_build_assign (tmp2, high_arg);
+		  gsi_insert_before (&gsi2, high_assign, GSI_NEW_STMT);
+		  gsi_insert_before (&gsi2, tmp_stmt, GSI_NEW_STMT);
+		}
+	      gsi_next (&gsi2);
+	    }
+	}      
       /* Declare local variables needed in CHILD_CFUN.  */
       block = DECL_INITIAL (child_fn);
       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
@@ -4862,7 +5110,9 @@  expand_omp_taskreg (struct omp_region *region)
     }
 
   /* Emit a library call to launch the children threads.  */
-  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
+  if (is_cilk_for)
+    expand_cilk_for_call (new_bb, entry_stmt, ws_args);
+  else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
     expand_parallel_call (region, new_bb, entry_stmt, ws_args);
   else
     expand_task_call (new_bb, entry_stmt);
@@ -6540,6 +6790,223 @@  expand_omp_for_static_chunk (struct omp_region *region,
     }
 }
 
+/* A subroutine of expand_omp_for.  Generate code for _Cilk_for loop.
+   Given parameters:
+   for (V = N1; V cond N2; V += STEP) BODY;
+
+   we generate pseudocode
+
+   for (ind_var = low; ind_var < high; ind_var++)
+     {
+       if (cond is ">" or ">=")
+	 V = N1 - (ind_var * S);
+       else
+	 V = N1 + (ind_var * S);
+       <BODY>
+     }
+
+   where S is STEP, negated when STEP is a non-positive constant.  Low and
+   high are function parameters of the child function.  In the function
+   below, we emit placeholder calls to two OMP builtins
+   (BUILT_IN_OMP_GET_NUM_THREADS for low, BUILT_IN_OMP_GET_THREAD_NUM for
+   high) that will not be found in the body of _Cilk_for (since OMP_FOR
+   cannot be mixed with _Cilk_for).  These calls are replaced with low and
+   high by the function that handles taskreg.  */
+
+
+static void
+expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
+{
+  bool broken_loop = region->cont == NULL;
+  tree type = cilk_for_check_loop_diff_type (TREE_TYPE (fd->loop.v));
+  basic_block entry_bb = region->entry;
+  basic_block cont_bb = region->cont;
+
+  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
+  gcc_assert (broken_loop
+	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
+  basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
+  basic_block l1_bb, l2_bb;
+
+  if (!broken_loop)
+    {
+      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
+      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
+      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
+      l2_bb = BRANCH_EDGE (entry_bb)->dest;
+    }
+  else
+    {
+      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
+      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
+      l2_bb = single_succ (l1_bb);
+    }
+  basic_block exit_bb = region->exit;
+  basic_block l2_dom_bb = NULL;
+
+  gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
+
+  /* The two builtin calls below are pseudo statements used to pass
+     information to expand_omp_taskreg: they only mark where low_val and
+     high_val are needed, and are replaced there by the __low and __high
+     parameters of the child function (reasoning given in header
+     comment).
+
+     Both placeholders are converted to TYPE, the type of ind_var, and not
+     to the type of the user's loop variable, so the upper bound cannot be
+     truncated when that variable is narrower than TYPE.  */
+
+  tree t = build_call_expr
+    (builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS), 0);
+  t = fold_convert (type, t);
+  tree low_val = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
+					   GSI_SAME_STMT);
+  t = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM),
+		       0);
+  t = fold_convert (type, t);
+  tree high_val = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
+					   GSI_SAME_STMT);
+
+  tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
+  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+
+  /* Not needed in SSA form right now.  */
+  gcc_assert (!gimple_in_ssa_p (cfun));
+  if (l2_dom_bb == NULL)
+    l2_dom_bb = l1_bb;
+
+  tree n1 = low_val;
+  tree n2 = high_val;
+
+  expand_omp_build_assign (&gsi, ind_var, n1);
+
+  /* Remove the GIMPLE_OMP_FOR statement.  */
+  gsi_remove (&gsi, true);
+
+  gimple stmt;
+  if (!broken_loop)
+    {
+      /* Code to control the increment goes in the CONT_BB.  */
+      gsi = gsi_last_bb (cont_bb);
+      stmt = gsi_stmt (gsi);
+      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
+      enum tree_code code = PLUS_EXPR;
+      if (POINTER_TYPE_P (type))
+	t = fold_build_pointer_plus (ind_var, build_one_cst (type));
+      else
+	t = fold_build2 (code, type, ind_var, build_one_cst (type));
+      expand_omp_build_assign (&gsi, ind_var, t);
+
+      /* Remove GIMPLE_OMP_CONTINUE.  */
+      gsi_remove (&gsi, true);
+    }
+
+  /* Emit the condition in L1_BB.  */
+  gsi = gsi_start_bb (l1_bb);
+
+  tree step = fold_convert (type, fd->loop.step);
+  if (TREE_CODE (step) == INTEGER_CST && tree_int_cst_sgn (step) < 1)
+    step = fold_build1_loc (UNKNOWN_LOCATION, NEGATE_EXPR, type, step);
+
+  t = build2 (MULT_EXPR, type, ind_var, step);
+  tree tmp = create_tmp_reg (type, NULL);
+  gsi_insert_after (&gsi, gimple_build_assign (tmp, t), GSI_NEW_STMT);
+
+  tree tmp2 = create_tmp_reg (type, NULL);
+  tree cvtd = fold_convert (type, fd->loop.n1);
+  gsi_insert_after (&gsi, gimple_build_assign (tmp2, cvtd), GSI_NEW_STMT);
+
+  if (fd->loop.cond_code == GE_EXPR || fd->loop.cond_code == GT_EXPR)
+    t = fold_build2 (MINUS_EXPR, type, tmp2, tmp);
+  else
+    t = fold_build2 (PLUS_EXPR, type, tmp2, tmp);
+
+  tmp = create_tmp_reg (type, NULL);
+  gsi_insert_after (&gsi, gimple_build_assign (tmp, t), GSI_NEW_STMT);
+
+  cvtd = fold_convert (TREE_TYPE (fd->loop.v), tmp);
+  gsi_insert_after (&gsi, gimple_build_assign (fd->loop.v, cvtd),
+		    GSI_NEW_STMT);
+
+  t = fold_convert (type, n2);
+  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+				false, GSI_CONTINUE_LINKING);
+  /* The condition is always '<' since the runtime will fill in the low
+     and high values.  */
+  t = build2 (LT_EXPR, boolean_type_node, ind_var, t);
+  stmt = gimple_build_cond_empty (t);
+  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (walk_tree (gimple_cond_lhs_ptr (stmt), expand_omp_regimplify_p,
+		 NULL, NULL)
+      || walk_tree (gimple_cond_rhs_ptr (stmt), expand_omp_regimplify_p,
+		    NULL, NULL))
+    {
+      gsi = gsi_for_stmt (stmt);
+      gimple_regimplify_operands (stmt, &gsi);
+    }
+
+  /* Remove GIMPLE_OMP_RETURN.  */
+  gsi = gsi_last_bb (exit_bb);
+  gsi_remove (&gsi, true);
+
+  /* Connect the new blocks.  */
+  remove_edge (FALLTHRU_EDGE (entry_bb));
+
+  edge e, ne;
+  if (!broken_loop)
+    {
+      remove_edge (BRANCH_EDGE (entry_bb));
+      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
+
+      e = BRANCH_EDGE (l1_bb);
+      ne = FALLTHRU_EDGE (l1_bb);
+      e->flags = EDGE_TRUE_VALUE;
+    }
+  else
+    {
+      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
+
+      ne = single_succ_edge (l1_bb);
+      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
+
+    }
+  ne->flags = EDGE_FALSE_VALUE;
+  e->probability = REG_BR_PROB_BASE * 7 / 8;
+  ne->probability = REG_BR_PROB_BASE / 8;
+
+  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
+  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
+  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
+
+  if (!broken_loop)
+    {
+      struct loop *loop = alloc_loop ();
+      loop->header = l1_bb;
+      loop->latch = cont_bb;
+      add_loop (loop, l1_bb->loop_father);
+      loop->safelen = INT_MAX;
+    }
+
+  /* Pick the correct library function based on the precision of the
+     induction variable type.  */
+  tree lib_fun = NULL_TREE;
+  if (TYPE_PRECISION (type) == 32)
+    lib_fun = cilk_for_32_fndecl;
+  else if (TYPE_PRECISION (type) == 64)
+    lib_fun = cilk_for_64_fndecl;
+  else
+    gcc_unreachable ();
+
+  gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
+
+  /* WS_ARGS contains the library function flavor to call
+     (__libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
+     user-defined grain value.   If the user does not define one, then zero
+     is passed in by the parser.  */
+  vec_alloc (region->ws_args, 2);
+  region->ws_args->quick_push (lib_fun);
+  region->ws_args->quick_push (fd->chunk_size);
+}
 
 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
    loop.  Given parameters:
@@ -6880,6 +7347,8 @@  expand_omp_for (struct omp_region *region, gimple inner_stmt)
 
   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_KIND_SIMD)
     expand_omp_simd (region, &fd);
+  else if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_KIND_CILKFOR)
+    expand_cilk_for (region, &fd);
   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
 	   && !fd.have_ordered)
     {
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c
new file mode 100644
index 0000000..8b6112b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk-fors.c
@@ -0,0 +1,87 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#if HAVE_IO
+#include <stdio.h>
+#endif
+
+static void check (int *Array, int start, int end, int incr, int value)
+{
+  int ii = 0;
+  for (ii = start;  ii < end; ii = ii + incr)
+    if (Array[ii] != value)
+      __builtin_abort ();
+#if HAVE_IO
+  printf ("Passed\n");
+#endif
+}
+
+static void check_reverse (int *Array, int start, int end, int incr, int value)
+{
+  int ii = 0;
+  for (ii = start; ii >= end; ii = ii - incr)
+    if (Array[ii] != value)
+      __builtin_abort ();
+#if HAVE_IO
+  printf ("Passed\n");
+#endif
+}
+
+
+int main (void)
+{
+  int Array[10];
+  int x = 9, y = 0, z = 3;
+
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array[ii] = 1133;
+  check (Array, 0, 10, 1, 1133);
+
+  _Cilk_for (int ii = 0; ii < 10; ++ii)
+    Array[ii] = 3311;
+  check (Array, 0, 10, 1, 3311);
+
+  _Cilk_for (int ii = 9; ii > -1; ii--)
+    Array[ii] = 4433;
+  check_reverse (Array, 9, 0, 1, 4433);
+
+  _Cilk_for (int ii = 9; ii > -1; --ii)
+    Array[ii] = 9988;
+  check_reverse (Array, 9, 0, 1, 9988);
+
+  _Cilk_for (int ii = 0; ii < 10; ++ii)
+    Array[ii] = 3311;
+  check (Array, 0, 10, 1, 3311);
+
+  _Cilk_for (int ii = 0; ii < 10; ii += 2)
+    Array[ii] = 1328;
+  check (Array, 0, 10, 2, 1328);
+
+  _Cilk_for (int ii = 9; ii >= 0; ii -= 2)
+    Array[ii] = 1738;
+  check_reverse (Array, 9, 0, 2, 1738);
+
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    {
+      if (ii % 2)
+	Array[ii] = 1343;
+      else
+	Array[ii] = 3413;
+    }
+
+  check (Array, 1, 10, 2, 1343); 
+  check (Array, 0, 10, 2, 3413); 
+
+  _Cilk_for (short cc = 0; cc < 10; cc++) 
+    Array[cc] = 1343;
+  check (Array, 0, 10,  1,1343);
+
+  _Cilk_for (short cc = 9; cc >= 0; cc--)
+    Array[cc] = 1348;
+  check_reverse (Array, 9, 0, 1, 1348);
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
new file mode 100644
index 0000000..ed73c34
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_errors.c
@@ -0,0 +1,58 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=c99" { target c } } */
+
+
+int main (void)
+{
+  int q = 0, ii = 0, jj = 0;
+
+  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" "" { target c } } */
+    /* { dg-error "expected" "" { target c++ } 10 } */
+    q = 5;
+
+  _Cilk_for (; ii < 10; ii++) /* { dg-error "expected iteration declaration" } */
+    q = 2;
+
+  _Cilk_for (int ii = 0; ; ii++) /* { dg-error "missing controlling predicate" } */
+    q = 2;
+
+  _Cilk_for (int ii = 0; ii < 10, jj < 10; ii++)  /* { dg-error "expected ';' before ',' token" "" { target c } } */
+    /* { dg-error "invalid controlling predicate" "" { target c++ }  20 } */
+    q = 5;
+
+  _Cilk_for (int ii = 0; ii < 10; ) /* { dg-error "missing increment" } */
+    q = 5;
+
+
+  _Cilk_for (int ii = 0, jj = 0; ii < 10; ii++) /* { dg-error "expected" } */ 
+    q = 5;
+
+  _Cilk_for (volatile int vii = 0; vii < 10; vii++) /* { dg-error "iteration variable cannot be volatile" } */
+    q = 5;
+
+ 
+  _Cilk_for (static int sii = 0; sii < 10; sii++) /* { dg-error "static" } */
+
+    q = 5;
+
+
+  _Cilk_for (float fii = 3.47; fii < 5.23; fii++) /* { dg-error "invalid type for iteration variable" } */
+    q = 5;
+
+
+  _Cilk_for (int ii = 0; 10 > jj; ii++) /* { dg-error "invalid controlling predicate" } */
+    q = 5;
+
+  _Cilk_for (int ii = 0; ii < 10; ii >> 1) /* { dg-error "invalid increment expression" } */
+    q = 5;
+
+  _Cilk_for (int ii = 10; ii >= 0; ii--) /* This is OK!  */
+    q = 5;
+
+  _Cilk_for (int ii; ii < 10; ii++) /* { dg-error "is not initialized" "" { target c } } */ 
+    /* { dg-error "expected" "" { target c++ }  53 } */
+    q = 5;
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c
new file mode 100644
index 0000000..6cb9b03
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain.c
@@ -0,0 +1,35 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+int grain_value = 2;
+int main (void)
+{
+  int Array1[200], Array1_Serial[200];
+
+  for (int ii = 0; ii < 200; ii++)
+    {
+      Array1_Serial[ii] = 2;
+      Array1[ii] = 1;
+    }
+
+#pragma cilk grainsize = 2
+  _Cilk_for (int ii = 0; ii < 200; ii++)
+    Array1[ii] = 2;
+
+  for (int ii = 0; ii < 200; ii++)
+    if (Array1[ii] != Array1_Serial[ii])
+      return (ii+1);
+
+#pragma cilk grainsize = grain_value
+  _Cilk_for (int ii = 0; ii < 200; ii++) 
+    Array1[ii] = 2;
+
+  for (int ii = 0; ii < 200; ii++)
+    if (Array1[ii] != Array1_Serial[ii])
+      return (ii+1);
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
new file mode 100644
index 0000000..e1e3217
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_grain_errors.c
@@ -0,0 +1,48 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fcilkplus -Wunknown-pragmas" } */
+/* { dg-additional-options "-std=c99" { target c } } */
+
+
+char Array1[26];
+
+#pragma cilk grainsize = 2 /* { dg-error "must be inside a function" } */
+
+int main(int argc, char **argv)
+{
+/* This is OK.  */
+#pragma cilk grainsize = 2
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsize 2 /* { dg-error "expected '=' before numeric constant" } */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsiz = 2 /* { dg-warning "ignoring #pragma cilk grainsiz" } */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+
+/* This is OK, it will do a type conversion to long int.  */
+#pragma cilk grainsize = 0.5 
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    Array1[ii] = 0;
+
+#pragma cilk grainsize = 1 
+  while (Array1[5] != 0) /* { dg-warning "is not followed by" } */
+    {
+    /* Blah */
+    }
+
+#pragma cilk grainsize = 1 
+  int q = 0; /* { dg-warning "is not followed by" } */
+  _Cilk_for (q = 0; q < 10; q++)
+    Array1[q]  = 5;
+
+  while (Array1[5] != 0)
+    {
+    /* Blah */
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c
new file mode 100644
index 0000000..8cf1b4e
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/cilk_for_ptr_iter.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+
+/* <feature> loop control variable must have integer, pointer or class type
+   </feature>
+*/
+
+#define ARRAY_SIZE 10
+int a[ARRAY_SIZE];
+
+int main(void)
+{ 
+  int ii = 0;
+
+#if 1
+  for (ii =0; ii < ARRAY_SIZE; ii++)
+    a[ii] = 5;
+#endif
+  _Cilk_for(int *aa = a; aa < a + ARRAY_SIZE; aa++) 
+    *aa = 0;
+#if 1
+  for (ii = 0; ii < ARRAY_SIZE; ii++) 
+    if (a[ii] != 0) 
+      __builtin_abort ();
+#endif
+
+  _Cilk_for (int *aa = a; aa < a + ARRAY_SIZE; aa = aa + 2)
+    *aa = 4;
+
+#if 1
+  for (ii = 0; ii < ARRAY_SIZE; ii = ii + 2) 
+    if (a[ii] != 4) 
+      __builtin_abort ();
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c b/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c
new file mode 100644
index 0000000..cffe17e
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cilk-plus/CK/nested_cilk_for.c
@@ -0,0 +1,79 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99"  { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#if HAVE_IO
+#include <stdio.h>
+#endif
+
+int main (void)
+{
+  int Array[10][10];
+
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 10; jj++)
+	{
+	  Array[ii][jj] = 0;
+	}
+
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 0; jj < 5; jj++)
+      Array[ii][jj] = 5;
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 5; jj++)
+      if (Array[ii][jj] != 5)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+
+  /* One goes up and one goes down.  */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 9; jj >= 0; jj--)
+      Array[ii][jj] = 7;
+
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 9; jj >= 0; jj--)
+      if (Array[ii][jj] != 7)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  /* different step sizes.  */
+  _Cilk_for (int ii = 0; ii < 10; ii++)
+    _Cilk_for (int jj = 0; jj < 10; jj += 2)
+      Array[ii][jj] = 9;
+  
+  for (int ii = 0; ii < 10; ii++)
+    for (int jj = 0; jj < 10; jj += 2)
+      if (Array[ii][jj] != 9)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  /* different step sizes.  */
+  _Cilk_for (int ii = 0; ii < 10; ii += 2)
+    _Cilk_for (int jj = 5; jj < 9; jj++)
+      Array[ii][jj] = 11; 
+  
+  for (int ii = 0; ii < 10; ii += 2)
+    for (int jj = 5; jj < 9; jj++)
+      if (Array[ii][jj] != 11)
+#if HAVE_IO
+	printf("Array[%d][%d] = %d\n", ii, jj, Array[ii][jj]);
+#else
+	__builtin_abort ();
+#endif
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc
new file mode 100644
index 0000000..8221371
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/cilk-for-tplt.cc
@@ -0,0 +1,25 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#define SIZE 100
+#define CHECK_VALUE 5
+
+template <class T>
+int func (T start, T end)
+{
+  int Array[SIZE];
+  _Cilk_for (T ii = 0; ii < end; ii++)
+    Array[ii] = CHECK_VALUE;
+  
+  for (T ii = 0; ii < end; ii++)
+    if (Array[ii] != CHECK_VALUE)
+      __builtin_abort ();
+
+  return 0;
+}
+
+int main (void)
+{
+  return func <int> (0, 100) + func <long> (0, 100);
+}
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc
new file mode 100644
index 0000000..2ac8c72
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_iter.cc
@@ -0,0 +1,52 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#include <vector>
+#include <cstdio>
+#include <iostream>
+#include <algorithm>
+
+using namespace std;
+
+
+int main(void)
+{
+vector <int> array;
+vector <int> array_serial;
+
+#if 1
+for (int ii = -1; ii < 10; ii++)
+{   
+  array.push_back(ii);
+  array_serial.push_back (ii);
+}
+#endif
+_Cilk_for (vector<int>::iterator iter = array.begin(); iter != array.end();
+          iter++)
+{
+   if (*iter  == 6) 
+     *iter = 13;
+}
+for (vector<int>::iterator iter = array_serial.begin(); 
+     iter != array_serial.end(); iter++)
+{
+   if (*iter  == 6) 
+     *iter = 13;
+}
+sort (array.begin(), array.end());
+sort (array_serial.begin(), array_serial.end());
+
+vector <int>::iterator iter = array.begin ();
+vector <int>::iterator iter_serial = array_serial.begin ();
+
+while (iter != array.end () && iter_serial != array_serial.end ())
+{
+  if (*iter != *iter_serial)
+    __builtin_abort ();
+  iter++;
+  iter_serial++;
+}
+
+return 0;
+}   
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc
new file mode 100644
index 0000000..1cf3301
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_rev_iter.cc
@@ -0,0 +1,72 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+#include <vector>
+#include <cstdio>
+#include <iostream>
+#include <algorithm>
+
+using namespace std;
+
+
+int main(void)
+{
+vector <int> array,array_serial;
+/* Run the same updates in parallel and serially, then compare.  */
+#if 1
+for (int ii = -1; ii < 10; ii++)
+{
+  array.push_back(ii);
+  array_serial.push_back(ii);
+}
+#endif
+_Cilk_for (vector<int>::reverse_iterator iter4 = array.rbegin();
+	   iter4 != array.rend(); iter4++)
+{
+  if (*iter4 == 0x8) {
+    *iter4 = 9;
+  }
+}
+/* Serial reference for the loop above: a plain for loop.  */
+for (vector<int>::reverse_iterator iter4 = array_serial.rbegin();
+     iter4 != array_serial.rend(); iter4++)
+{
+  if (*iter4 == 0x8) {
+    *iter4 = 9;
+  }
+}
+_Cilk_for (vector<int>::reverse_iterator iter2 = array.rbegin();
+	   iter2 != array.rend();
+          iter2 += 1)
+{
+   if ((*iter2 == 0x4) || (*iter2 == 0x7)) {
+    *iter2 = 0x3;
+   }
+}
+for (vector<int>::reverse_iterator iter2 = array_serial.rbegin();
+     iter2 != array_serial.rend();
+          iter2 += 1)
+{
+   if ((*iter2 == 0x4) || (*iter2 == 0x7)) {
+    *iter2 = 0x3;
+   }
+}
+sort (array.begin(), array.end());
+sort (array_serial.begin(), array_serial.end());
+
+vector <int>::iterator iter = array.begin ();
+vector <int>::iterator iter_serial = array_serial.begin ();
+while (iter != array.end () && iter_serial != array_serial.end ())
+{
+  if (*iter != *iter_serial)
+    __builtin_abort ();
+  iter++;
+  iter_serial++;
+}
+
+return 0;
+}
+
+
diff --git a/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc
new file mode 100644
index 0000000..8d2e61e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cilk-plus/CK/stl_test.cc
@@ -0,0 +1,50 @@ 
+/* { dg-do run  { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+
+#include <iostream>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include <list>
+
+using namespace std;
+
+
+int main(int argc, char **argv)
+{
+  vector <int> number_list, number_list_serial;
+  int new_number = 0;
+  int no_elements = 0;
+  
+  if (argc != 2)
+  {
+    no_elements = 10;
+  }
+
+
+  number_list.clear();
+  number_list_serial.clear();
+  for (int ii = 0; ii < no_elements; ii++)
+  {
+    number_list.push_back(new_number);
+    number_list_serial.push_back(new_number);
+  }
+
+  _Cilk_for (int jj = 0; jj < no_elements; jj++)
+  {
+    number_list[jj] = jj + no_elements;
+  }
+  for (int jj = 0; jj < no_elements; jj++)
+  {
+    number_list_serial[jj] = jj + no_elements;
+  }
+
+  for (int jj = 0; jj < no_elements; jj++)
+    if (number_list_serial[jj] != number_list[jj])
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index e548a0d..d8c14e3 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -351,6 +351,7 @@  enum omp_clause_schedule_kind {
   OMP_CLAUSE_SCHEDULE_GUIDED,
   OMP_CLAUSE_SCHEDULE_AUTO,
   OMP_CLAUSE_SCHEDULE_RUNTIME,
+  OMP_CLAUSE_SCHEDULE_CILKFOR,
   OMP_CLAUSE_SCHEDULE_LAST
 };
 
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 0595499..91efd9f 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -411,6 +411,9 @@  dump_omp_clause (pretty_printer *buffer, tree clause, int spc, int flags)
 	case OMP_CLAUSE_SCHEDULE_AUTO:
 	  pp_string (buffer, "auto");
 	  break;
+	case OMP_CLAUSE_SCHEDULE_CILKFOR:
+	  pp_string (buffer, "cilk-for grain");
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -2392,6 +2395,12 @@  dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
       pp_string (buffer, "#pragma simd");
       goto dump_omp_loop;
 
+    case CILK_FOR:
+      /* This label points one line after dumping the clauses.  
+	 For _Cilk_for the clauses are dumped after the _Cilk_for (...) 
+	 parameters are printed out.  */
+      goto dump_omp_loop_cilk_for;
+
     case OMP_DISTRIBUTE:
       pp_string (buffer, "#pragma omp distribute");
       goto dump_omp_loop;
@@ -2420,6 +2429,8 @@  dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
     dump_omp_loop:
       dump_omp_clauses (buffer, OMP_FOR_CLAUSES (node), spc, flags);
 
+    dump_omp_loop_cilk_for:
+
       if (!(flags & TDF_SLIM))
 	{
 	  int i;
@@ -2440,7 +2451,10 @@  dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
 		{
 		  spc += 2;
 		  newline_and_indent (buffer, spc);
-		  pp_string (buffer, "for (");
+		  if (TREE_CODE (node) == CILK_FOR)
+		    pp_string (buffer, "_Cilk_for (");
+		  else 
+		    pp_string (buffer, "for (");
 		  dump_generic_node (buffer,
 				     TREE_VEC_ELT (OMP_FOR_INIT (node), i),
 				     spc, flags, false);
@@ -2454,6 +2468,8 @@  dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
 				     spc, flags, false);
 		  pp_right_paren (buffer);
 		}
+	      if (TREE_CODE (node) == CILK_FOR) 
+		dump_omp_clauses (buffer, OMP_FOR_CLAUSES (node), spc, flags);
 	    }
 	  if (OMP_FOR_BODY (node))
 	    {
diff --git a/gcc/tree.def b/gcc/tree.def
index f8d6444..558d7c8 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1051,6 +1051,10 @@  DEFTREECODE (OMP_SIMD, "omp_simd", tcc_statement, 6)
    Operands like for OMP_FOR.  */
 DEFTREECODE (CILK_SIMD, "cilk_simd", tcc_statement, 6)
 
+/* Cilk Plus - _Cilk_for (..)
+   Operands like for OMP_FOR.  */
+DEFTREECODE (CILK_FOR, "cilk_for", tcc_statement, 6)
+
 /* OpenMP - #pragma omp distribute [clause1 ... clauseN]
    Operands like for OMP_FOR.  */
 DEFTREECODE (OMP_DISTRIBUTE, "omp_distribute", tcc_statement, 6)